From 721dfa4e4fbe8ffcb9b0d1462bb7367022eeac50 Mon Sep 17 00:00:00 2001
From: Dominic Ricottone
Date: Wed, 4 May 2022 14:15:04 -0500
Subject: [PATCH] Adding exporters

Wrote and tested the long CSV exporter. Stubbed out the JSON exporter.
---
 .gitignore           |  7 ++++---
 exporter/json.py     | 40 ++++++++++++++++++++++++++++++++++++++++
 exporter/long_csv.py | 39 +++++++++++++++++++++++++++++++++++++++
 main.py              | 22 +++++++---------------
 4 files changed, 90 insertions(+), 18 deletions(-)
 create mode 100644 exporter/json.py
 create mode 100644 exporter/long_csv.py

diff --git a/.gitignore b/.gitignore
index 0775351..cc0c550 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,4 @@
-.venv
-data
-**/__pycache__
+.venv/
+data/
+**/__pycache__/
+analysis/timesheets_sas.csv
diff --git a/exporter/json.py b/exporter/json.py
new file mode 100644
index 0000000..b280d4f
--- /dev/null
+++ b/exporter/json.py
@@ -0,0 +1,40 @@
+#!/usr/bin/env python3
+
+#TODO: implement export() so it dumps the encode_dict() dictionary into a JSON file
+
+def encode_dict(timesheets):
+    """Given a list of timesheets, which themselves are lists of time entries,
+    create a nested dictionary of hours keyed by project (or time code), then by date.
+
+    ```
+    {
+        'PROJECT': {
+            datetime.datetime(DATE): decimal.Decimal(HOURS),
+            ...
+        },
+        ...
+    }
+    ```
+    """
+    projects = {}
+
+    for timesheet in timesheets:
+        for entry in timesheet:
+            # key on the project, unless the entry uses one of these time codes
+            key = entry.project
+            if entry.time_code in ("HOL", "OTU", "VAC", "OPL", ):
+                key = entry.time_code
+
+            # start an empty date-to-hours dictionary the first time a key is seen
+            if key not in projects.keys():
+                projects[key] = {}
+
+            # store hours by date; a repeated date under the same key is overwritten
+            for date, hours in entry.data.items():
+                projects[key][date] = hours
+
+    return projects
+
+def export(filename, timesheets):
+    pass
+
diff --git a/exporter/long_csv.py b/exporter/long_csv.py
new file mode 100644
index 0000000..4465404
--- /dev/null
+++ b/exporter/long_csv.py
@@ -0,0 +1,39 @@
+#!/usr/bin/env python3
+
+import csv
+
+def handle_date(date):
+    return date.strftime("%m/%d/%Y")
+
+def encode_list(timesheets):
+    """Given a list of timesheets, which themselves are lists of time entries,
+    create a long list of data: one [key, date, hours] row per dated value.
+
+    ```
+    [ ['PROJECT', 'MM/DD/YYYY', decimal.Decimal(HOURS)],
+      ...
+    ]
+    ```
+    """
+    projects = []
+
+    for timesheet in timesheets:
+        for entry in timesheet:
+            # key on the project, unless the entry uses one of these time codes
+            key = entry.project
+            if entry.time_code in ("HOL", "OTU", "VAC", "OPL", ):
+                key = entry.time_code
+
+            # append one row per date; the date is formatted by handle_date()
+            for date, hours in entry.data.items():
+                projects.append([key, handle_date(date), hours])
+
+    return projects
+
+def export(filename, timesheets):
+    """Write the rows from encode_list(timesheets) to filename as CSV."""
+    with open(filename, "w", newline="") as f:
+        writer = csv.writer(f)
+        for row in encode_list(timesheets):
+            writer.writerow(row)
+
diff --git a/main.py b/main.py
index 7cfe598..b2cea10 100644
--- a/main.py
+++ b/main.py
@@ -8,8 +8,10 @@ from parser.xml import parse as parse_xml
 from parser.pdf import parse as parse_pdf
 from parser.timesheet import parse as parse_timesheet
 
+from exporter.long_csv import export
+
 def main(filelist):
-    projects = {}
+    timesheets = []
 
     print(f"processing {len(filelist)} files")
     for filename in (filelist):
@@ -19,20 +21,10 @@ def main(filelist):
         parse_pdf(filename, xml_filename)
         parse_xml(xml_filename, csv_filename)
 
-        entries = parse_timesheet(csv_filename)
-        for entry in entries:
-            if entry.time_code in ("HOL", "OTU", "VAC", "OPL", ):
-                if entry.time_code not in projects.keys():
-                    projects[entry.time_code] = {}
-                for date, hours in entry.data.items():
-                    projects[entry.time_code][date] = hours
-            else:
-                if entry.project not in projects.keys():
-                    projects[entry.project] = {}
-                for date, hours in entry.data.items():
-                    projects[entry.project][date] = hours
-
-    pprint(projects)
+        timesheets.append(parse_timesheet(csv_filename))
+
+    dest_filename = pathlib.Path("analysis/timesheets_sas.csv")
+    export(dest_filename, timesheets)
 
 if __name__ == "__main__":
     filelist = []
-- 
2.45.2