M .gitignore => .gitignore +4 -3
@@ 1,3 1,4 @@
-.venv
-data
-**/__pycache__
+.venv/
+data/
+**/__pycache__/
+analysis/timesheets_sas.csv
A exporter/json.py => exporter/json.py +40 -0
@@ 0,0 1,40 @@
+#!/usr/bin/env python3
+
+# TODO: dump the dictionary built by encode_dict() into a JSON file
+
+def encode_dict(timesheets):
+    """Collapse parsed timesheets into a per-key mapping of dated hours.
+
+    `timesheets` is a list of timesheets, each an iterable of entries that
+    expose `project`, `time_code` and `data` (a date -> hours mapping).
+
+    ```
+    {
+        'PROJECT': {
+            datetime.datetime(DATE): decimal.Decimal(HOURS),
+            ...
+        },
+        ...
+    }
+    ```
+
+    'PROJECT' is the entry's time code when that is HOL, OTU, VAC or OPL,
+    otherwise its project. If a key sees the same date twice, the hours
+    processed last replace the earlier ones.
+    """
+    special_codes = ("HOL", "OTU", "VAC", "OPL")
+    by_key = {}
+    for sheet in timesheets:
+        for item in sheet:
+            # these time codes take the project's place as the grouping key
+            code, label = item.time_code, item.project
+            if code in special_codes:
+                label = code
+            # create the bucket on first sight, then copy each date's hours
+            by_key.setdefault(label, {}).update(item.data.items())
+
+    return by_key
+
+def export(filename, timesheets):
+    """Placeholder: the JSON export is not implemented yet; does nothing."""
+
A exporter/long_csv.py => exporter/long_csv.py +39 -0
@@ 0,0 1,39 @@
+#!/usr/bin/env python3
+
+import csv
+
+def handle_date(date):
+    return f"{date:%m/%d/%Y}"  # render as zero-padded MM/DD/YYYY text
+
+def encode_list(timesheets):
+    """Flatten parsed timesheets into long-format rows, one per entry date.
+
+    Each timesheet is an iterable of entries exposing `project`,
+    `time_code` and `data` (date -> hours). The first column is the time
+    code when it is HOL, OTU, VAC or OPL, otherwise the project.
+
+    ```
+    [ ['PROJECT', 'MM/DD/YYYY', decimal.Decimal(HOURS)],
+      ...
+    ]
+    ```
+    """
+    special_codes = ("HOL", "OTU", "VAC", "OPL")
+    rows = []
+    for sheet in timesheets:
+        for item in sheet:
+            code, label = item.time_code, item.project
+            if code in special_codes:
+                label = code
+            # one row per (date, hours) pair recorded on the entry
+            rows.extend([label, handle_date(day), amount]
+                        for day, amount in item.data.items())
+    return rows
+
+def export(filename, timesheets):
+    """Write the long-format rows of `timesheets` to `filename` as CSV."""
+    # newline="" lets the csv module control line endings itself
+    with open(filename, "w", newline="") as out:
+        # rows are built only after the file has been opened (truncated)
+        csv.writer(out).writerows(encode_list(timesheets))
+
M main.py => main.py +7 -15
@@ 8,8 8,10 @@ from parser.xml import parse as parse_xml
from parser.pdf import parse as parse_pdf
from parser.timesheet import parse as parse_timesheet
+from exporter.long_csv import export
+
def main(filelist):
- projects = {}
+ timesheets = []
print(f"processing {len(filelist)} files")
for filename in (filelist):
@@ 19,20 21,10 @@ def main(filelist):
parse_pdf(filename, xml_filename)
parse_xml(xml_filename, csv_filename)
- entries = parse_timesheet(csv_filename)
- for entry in entries:
- if entry.time_code in ("HOL", "OTU", "VAC", "OPL", ):
- if entry.time_code not in projects.keys():
- projects[entry.time_code] = {}
- for date, hours in entry.data.items():
- projects[entry.time_code][date] = hours
- else:
- if entry.project not in projects.keys():
- projects[entry.project] = {}
- for date, hours in entry.data.items():
- projects[entry.project][date] = hours
-
- pprint(projects)
+ timesheets.append(parse_timesheet(csv_filename))
+
+ dest_filename = pathlib.Path("analysis/timesheets_sas.csv")
+ export(dest_filename, timesheets)
if __name__ == "__main__":
filelist = []