From 6899c67fa4f55e9d7a82f6b2cd780bc43185f3fc Mon Sep 17 00:00:00 2001
From: Dominic Ricottone
Date: Wed, 11 May 2022 22:27:35 -0500
Subject: [PATCH] With data scraping complete, moving on to analysis

Basic summation of projects, as proof of concept
Basic SAS program for importing CSV data and storing as time series data
---
Review notes (this section is ignored by git am):
* Line structure restored; indentation and the placement of blank context
  lines in the main.py hunks are a best guess.
* analysis/totals.py: added docstrings, dropped the redundant .keys() call
  and the unused date variable. Its index hash below is therefore stale.

 analysis/timesheets.sas | 26 ++++++++++++++++++++++++++
 analysis/totals.py      | 40 ++++++++++++++++++++++++++++++++++++++++
 main.py                 |  4 ++++
 3 files changed, 70 insertions(+)
 create mode 100644 analysis/timesheets.sas
 create mode 100644 analysis/totals.py

diff --git a/analysis/timesheets.sas b/analysis/timesheets.sas
new file mode 100644
index 0000000..a6047ca
--- /dev/null
+++ b/analysis/timesheets.sas
@@ -0,0 +1,26 @@
+proc import datafile="/home/u44593168/timesheets_sas.csv"
+    out=work.raw
+    dbms=csv
+    replace;
+    getnames=no;
+run;
+
+data work.clean;
+    set work.raw;
+    project = VAR1;
+    date = VAR2;
+    hours = VAR3;
+    drop VAR1 VAR2 VAR3;
+run;
+
+proc sort data=work.clean;
+    by project date;
+run;
+
+proc timeseries data=work.clean
+    out=work.final;
+    by project;
+    id date interval=day accumulate=total;
+    var hours;
+run;
+
diff --git a/analysis/totals.py b/analysis/totals.py
new file mode 100644
index 0000000..8df1a8d
--- /dev/null
+++ b/analysis/totals.py
@@ -0,0 +1,40 @@
+#!/usr/bin/env python3
+
+def totals(timesheets):
+    """Print the total hours recorded for each project.
+
+    Entries whose time code is HOL, OTU, VAC or OPL are grouped under
+    that time code instead of their project. Each group is labelled
+    with the label of the first entry seen for it.
+    """
+    projects = {}
+
+    for timesheet in timesheets:
+        for entry in timesheet:
+            key = entry.project
+            if entry.time_code in ("HOL", "OTU", "VAC", "OPL"):
+                key = entry.time_code
+
+            if key not in projects:
+                projects[key] = {"name": entry.label, "hours": 0}
+
+            for hours in entry.data.values():
+                projects[key]["hours"] += hours
+
+    for project, data in projects.items():
+        print(f"{project:20} {data['name']:100} {data['hours']}")
+
+def total_ocps2020(timesheets):
+    """Print the total hours recorded for project 20032.001.20.005."""
+    total = 0
+    for timesheet in timesheets:
+        for entry in timesheet:
+            if entry.project == "20032.001.20.005":
+                # Only the hours are summed; the date keys are unused.
+                for hours in entry.data.values():
+                    total += hours
+                # NOTE(review): only the first matching entry in each
+                # timesheet is counted -- confirm that is intended.
+                break
+    print(f"{total} hours spent on OCPS 2020")
+
diff --git a/main.py b/main.py
index b2cea10..b9a9f4c 100644
--- a/main.py
+++ b/main.py
@@ -10,6 +10,8 @@ from parser.timesheet import parse as parse_timesheet
 
 from exporter.long_csv import export
 
+from analysis.totals import totals, total_ocps2020
+
 def main(filelist):
     timesheets = []
 
@@ -26,6 +28,8 @@ def main(filelist):
     dest_filename = pathlib.Path("analysis/timesheets_sas.csv")
     export(dest_filename, timesheets)
 
+    total_ocps2020(timesheets)
+
 if __name__ == "__main__":
     filelist = []
     for filename in sys.argv[1:]:
-- 
2.45.2