From 8ebab2a421aa9bb58b0f3fd43e517190ba12e752 Mon Sep 17 00:00:00 2001 From: Dominic Ricottone Date: Tue, 3 May 2022 21:35:27 -0500 Subject: [PATCH] Fully functional timesheet parser. The timesheet parser is a complete success. Some minor issues were ironed out in the XML parser as well. Next steps: writing to a time series database and beginning analysis. --- main.py | 21 +- parser/timeentry.py | 163 -------------- parser/timesheet.py | 536 +++++++++++++++++++------------------------- parser/xml.py | 4 +- 4 files changed, 245 insertions(+), 479 deletions(-) delete mode 100644 parser/timeentry.py diff --git a/main.py b/main.py index eb3c0f6..7cfe598 100644 --- a/main.py +++ b/main.py @@ -2,12 +2,15 @@ import sys import pathlib +from pprint import pprint from parser.xml import parse as parse_xml from parser.pdf import parse as parse_pdf -from parser.timesheet import TimeSheet +from parser.timesheet import parse as parse_timesheet def main(filelist): + projects = {} + print(f"processing {len(filelist)} files") for filename in (filelist): xml_filename = filename.parent.joinpath(filename.name + ".xml") @@ -16,8 +19,20 @@ def main(filelist): parse_pdf(filename, xml_filename) parse_xml(xml_filename, csv_filename) - #timesheet = TimeSheet(semistructured_data) - #timesheet.report_issues() + entries = parse_timesheet(csv_filename) + for entry in entries: + if entry.time_code in ("HOL", "OTU", "VAC", "OPL", ): + if entry.time_code not in projects.keys(): + projects[entry.time_code] = {} + for date, hours in entry.data.items(): + projects[entry.time_code][date] = hours + else: + if entry.project not in projects.keys(): + projects[entry.project] = {} + for date, hours in entry.data.items(): + projects[entry.project][date] = hours + + pprint(projects) if __name__ == "__main__": filelist = [] diff --git a/parser/timeentry.py b/parser/timeentry.py deleted file mode 100644 index 51faff8..0000000 --- a/parser/timeentry.py +++ /dev/null @@ -1,163 +0,0 @@ -#!/usr/bin/env python3 - -from re import compile as re_compile - -PROJECT_PATTERN = re_compile("(OHORG\.(MTG|ONB|PFR|TPD|TOH)\.[01][0-9]\.00[0-9]|UNALW\.EMW\.0[0-9]\.00[0-9]|FSERV\.PPM\.00\.000|[0-9]{5}\.[0-9]{3}\.[0-9]{2}\.[0-9]{3})") -TIMETYPE_PATTERN = re_compile("(FSERV|[0-9]{3,5})") -DATE_PATTERN = re_compile("[ADFJMNOS][aceopu][bcglnprtvy] [0-9][0-9]?, 20[12][89012]") - -ENTRY_PATTERNS = ( - re_compile("[1-9][0-9]?"), - re_compile("(HOL|OTU|ST|VAC)"), - re_compile("(OHORG\.(MTG|ONB|PFR|TPD|TOH)\.[01][0-9]\.00[0-9]|UNALW\.EMW\.0[0-9]\.00[0-9]|FSERV\.PPM\.00\.000|[0-9]{5}\.[0-9]{3}\.[0-9]{2}\.[0-9]{3})"), - re_compile("(FSERV|[0-9]{3,5})"), - re_compile("."), - re_compile("Notes"), - re_compile("[ADFJMNOS][aceopu][bcglnprtvy] [0-9][0-9]?, 20[12][89012] -"), - re_compile("[ADFJMNOS][aceopu][bcglnprtvy] [0-9][0-9]?, 20[12][89012] Other"), - re_compile("."), - re_compile("Approved"), -) - -class TimeEntry(object): - def __init__(self, semistructured_data, timesheet_name, entry_number): - self._issues = [] - self._timesheet = timesheet_name - self._entry = entry_number - self._data = self.beat_data_with_a_stick(semistructured_data) - self.parse_entry_number() - self.parse_entry_project() - self.parse_entry_notes() - self.parse_entry_approval() - - def __str__(self): - return f"entry #{self._entry} in the timesheet for {self._timesheet}" - - def __len__(self): - return len(self._data) - - def report_issues(self): - if self._issues: - print(f"There are some issues in {self}...") - for issue in self._issues: - print(issue) - for index, line in enumerate(self._data): - print(index, line) - - def log_issue(self, *parts): - self._issues.append(''.join(parts)) - - - def assert_entry_item_any(self, index, label, should_be_any): - if self._data[index][0] not in should_be_any: - self.log_issue( - "parse_entry: ", - f"{label} not one of {', '.join(should_be_any)} ", - f"(is {self._data[index][0]})", - ) - - def assert_entry_item_match(self, index, label, pattern): - if not pattern.match(self._data[index][0]): - self.log_issue( - "parse_entry: ", - f"{label} does not look correct ", - f"(is {self._data[index][0]})", - ) - - def assert_entry_item_at(self, index, label, should_be_at): - if self._data[index][1] != should_be_at: - self.log_issue( - "parse_entry: ", - f"{label} not at {should_be_at}px left ", - f"(is at {self._data[index][1]}px)", - ) - - def parse_entry_number(self): - del self._data[0] - - def parse_entry_project(self): - self.assert_entry_item_any(0, "time code", ("HOL", "OTU", "ST", "VAC", )) - self.assert_entry_item_at(0, "time code", 40) - self._timecode = self._data[0][0] - del self._data[0] - - if self._timecode in ("HOL", "OTU", "VAC", ): - self._project = "" - self._timetype = "" - elif " " in self._data[0][0]: - self.assert_entry_item_match(0, "project", PROJECT_PATTERN) - self.assert_entry_item_at(0, "project", 120) - project_timetype = self._data[0][0].split(" ", 1) - self._project = project_timetype[0] - self._timetype = project_timetype[1] - del self._data[0] - else: - self.assert_entry_item_match(0, "project", PROJECT_PATTERN) - self.assert_entry_item_at(0, "project", 120) - self._project = self._data[0][0] - self.assert_entry_item_match(1, "time type", TIMETYPE_PATTERN) - self.assert_entry_item_at(1, "time type", 201) - self._timetype = self._data[0][0] - del self._data[0:2] - - self._name = self._data[0][0] - del self._data[0] - - def parse_entry_note(self): - if self._data[0][0] == "Line Note:": - # There is no start or end field, just the text field - self._notes.append({ - "start": "", - "end": "", - "text": self._data[1][0], - }) - del self._data[0:2] - else: - match = DATE_PATTERN.match(self._data[0][0]) - if match: - self._notes.append({}) - - # Handle the start field - self.assert_entry_item_at(0, "note start", 40) - self._notes[-1]["start"] = match - - # Handle the start/end separator if it is separate from the - # start field - if self._data[1][0] == "-": - del self._data[1] - - # Handle the end field - self.assert_entry_item_at(1, "note end", 99) - match = DATE_PATTERN.match(self._data[1][0]) - if match: - self._notes[-1]["end"] = match - - # Handle the 'Approved' note if it floated up - if self._data[2][0] == "Approved": - del self._data[2] - - # Handle the text field - self.assert_entry_item_at(2, "note text", 201) - self._notes[-1]["text"] = self._data[2][0] - - del self._data[0:3] - - def parse_entry_notes(self): - self._notes = [] - if len(self) and self._data[0][0] == "Notes": - del self._data[0] - - # One or two notes follow - if len(self): - self.parse_entry_note() - if len(self): - self.parse_entry_note() - - def parse_entry_approval(self): - if len(self) and self._data[0][0] == "Approved": - del self._data[0] - - def beat_data_with_a_stick(self, data): - data = [i for i in data if i[1]<400] + [i for i in data if i[1]>=400] - return data - diff --git a/parser/timesheet.py b/parser/timesheet.py index f26626f..e6be2f1 100644 --- a/parser/timesheet.py +++ b/parser/timesheet.py @@ -1,329 +1,243 @@ #!/usr/bin/env python3 +import datetime +import decimal +import csv +import sys from re import compile as re_compile -from .timeentry import TimeEntry, ENTRY_PATTERNS - -PAGE_PATTERNS = ( - re_compile("[ADFJMNOS][aceopu][bcglnprtvy] [1-9][0-9]?, 20[12][89012] [012][0-9]:[0-5][0-9]"), - re_compile("\(GMT-0[456]:00\) .*"), - re_compile("Page [1-9][0-9]?"), - re_compile("of [1-9][0-9]?"), - re_compile("Timesheet"), - re_compile("[ADFJMNOS][aceopu][bcglnprtvy] [1-9][0-9]?, 20[12][89012] - [ADFJMNOS][aceopu][bcglnprtvy] [1-9][0-9]?, 20[12][89012]"), - re_compile("ID"), - re_compile("Time Code"), - re_compile("Project"), - re_compile("TimeType"), -) - -def move_item(data, index, destination): - """Re-order a list, moving an item from `index` to `destination`.""" - if index < destination: - return data[:index] + data[index+1:destination+1] + data[index:index+1] + data[destination+1:] - elif index > destination: - return data[:destination] + data[index:index+1] + data[destination:index] + data[index+1:] - else: - return data - -def remove_item(data, index): - """Adjust a list, removing an item from `index`.""" - del data[index] - return data - -def is_multiple_8(string): - try: - return int(float(string)) % 8 == 0 - except: - return False - -class TimeSheet(object): - def __init__(self, semistructured_data): - self._warnings = [] - self._issues = [] - self._data = self.beat_data_with_a_stick(semistructured_data) - self.assert_header() - self.parse_header() - self.parse_footer() - self.parse_pages() - self.parse_entries() - - def __str__(self): - return self.header.get("dates", "TimeSheet(...)") - - def __len__(self): - return len(self._data) - - def report_issues(self): - if self._warnings: - print(f"There are some warnings in the timesheet for {self}...") - for warning in self._warnings: - print(warning) - if self._issues: - print(f"There are some issues in the timesheet for {self}...") - for issue in self._issues: - print(issue) - for index, line in enumerate(self._data): - print(index, line) - for entry in self._entries: - entry.report_issues() - - def log_issue(self, *parts): - self._issues.append(''.join(parts)) - - def log_warning(self, *parts): - self._warnings.append(''.join(parts)) - - def beat_data_with_a_stick(self, data): - """A fragile solution to the problems resulting from - PDF-to-HTML-to-long data conversions. +ID_PATTERN = re_compile("[1-2]?[0-9]$") +TIME_CODE_PATTERN = re_compile("(ST|VAC|HOL|OTU|OPL)") +PROJECT_PATTERN = re_compile("[A-Z0-9]{5}\.[A-Z0-9]{3}\.[A-Z0-9]{2}\.[A-Z0-9]{3}") +TIMETYPE_PATTERN = re_compile("[A-Z0-9.]{3,}") +WEEK_BEGINNING_AND_WEEK_PATTERN = re_compile("Week Beginning: [0-9][0-9] [ADFJMNOS][aceopu][bcglnprtvy], 20[12][89012]") +WEEK_BEGINNING_PATTERN = re_compile("Week Beginning:") +WEEK_PATTERN = re_compile("[0-9][0-9] [ADFJMNOS][aceopu][bcglnprtvy], 20[12][89012]") +HOURS_PATTERN = re_compile("[0-9][0-9]?\.(00|25|50|75)") +TOTAL_HOURS_PATTERN = re_compile("Total Hours for line ") +APPROVED_PATTERN = re_compile("Approved") +NOTES_PATTERN = re_compile("Notes") + +def printf(string, *variables): + """Print to STDERR with formatting.""" + sys.stderr.write(string.format(*variables)) + sys.stderr.write("\n") + +def is_approximately(location, target): + """Tests if a location is close enough to a target to be considered equal. + + PDFs store the rendered location of a textbox, not the mathematically- + ideal location. The net effect is that, while you can rely on the y + dimension to identify a row, you cannot rely on the x dimension to + identify a column. My solution is to make equivalence a bit fuzzy, to the + effect of +/- 5 pixels. + """ + return (target-8 <= location <= target+8) + +class TimeEntry(object): + def __init__(self): + self.label = None + self.project = None + self.time_code = None + self.data = {} + self.reference_date = None + self.in_notes = False + + def assert_equal(self, value, should_be): + if value != should_be: + printf("{0} is not {1}", value, should_be) + + def set_hours(self, day_offset, hours): + """Given a string like '1.25' and a day offset between 0 and 6, set + hours into a date. """ - # a small number of timesheets have a 'Doc.No.' field - if data[14][0] == "Doc.No." and data[17][0] == "1": - self.log_warning("beat_data_with_a_stick: killing 'Doc.No.' fields") - #Note: subtract 1 due to the other operation realigning indices - data = remove_item(data, 14) - data = remove_item(data, 17-1) - elif data[16][0] == "Doc.No." and data[19][0] == "1": - self.log_warning( - "beat_data_with_a_stick: ", - "killing 'Doc.No.' label and field", - ) - data = remove_item(data, 16) - data = remove_item(data, 19-1) - - # 'Post Status:' can float up if the status is 'Not posted' - if data[6][0] == "Post Status:" and data[7][0] == "Not posted": - self.log_warning( - "beat_data_with_a_stick: ", - "re-sorting post status label and field", - ) - #Note: add/subtract 1 due to the other operation realigning indices - data = move_item(data, 6, 17+1) - data = move_item(data, 7-1, 20) - - # 'Function:' and '[1] Full Time' can float up together - if data[4][0] == "Function:" and data[5][0] == "[1] Full Time": - self.log_warning( - "beat_data_with_a_stick: ", - "re-sorting function label and field", + if self.reference_date is None: + printf( + "hours ({0}) set before a reference date", + hours, ) - #Note: add/subtract 1 due to the other operation realigning indices - data = move_item(data, 4, 10+1) - data = move_item(data, 5-1, 13) - elif data[8][0] == "Function:" and data[11][0] == "[1] Full Time": - self.log_warning( - "beat_data_with_a_stick: ", - "re-sorting function label and field", + return + + target = self.reference_date + if day_offset != 0: + target += datetime.timedelta(days=day_offset) + self.data[target] = decimal.Decimal(hours) + + def set_total_week_hours(self, total_hours): + """Given a string like '1.25', validate set hours for a week.""" + if self.reference_date is None: + printf( + "total week hours ({0}) set before a reference date", + total_hours, ) - data = move_item(data, 8, 10) - data = move_item(data, 11, 13) - - # 'Percent Billability:' can float down - if data[28][0] == "Percent Billability:": - self.log_warning( - "beat_data_with_a_stick: ", - "re-sorting percent billability label", + return + + target = self.reference_date + sum_hours = decimal.Decimal(0) + for _ in range(7): + if target in self.data: + sum_hours += self.data[target] + target += datetime.timedelta(days=1) + + self.assert_equal(sum_hours, decimal.Decimal(total_hours)) + + def set_total_line_hours(self, total_hours): + """Given a string like '1.25', validate set hours for a line entry.""" + if self.reference_date is None: + printf( + "total line hours ({0}) set before a reference date", + total_hours, ) - data = move_item(data, 28, 25) + return - # 'Validation:' and 'Passed' can float up - if data[16][0] == "Validation:" and data[19][0] == "Passed": - self.log_warning( - "beat_data_with_a_stick: ", - "re-sorting validation label and field", - ) - data = move_item(data, 16, 17) - data = move_item(data, 19, 20) - - # 'Posted'/'Not posted' can float up - if data[17][0] == "Posted" or data[17][0] == "Not posted": - self.log_warning( - "beat_data_with_a_stick: ", - "re-sorting post status field", - ) - data = move_item(data, 17, 19) + sum_hours = decimal.Decimal(0) + for date, hours in self.data.items(): + sum_hours += hours - # 'ID', 'Time Code', 'Project', and 'TimeType' and float down - if data[41][0] == "ID" and data[42][0] == "Time Code" and data[43][0] == "Project" and data[44][0] == "TimeType": - self.log_warning( - "beat_data_with_a_stick: ", - "re-sorting ID, Time Code, Project, and TimeType header", - ) - data = move_item(data, 41, 30) - data = move_item(data, 42, 31) - data = move_item(data, 43, 32) - data = move_item(data, 44, 33) - elif data[43][0] == "ID" and data[44][0] == "Time Code" and data[45][0] == "Project" and data[46][0] == "TimeType": - self.log_warning( - "beat_data_with_a_stick: ", - "re-sorting ID, Time Code, Project, and TimeType header", - ) - data = move_item(data, 43, 30) - data = move_item(data, 44, 31) - data = move_item(data, 45, 32) - data = move_item(data, 46, 33) - - return data - - def assert_header_item(self, index, should_be): - if self._data[index][0] != should_be: - self.log_issue( - "assert_header: ", - f"item {index} is not {should_be} ", - f"(is {self._data[index][0]})", - ) + self.assert_equal(sum_hours, decimal.Decimal(total_hours)) - def assert_header_item_any(self, index, should_be_any): - if self._data[index][0] not in should_be_any: - self.log_issue( - "assert_header: ", - f"item {index} is not one of {', '.join(should_be_any)} ", - f"(is {self._data[index][0]})", - ) + def advance_reference_date(self): + """Mark the reference date as invalid.""" + self.reference_date = None - def assert_header(self): - """Validate a document based on the header lines.""" - self.assert_header_item(0, "Timesheet") - self.assert_header_item(2, "Location:") - self.assert_header_item(3, "[E01] Fors Marsh Group") - self.assert_header_item(4, "Department:") - self.assert_header_item_any(5, ("[3200] Advanced Analytics", "[3230] Data Management", )) - self.assert_header_item(6, "Employee Type:") - self.assert_header_item(7, "[1] Annual Salary") - self.assert_header_item(8, "Location (Default") - self.assert_header_item(9, "[LOCAL] Location") - self.assert_header_item(10, "Function:") - self.assert_header_item(11, "Exempt:") - self.assert_header_item(12, "Status:") - self.assert_header_item(13, "[1] Full Time") - self.assert_header_item(14, "Yes") - self.assert_header_item_any(15, ("Approved", "Closed", "On Hold [Draft]", )) - self.assert_header_item(16, "Post Status:") - self.assert_header_item(17, "Validation:") - self.assert_header_item(18, "Date/Time:") - self.assert_header_item_any(19, ("Posted", "Not posted", )) - self.assert_header_item_any(20, ("Passed", "Warnings", )) - #21 should be like "Mon N, YYYY HH:MM" - self.assert_header_item(22, "Total Timesheet:") - self.assert_header_item(23, "Standard Hours:") - self.assert_header_item(24, "Total Billable:") - self.assert_header_item(25, "Percent Billability:") - if not is_multiple_8(self._data[27][0]): - self.log_issue( - "assert_header: ", - "item 27 is not a multiple of 8 ", - f"(is {self._data[27]})", - ) - self.assert_header_item(30, "ID") - self.assert_header_item(31, "Time Code") - self.assert_header_item(32, "Project") - self.assert_header_item(33, "TimeType") - - def parse_header(self): - """Read data from the document header and clear those lines.""" - self.header = { - "dates": self._data[1][0], - "employ_location": self._data[3][0], - "employ_location_default": self._data[9][0], - "employ_dept": self._data[5][0], - "employ_type": self._data[7][0], - "employ_exempt": self._data[14][0], - "status": self._data[15][0], - "status_posting": self._data[19][0], - "status_validation": self._data[20][0], - "status_timestamp": self._data[21][0], - "hours": self._data[26][0], - "hours_minimum": self._data[27][0], - "hours_billable": self._data[28][0], - "percent_billable": self._data[29][0], - } - del self._data[:34] - - def parse_footer(self): - """Loop though lines to identify the document footer and clear it.""" - target = None - for n in range(len(self)-1, 0, -1): - if self._data[n][0] == "Hours Distribution by Time Code": - target = n - break - if target is None: - self.log_issue( - "parse_footer: ", - "could not locate document footer", - ) + def set_reference_date(self, date): + """Given a string like '01 Jan, 2022', set the reference date for + subsequent method calls. + """ + self.reference_date = datetime.datetime.strptime(date, "%d %b, %Y") + + def set_time_code(self, time_code): + """Given 'ST', 'HOL', or 'VAC', set the time code.""" + self.time_code = time_code + + def set_project(self, project): + """Given a string like '12345.123.12.123', set the official project + code. + """ + self.project = project + + def set_label(self, label): + """Given a string, set the human-readable project label.""" + if not self.label and not self.in_notes: + self.label = label + + def mark_notes(self): + """Mark that an entry is receiving notes. Subsequent labels should be + ignored. + """ + self.in_notes = True + +class TimeSheet(object): + def __init__(self, data): + self.data = data + self.entries = [] + for row in range(len(self.data)): + self.parse_row(row) + + def set_hours(self, day, hours): + """Given a string like '1.25' and a day offset between 0 and 6, set + hours into a date. + """ + if " " in hours: + two_hours = hours.split(" ", 1) + self.entries[-1].set_hours(day, two_hours[0]) + self.entries[-1].set_hours(day+1, two_hours[1]) else: - del self._data[target:] + self.entries[-1].set_hours(day, hours) + + def set_total_week_hours(self, total_hours): + """Given a string like '1.25', validate set hours for a week.""" + self.entries[-1].set_total_week_hours(total_hours) + + def set_total_line_hours(self, total_hours): + """Given a string like '1.25', validate set hours for a line entry.""" + self.entries[-1].set_total_line_hours(total_hours) - def parse_pages(self): - """Loop through lines to identify page headers and footers, and clear - those lines. + def advance_reference_date(self): + """Mark the reference date as invalid.""" + self.entries[-1].advance_reference_date() + + def set_reference_date(self, date): + """Given a string like '01 Jan, 2022', set the reference date for + subsequent method calls. """ - # Pages begin with "Timesheet" - page_breaks = [] - for index, line in enumerate(self._data): - if line[0] == "Timesheet": - page_breaks.append(index) - if not page_breaks: - self.log_issue( - "parse_pages: ", - "could not locate any page breaks", - ) + self.entries[-1].set_reference_date(date) + + def set_time_code(self, time_code): + """Given 'ST', 'HOL', or 'VAC', set the time code.""" + self.entries[-1].set_time_code(time_code) + + def set_project(self, project): + """Given a string like '12345.123.12.123', set the official project + code. + """ + self.entries[-1].set_project(project) + + def set_label(self, label): + """Given a string, set the human-readable project label.""" + self.entries[-1].set_label(label) - # At each page break, there is a page footer sequence before: - # + "Mmm N, YYYY HH:MM" - # + "(GMT-0H:00) TZNAME" - # + "Page N" - # + "of N" - # ...and a page header sequence after: - # + "Mmm N, YYYY - Mmm N, YYYY" - # + "ID" - # + "Time Code" - # + "Project" - # + "TimeType" - # None of this is useful data. Immediately delete it. - for page_break in reversed(page_breaks): - i = page_break - 7 - # don't question why this is necessary - if str(self)=="May 16, 2019 - May 31, 2019" and page_break == 275: - i -= 30 - - while i <= page_break: - j = 0 - while j < len(PAGE_PATTERNS): - if PAGE_PATTERNS[j].match(self._data[i][0]): - del self._data[i] - j = 0 - else: - j += 1 - i += 1 - - def parse_entries(self): - """Loop through lines to identify time entries.""" - i = 0 - entries = [] - while i < len(self): - if self._data[i][1] == 20: - if ENTRY_PATTERNS[0].match(self._data[i][0]): - entries.append([self._data[i]]) - else: - self.log_issue( - "parse_entries: ", - "something unexpected in the entry start position ", - f"({self._data[i][0]})", - ) + def mark_notes(self): + """Mark that an entry is receiving notes. Subsequent labels should be + ignored. + """ + self.entries[-1].mark_notes() + + def parse_row(self, index): + """Parse a row of data and dispatch between time entry methods.""" + if len(self.data[index])<3: + return + + if APPROVED_PATTERN.match(self.data[index][2]): + pass + elif WEEK_BEGINNING_AND_WEEK_PATTERN.match(self.data[index][2]): + self.set_reference_date(self.data[index][2].split(": ", 1)[1]) + elif WEEK_BEGINNING_PATTERN.match(self.data[index][2]): + self.advance_reference_date() + elif TOTAL_HOURS_PATTERN.match(self.data[index][2]): + self.set_total_line_hours(self.data[index][2].split(": ", 1)[1]) + elif WEEK_PATTERN.match(self.data[index][2]): + self.set_reference_date(self.data[index][2]) + elif HOURS_PATTERN.match(self.data[index][2]): + x = int(float(self.data[index][0])) + if is_approximately(x, 572): + self.set_hours(0, self.data[index][2]) + elif is_approximately(x, 597): + self.set_hours(1, self.data[index][2]) + elif is_approximately(x, 622): + self.set_hours(2, self.data[index][2]) + elif is_approximately(x, 647): + self.set_hours(3, self.data[index][2]) + elif is_approximately(x, 672): + self.set_hours(4, self.data[index][2]) + elif is_approximately(x, 697): + self.set_hours(5, self.data[index][2]) + elif is_approximately(x, 722): + self.set_hours(6, self.data[index][2]) + elif is_approximately(x, 751): + self.set_total_week_hours(self.data[index][2]) else: - if self._data[i][0] in ("Mon", "Tue Wed", "Thu", "Fri", "Sat", "Sun", "Total", ): - pass - elif len(entries)>0: - entries[-1].append(self._data[i]) - else: - self.log_issue( - "parse_entries: ", - "something unexpected before any entries started ", - f"({self._data[i][0]})", - ) - i += 1 - - self._entries = [TimeEntry(e, str(self), e[0][0]) for e in entries] + printf( + "found hours ({0}) but they fell through all conditions", + self.data[index][2], + ) + elif TIME_CODE_PATTERN.match(self.data[index][2]): + self.set_time_code(self.data[index][2]) + elif PROJECT_PATTERN.match(self.data[index][2]): + self.set_project(self.data[index][2]) + elif TIMETYPE_PATTERN.match(self.data[index][2]): + pass + elif ID_PATTERN.match(self.data[index][2]): + self.entries.append(TimeEntry()) + elif NOTES_PATTERN.match(self.data[index][2]): + self.mark_notes() + else: + self.set_label(self.data[index][2]) + +def parse(filename): + with open(filename, "r", newline="") as f: + reader = csv.reader(f) + timesheet = TimeSheet([row for row in reader]) + entries = timesheet.entries + return entries diff --git a/parser/xml.py b/parser/xml.py index 7ac3b7f..3b6a92a 100644 --- a/parser/xml.py +++ b/parser/xml.py @@ -187,11 +187,11 @@ class TimeSheetHandler(handler.ContentHandler): loc = (int(float(location_xy[0])), int(float(location_xy[1])), ) if self.pagenum == "1": - if is_approximately(loc, (333, 524, )): + if is_approximately(loc, (335, 524, )): self.in_header_footer_parts["timesheet_label"] = True return True - elif is_approximately(loc, (333, 504, )): + elif is_approximately(loc, (335, 504, )): self.in_header_footer_parts["timesheet_value"] = True return True -- 2.45.2