~dricottone/fmg-timesheets

ref: f441822f16af2386abe50dc1d4b8161566843f3e fmg-timesheets/parse.py -rw-r--r-- 1.1 KiB
f441822fDominic Ricottone Significant updates 2 years ago
                                                                                
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
#!/usr/bin/env python3

"""The parsers are developed, debugged, and refactored in this file.

When they mature, I refactor them into standalone modules under the `parser`
directory.

Eventually, the entire parse step will mature and be abstracted into a
single function call, which will be appropriate to call in `main.py`.

If you can see this file, then I'm not done yet.
"""

from pprint import pprint

from parser.html import parse as parse_html
from parser.pdf import parse as parse_pdf
from parser.timesheet import Timesheet

def read_timesheet(filename):
    """Run the PDF parser on `filename` and pass its output to the HTML parser.

    Returns whatever the HTML parser produces.
    """
    return parse_html(parse_pdf(filename))

def parse_timesheet(data):
    """Wrap `data` in a `Timesheet` and ask it to report its issues.

    Work in progress: nothing is extracted from the timesheet yet, so the
    result is always an empty list.
    """
    sheet = Timesheet(data)
    sheet.report_issues()
    # Debugging aid (disabled): dump the rows held by the timesheet.
    #for index, line in enumerate(sheet._data):
    #    print(index, line)
    parsed = []
    return parsed

def extract_projects(structured_data):
    """Placeholder: ignore `structured_data` and return a new empty list."""
    projects = []
    return projects

def timesheet(filename):
    """Run the whole pipeline on `filename`: read, parse, extract projects.

    Chains `read_timesheet`, `parse_timesheet` and `extract_projects`, and
    returns the result of the last step.
    """
    return extract_projects(parse_timesheet(read_timesheet(filename)))