~dricottone/digestion

ref: ce5b7f835e1ffb0d4aed72bbaf9920a025293312 digestion/main.go -rw-r--r-- 2.7 KiB
ce5b7f83Dominic Ricottone Refactored and refined API 4 years ago
                                                                                
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
package main

import (
	"fmt"
	"os"
	"strings"
	"io"
	"bufio"
	"regexp"

	"git.dominic-ricottone.com/digestion/message"
)

// Parser states used by parse_stream to track which section of a message
// the current input line belongs to.
const (
	// ParsingPreHeader: skipping lines until one matches a recognized header.
	ParsingPreHeader  = "ParsingPreHeader"
	// ParsingHeader: collecting message header lines until a blank line.
	ParsingHeader     = "ParsingHeader"
	// ParsingPartHeader: collecting part header lines until a blank line.
	ParsingPartHeader = "ParsingPartHeader"
	// ParsingContent: collecting content lines until a message break or a
	// line containing the current multipart boundary.
	ParsingContent    = "ParsingContent"
)

// contains_nonempty reports whether s2 occurs within s1, treating an empty
// s2 as never contained (unlike strings.Contains, which accepts it).
func contains_nonempty(s1, s2 string) bool {
	if len(s2) == 0 {
		return false
	}
	return strings.Contains(s1, s2)
}

// first_submatch returns the text captured by the first capture group of r
// when r matches s, with every space character removed from it.
//
// It returns "" when r does not match s, and also when r matches but
// declares no capture group (in which case there is no group 1 to return).
func first_submatch(r regexp.Regexp, s string) string {
	matches := r.FindStringSubmatch(s)
	// FindStringSubmatch yields nil on no match and a single-element slice
	// (the whole match only) for a pattern without capture groups; indexing
	// matches[1] is only safe once at least two elements are present.
	if len(matches) < 2 {
		return ""
	}
	return strings.Replace(matches[1], " ", "", -1)
}

// parse_stream scans reader line by line and drives a four-state parser
// (ParsingPreHeader, ParsingHeader, ParsingPartHeader, ParsingContent),
// handing each relevant line to the current message.
//
// While in the content state, a line that consists solely of dashes (after
// trimming whitespace) ends the current message: Dump is called on it and a
// fresh message takes its place. A content line containing the current
// multipart boundary starts a new part instead.
//
// A regular expression that fails to compile, or an error reported by the
// scanner, is printed as "internal error - ..." and ends the process with
// exit status 1.
func parse_stream(reader io.Reader) {
	// must_compile compiles pattern, or reports the failure and exits.
	must_compile := func(pattern string) *regexp.Regexp {
		compiled, err := regexp.Compile(pattern)
		if err != nil {
			fmt.Printf("internal error - %v\n", err)
			os.Exit(1)
		}
		return compiled
	}

	scanner := bufio.NewScanner(reader)

	separator_re := must_compile("^-+$")
	header_re := must_compile(
		"^(?:Date|From|Subject|To|Cc|Message-ID|" +
			"Content-(?:Type|Transfer-Encoding)):",
	)
	boundary_re := must_compile(".*boundary=\"(.*)\".*")

	state := ParsingPreHeader
	msg := message.NewMessage()
	boundary := ""

	for scanner.Scan() {
		raw := scanner.Text()
		trimmed := strings.TrimSpace(raw)

		switch state {
		case ParsingPreHeader:
			// Ignore everything until the first recognized header line.
			if header_re.MatchString(trimmed) {
				state = ParsingHeader
				msg.SetHeader(trimmed)
			}

		case ParsingHeader:
			if trimmed != "" {
				// The untrimmed line is passed on here.
				msg.SetHeader(raw)
			} else {
				// Blank line ends the headers; the boundary (if any) is
				// taken from the Content-Type collected so far.
				state = ParsingContent
				boundary = first_submatch(*boundary_re, msg.Header.ContentType)
			}

		case ParsingPartHeader:
			if trimmed != "" {
				msg.SetPartHeader(raw)
			} else {
				state = ParsingContent
			}

		case ParsingContent:
			switch {
			case separator_re.MatchString(trimmed):
				// Message break: emit the finished message and start over.
				state = ParsingPreHeader
				msg.Dump()
				msg = message.NewMessage()
				boundary = ""
			case contains_nonempty(trimmed, boundary):
				state = ParsingPartHeader
				msg.AppendPart()
			default:
				msg.AppendContent(trimmed)
			}
		}
	}

	// Surface any error that stopped the scan early.
	if err := scanner.Err(); err != nil {
		fmt.Printf("internal error - %v\n", err)
		os.Exit(1)
	}
}

// parse_file opens the file named filename and feeds it to parse_stream.
// If the file cannot be opened, it prints "cannot read file '<filename>'"
// and ends the process with exit status 1.
func parse_file(filename string) {
	handle, open_err := os.Open(filename)
	if open_err != nil {
		fmt.Printf("cannot read file '%s'\n", filename)
		os.Exit(1)
	}
	// Release the file handle once parsing returns.
	defer handle.Close()

	parse_stream(handle)
}

// main checks that standard input can be stat'ed and then parses it as a
// stream. If the check fails, it prints "cannot read input" and ends the
// process with exit status 1.
func main() {
	if _, stat_err := os.Stdin.Stat(); stat_err != nil {
		fmt.Println("cannot read input")
		os.Exit(1)
	}

	parse_stream(os.Stdin)
}