~dricottone/digestion

ref: ee94f07cfd199e1fd4738e541dae35d3e957ff93 digestion/main.go -rw-r--r-- 2.9 KiB
ee94f07cDominic Ricottone Updating go version, dependency version, and module name 2 years ago
                                                                                
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
package main

import (
	"fmt"
	"os"
	"strings"
	"io"
	"bufio"
	"regexp"
	"flag"

	"git.dominic-ricottone.com/~dricottone/digestion/message"
)

const LINE_LENGTH = 80

const (
	ParsingPreHeader  = "ParsingPreHeader"
	ParsingHeader     = "ParsingHeader"
	ParsingPartHeader = "ParsingPartHeader"
	ParsingContent    = "ParsingContent"
)

func contains_nonempty(s1, s2 string) bool {
	return s2 != "" && strings.Contains(s1, s2)
}

func first_submatch(r regexp.Regexp, s string) string {
	matches := r.FindStringSubmatch(s)
	if matches != nil {
		return strings.Replace(matches[1], " ", "", -1)
	} else {
		return ""
	}
}

func parse_stream(reader io.Reader, length int) {
	// Create scanner from reader
	input := bufio.NewScanner(reader)

	// Compile regular expressions
	re_message_break, err := regexp.Compile("^-{5,}$")
	if err != nil {
		fmt.Printf("internal error - %v\n", err)
		os.Exit(1)
	}
	re_header, err := regexp.Compile(
		"^(?:Date|From|Subject|To|Cc|Message-ID|" +
		"Content-(?:Type|Transfer-Encoding)):",
	)
	if err != nil {
		fmt.Printf("internal error - %v\n", err)
		os.Exit(1)
	}
	re_multipart, err := regexp.Compile(".*boundary=\"(.*)\".*")
	if err != nil {
		fmt.Printf("internal error - %v\n", err)
		os.Exit(1)
	}

	parsing := ParsingContent
	current_message := message.NewMessage()
	current_boundary := ""

	for input.Scan() {
		line := input.Text()
		tline := strings.TrimSpace(line)

		if parsing == ParsingPreHeader {
			if re_header.MatchString(tline) {
				parsing = ParsingHeader
				current_message.SetHeader(tline)
			}
		} else if parsing == ParsingHeader {
			if tline == "" {
				parsing = ParsingContent
				current_boundary = first_submatch(*re_multipart, current_message.Header.ContentType)
			} else {
				current_message.SetHeader(line)
			}
		} else if parsing == ParsingPartHeader {
			if tline == "" {
				parsing = ParsingContent
			} else {
				current_message.SetPartHeader(line)
			}
		} else if parsing == ParsingContent {
			if re_message_break.MatchString(tline) {
				parsing = ParsingPreHeader
				current_message.Dump(length)
				current_message = message.NewMessage()
				current_boundary = ""
			} else if contains_nonempty(tline, current_boundary) {
				parsing = ParsingPartHeader
				current_message.AppendPart()
			} else {
				current_message.AppendContent(tline)
			}
		}
	}

	// Check for scanner errors
	if err = input.Err(); err != nil {
		fmt.Printf("internal error - %v\n", err)
		os.Exit(1)
	}
}

func parse_file(filename string, length int) {
	// Check file
	file, err := os.Open(filename)
	if err != nil {
		fmt.Printf("cannot read file '%s'\n", filename)
		os.Exit(1)
	}
	defer file.Close()

	// Parse
	parse_stream(file, length)
}

func main() {
	// Check STDIN
	_, err := os.Stdin.Stat()
	if err != nil {
		fmt.Println("cannot read input")
		os.Exit(1)
	}

	// Look for arguments
	var length = flag.Int("length", LINE_LENGTH, "maximum length of lines")
	flag.Parse()

	// Parse
	parse_stream(os.Stdin, *length)
}