~dricottone/digestion

ref: 4ee014a14f1f8605e585d22fc78ba17d366a7362 digestion/main.go -rw-r--r-- 2.9 KiB
4ee014a1Dominic Ricottone Golang version bump 2 years ago
                                                                                
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
package main

import (
	"fmt"
	"os"
	"strings"
	"io"
	"bufio"
	"regexp"
	"flag"

	"git.dominic-ricottone.com/digestion/message"
)

// LINE_LENGTH is the default value of the -length command-line flag
// ("maximum length of lines"), which main passes on to parse_stream.
const LINE_LENGTH = 80

// Parser states used by parse_stream to track which part of the input the
// current line belongs to:
//
//   - ParsingPreHeader: entered after a message-break line; lines are skipped
//     until one matches a recognised header name.
//   - ParsingHeader: lines are handed to the message as headers until a blank
//     line is reached.
//   - ParsingPartHeader: entered when a line contains the current multipart
//     boundary; lines are handed to the message as part headers until a blank
//     line is reached.
//   - ParsingContent: the initial state; lines are appended to the message
//     content unless they are a message break or contain the boundary.
const (
	ParsingPreHeader  = "ParsingPreHeader"
	ParsingHeader     = "ParsingHeader"
	ParsingPartHeader = "ParsingPartHeader"
	ParsingContent    = "ParsingContent"
)

// contains_nonempty reports whether s2 is a non-empty substring of s1.
//
// Unlike strings.Contains, an empty s2 never counts as a match.
func contains_nonempty(s1, s2 string) bool {
	if s2 == "" {
		return false
	}
	return strings.Contains(s1, s2)
}

// first_submatch returns the text of the first capture group of the leftmost
// match of r in s, with every space character removed.
//
// It returns "" when r does not match s, and also when r matches but defines
// no capture group (previously that case panicked with an index out of range).
//
// r is taken by value to keep the signature existing callers use.
func first_submatch(r regexp.Regexp, s string) string {
	matches := r.FindStringSubmatch(s)
	// FindStringSubmatch yields nil on no match and a one-element slice for a
	// pattern without groups, so index 1 only exists from length 2 upwards.
	if len(matches) < 2 {
		return ""
	}
	return strings.ReplaceAll(matches[1], " ", "")
}

// parse_stream reads reader line by line and splits it into messages with a
// small state machine (see the Parsing* constants).
//
// Parsing starts in the content state. A line consisting solely of five or
// more dashes ends the current message: the message is dumped with the given
// length and a fresh one is started. Header lines are collected until a blank
// line; at that point the multipart boundary, if any, is extracted from the
// message's Content-Type header. Lines containing that boundary start a new
// part, whose own headers are collected until the next blank line.
//
// A message still being collected when the input ends is not dumped.
//
// On a scanner error the function prints a diagnostic and exits the process
// with status 1.
func parse_stream(reader io.Reader, length int) {
	input := bufio.NewScanner(reader)

	// The patterns are constants, so failing to compile would be a programmer
	// error; MustCompile replaces three error branches that could never run.
	re_message_break := regexp.MustCompile("^-{5,}$")
	re_header := regexp.MustCompile(
		"^(?:Date|From|Subject|To|Cc|Message-ID|" +
			"Content-(?:Type|Transfer-Encoding)):",
	)
	// Capture with [^"]* rather than a greedy .* so the boundary ends at its
	// own closing quote even when another quoted parameter follows, e.g.
	// `boundary="abc"; charset="utf-8"`.
	re_multipart := regexp.MustCompile(`boundary="([^"]*)"`)

	parsing := ParsingContent
	current_message := message.NewMessage()
	current_boundary := ""

	for input.Scan() {
		line := input.Text()
		tline := strings.TrimSpace(line)

		switch parsing {
		case ParsingPreHeader:
			// Skip everything until the first recognised header line.
			if re_header.MatchString(tline) {
				parsing = ParsingHeader
				current_message.SetHeader(tline)
			}

		case ParsingHeader:
			if tline == "" {
				// A blank line ends the headers; Content-Type is known now.
				parsing = ParsingContent
				current_boundary = first_submatch(*re_multipart, current_message.Header.ContentType)
			} else {
				// The untrimmed line is passed on, leading whitespace included.
				current_message.SetHeader(line)
			}

		case ParsingPartHeader:
			if tline == "" {
				parsing = ParsingContent
			} else {
				current_message.SetPartHeader(line)
			}

		case ParsingContent:
			switch {
			case re_message_break.MatchString(tline):
				parsing = ParsingPreHeader
				current_message.Dump(length)
				current_message = message.NewMessage()
				current_boundary = ""
			case contains_nonempty(tline, current_boundary):
				parsing = ParsingPartHeader
				current_message.AppendPart()
			default:
				current_message.AppendContent(tline)
			}
		}
	}

	// Check for scanner errors
	if err := input.Err(); err != nil {
		fmt.Printf("internal error - %v\n", err)
		os.Exit(1)
	}
}

// parse_file opens the named file and runs parse_stream over its contents,
// forwarding length unchanged.
//
// If the file cannot be opened it prints a message and exits the process with
// status 1.
func parse_file(filename string, length int) {
	fh, openErr := os.Open(filename)
	if openErr != nil {
		fmt.Printf("cannot read file '%s'\n", filename)
		os.Exit(1)
	}
	// Closed when parse_file returns normally.
	defer fh.Close()

	parse_stream(fh, length)
}

// main parses standard input with parse_stream.
//
// The -length flag sets the maximum line length handed to parse_stream and
// defaults to LINE_LENGTH. If standard input cannot be stat'ed, a message is
// printed and the process exits with status 1 before any flag is parsed.
func main() {
	// Make sure standard input is usable before doing anything else.
	if _, statErr := os.Stdin.Stat(); statErr != nil {
		fmt.Println("cannot read input")
		os.Exit(1)
	}

	// Command-line options.
	var maxLength int
	flag.IntVar(&maxLength, "length", LINE_LENGTH, "maximum length of lines")
	flag.Parse()

	parse_stream(os.Stdin, maxLength)
}