From 62eccd92f6655185139d9015022c8b9953a51521 Mon Sep 17 00:00:00 2001 From: Dominic Ricottone Date: Wed, 19 Aug 2020 14:54:54 -0400 Subject: [PATCH] Refactored into subdirectories; Added common decoders --- decoder/base64.go | 18 ++++ decoder/encodings.go | 53 ++++++++++ decoder/quotedprintable.go | 20 ++++ main.go | 203 ++----------------------------------- message/header.go | 113 +++++++++++++++++++++ message/message.go | 105 +++++++++++++++++++ message/part.go | 24 +++++ 7 files changed, 344 insertions(+), 192 deletions(-) create mode 100644 decoder/base64.go create mode 100644 decoder/encodings.go create mode 100644 decoder/quotedprintable.go create mode 100644 message/header.go create mode 100644 message/message.go create mode 100644 message/part.go diff --git a/decoder/base64.go b/decoder/base64.go new file mode 100644 index 0000000..9eff889 --- /dev/null +++ b/decoder/base64.go @@ -0,0 +1,18 @@ +package decoder + +import ( + "encoding/base64" +) + +func decode_base64(lines []string) ([]string, error) { + decoded := []string{} + for _, line := range lines { + decoded_line, err := base64.StdEncoding.DecodeString(line) + if err != nil { + return decoded, err + } + decoded = append(decoded, string(decoded_line)) + } + return decoded, nil +} + diff --git a/decoder/encodings.go b/decoder/encodings.go new file mode 100644 index 0000000..b7ab551 --- /dev/null +++ b/decoder/encodings.go @@ -0,0 +1,53 @@ +package decoder + +import ( + "strings" +) + +const ( + UTF8 = "UTF8" + Base64 = "Base64" + QuotedPrintable = "QuotedPrintable" + Unknown = "Unknown" +) + +func determine_encoding(encoding string) string { + if strings.Contains(encoding, "base64") { + return Base64 + } else if strings.Contains(encoding, "quoted-printable") { + return QuotedPrintable + } else if strings.Contains(encoding, "utf-8") { + return UTF8 + } else { + return Unknown + } +} + +// Translate an encoding label into a numeric value according to preference +// of use in processing. Preference tiers are: +// 1. UTF-8 +// 2. base64, quoted-printable +func EvaluateEncoding(encoding string) int { + switch determine_encoding(encoding) { + case UTF8: + return 0 + case Base64: + return 1 + case QuotedPrintable: + return 1 + default: + return 10 + } +} + +func DecodeArray(lines []string, encoding string) ([]string, error) { + switch determine_encoding(encoding) { + case Base64: + return decode_base64(lines) + case QuotedPrintable: + return decode_quotedprintable(lines) + default: + return lines, nil + } +} + diff --git a/decoder/quotedprintable.go b/decoder/quotedprintable.go new file mode 100644 index 0000000..7288c1a --- /dev/null +++ b/decoder/quotedprintable.go @@ -0,0 +1,20 @@ +package decoder + +import ( + "io/ioutil" + "mime/quotedprintable" + "strings" +) + +func decode_quotedprintable(lines []string) ([]string, error) { + decoded := []string{} + for _, line := range lines { + decoded_line, err := ioutil.ReadAll(quotedprintable.NewReader(strings.NewReader(line))) + if err != nil { + return nil, err + } + decoded = append(decoded, string(decoded_line)) + } + return decoded, nil +} + diff --git a/main.go b/main.go index 1132392..4949f3c 100644 --- a/main.go +++ b/main.go @@ -8,186 +8,9 @@ import ( "bufio" "regexp" - "git.dominic-ricottone.com/textwrap/common" + "git.dominic-ricottone.com/digestion/message" ) -// An enumeration of header parts -const( - HeaderSubject = "HeaderSubject" - HeaderDate = "HeaderDate" - HeaderFrom = "HeaderFrom" - HeaderTo = "HeaderTo" - HeaderCc = "HeaderCc" - HeaderMessageID = "HeaderMessageID" - HeaderContentType = "HeaderContentType" - HeaderContentEncoding = "HeaderContentEncoding" -) - -// A message header container, used within message containers -type MessageHeader struct { - Subject string - Date string - From string - To string - Cc string - MessageID string - ContentType string - LastSet string -} - -// Builder for a message header -func NewHeader() *MessageHeader { - return &MessageHeader{"", "", "", "", "", "", "", ""} -} - -// A message part header container, used within message part containers -type MessagePartHeader struct { - ContentType string - ContentEncoding string - LastSet string -} - -// Builder for a message part header -func NewPartHeader() *MessagePartHeader { - return &MessagePartHeader{"", "", ""} -} - -// A message part container, used within message containers -type MessagePart struct { - Header *MessagePartHeader - Content []string -} - -// Builder for a message part -func NewPart() *MessagePart { - return &MessagePart{NewPartHeader(), []string{""}} -} - -// A message container -type Message struct { - Header *MessageHeader - Parts []*MessagePart - PartBoundary *regexp.Regexp -} - -// Builder for a message -func NewMessage() *Message { - return &Message{NewHeader(), []*MessagePart{NewPart()}, nil} -} - -// Message setters -func (m *Message) SetHeader(line string) { - if strings.HasPrefix(line, "Subject:") { - m.Header.Subject = line[8:] - m.Header.LastSet = HeaderSubject - } else if strings.HasPrefix(line, "Date:") { - m.Header.Date = line[5:] - m.Header.LastSet = HeaderDate - } else if strings.HasPrefix(line, "From:") { - m.Header.From = line[5:] - m.Header.LastSet = HeaderFrom - } else if strings.HasPrefix(line, "To:") { - m.Header.To = line[3:] - m.Header.LastSet = HeaderTo - } else if strings.HasPrefix(line, "Cc:") { - m.Header.Cc = line[3:] - m.Header.LastSet = HeaderCc - } else if strings.HasPrefix(line, "Message-ID:") { - m.Header.MessageID = line[11:] - m.Header.LastSet = HeaderMessageID - } else if strings.HasPrefix(line, "Content-Type:") { - m.Header.ContentType = line[13:] - m.Header.LastSet = HeaderContentType - } -} - -func (m *Message) AppendLastHeader(s string) { - switch m.Header.LastSet { - case HeaderSubject: - m.Header.Subject += " " + s - case HeaderDate: - m.Header.Date += " " + s - case HeaderFrom: - m.Header.From += " " + s - case HeaderTo: - m.Header.To += " " + s - case HeaderCc: - m.Header.Cc += " " + s - case HeaderMessageID: - m.Header.MessageID += " " + s - case HeaderContentType: - m.Header.ContentType += " " + s - } -} - -func (m *Message) SetPartHeader(line string) { - if strings.HasPrefix(line, "Content-Type:") { - m.Parts[len(m.Parts)-1].Header.ContentType = line[13:] - m.Parts[len(m.Parts)-1].Header.LastSet = HeaderContentType - } else if strings.HasPrefix(line, "Content-Transfer-Encoding:") { - m.Parts[len(m.Parts)-1].Header.ContentEncoding = line[26:] - m.Parts[len(m.Parts)-1].Header.LastSet = HeaderContentEncoding - } -} - -func (m *Message) AppendLastPartHeader(s string) { - switch m.Parts[len(m.Parts)-1].Header.LastSet { - case HeaderContentType: - m.Parts[len(m.Parts)-1].Header.ContentType += " " + s - case HeaderContentEncoding: - m.Parts[len(m.Parts)-1].Header.ContentEncoding += " " + s - } -} - -func (m *Message) AppendPart() { - m.Parts = append(m.Parts, NewPart()) -} - -func (m *Message) AppendContent(s string) { - i := len(m.Parts)-1 - m.Parts[i].Content = append(m.Parts[i].Content, s) -} - -func (m *Message) FindBoundary(re *regexp.Regexp) { - match := re.FindStringSubmatch(m.Header.ContentType) - if match != nil { - boundary := strings.Replace(match[1], " ", "", -1) - m.PartBoundary, _ = regexp.Compile(".*" + boundary + ".*") - } -} - -// Message logic -func (m *Message) MatchBoundary(line string) bool { - if m.PartBoundary != nil { - return m.PartBoundary.MatchString(line) - } else { - return false - } -} - -// A message printer -func (m *Message) Dump() { - fmt.Printf("Subject: %s\n", m.Header.Subject) - fmt.Printf("Date: %s\n", m.Header.Date) - fmt.Printf("From: %s\n", m.Header.From) - fmt.Printf("To: %s\n", m.Header.To) - fmt.Printf("Cc: %s\n", m.Header.Cc) - fmt.Printf("MessageID: %s\n", m.Header.MessageID) - fmt.Printf("ContentType: %s\n", m.Header.ContentType) - for i := 0; i < len(m.Parts); i++ { - fmt.Printf("Part %d:\n", i) - fmt.Printf("ContentType: %s\n", m.Parts[i].Header.ContentType) - fmt.Printf("ContentEncoding: %s\n", m.Parts[i].Header.ContentEncoding) - - wrapped, _ := common.WrapArray(m.Parts[i].Content, 80) - for j := 0; j < len(wrapped); j++ { - fmt.Printf("%s\n", wrapped[j]) - } - fmt.Println("EOF") - } -} - -// Parser statuses const ( ParsingPreHeader = "ParsingPreHeader" ParsingHeader = "ParsingHeader" @@ -220,7 +43,7 @@ func parse_stream(reader io.Reader) { } parsing := ParsingPreHeader - message := NewMessage() + current_message := message.NewMessage() for input.Scan() { line := input.Text() @@ -229,35 +52,31 @@ func parse_stream(reader io.Reader) { if parsing == ParsingPreHeader { if re_header.MatchString(tline) { parsing = ParsingHeader - message.SetHeader(tline) + current_message.SetHeader(tline) } } else if parsing == ParsingHeader { if tline == "" { parsing = ParsingContent - message.FindBoundary(re_multipart) - } else if strings.HasPrefix(line, "\t") { - message.AppendLastHeader(tline) + current_message.FindBoundary(re_multipart) } else { - message.SetHeader(tline) + current_message.SetHeader(line) } } else if parsing == ParsingPartHeader { if tline == "" { parsing = ParsingContent - } else if strings.HasPrefix(line, "\t") { - message.AppendLastPartHeader(tline) } else { - message.SetPartHeader(tline) + current_message.SetPartHeader(line) } } else if parsing == ParsingContent { if re_message_break.MatchString(tline) { parsing = ParsingPreHeader - message.Dump() - message = NewMessage() - } else if message.MatchBoundary(tline) { + current_message.Dump() + current_message = message.NewMessage() + } else if current_message.MatchBoundary(tline) { parsing = ParsingPartHeader - message.AppendPart() + current_message.AppendPart() } else { - message.AppendContent(tline) + current_message.AppendContent(tline) } } } diff --git a/message/header.go b/message/header.go new file mode 100644 index 0000000..008a5c6 --- /dev/null +++ b/message/header.go @@ -0,0 +1,113 @@ +package message + +import ( + "strings" +) + +const ( + HeaderSubject = "HeaderSubject" + HeaderDate = "HeaderDate" + HeaderFrom = "HeaderFrom" + HeaderTo = "HeaderTo" + HeaderCc = "HeaderCc" + HeaderMessageID = "HeaderMessageID" + HeaderContentType = "HeaderContentType" + HeaderContentEncoding = "HeaderContentEncoding" +) + +// Message headers +type MessageHeader struct { + Subject string + Date string + From string + To string + Cc string + MessageID string + ContentType string + LastSet string +} + +func NewHeader() *MessageHeader { + return &MessageHeader{"", "", "", "", "", "", "", ""} +} + +func (m *MessageHeader) SetHeader(s string) { + if strings.HasPrefix(s, "\t") { + m.append_last_set(s) + } else if strings.HasPrefix(s, "Subject:") { + m.Subject = strings.TrimSpace(s[8:]) + m.LastSet = HeaderSubject + } else if strings.HasPrefix(s, "Date:") { + m.Date = strings.TrimSpace(s[5:]) + m.LastSet = HeaderDate + } else if strings.HasPrefix(s, "From:") { + m.From = strings.TrimSpace(s[5:]) + m.LastSet = HeaderFrom + } else if strings.HasPrefix(s, "To:") { + m.To = strings.TrimSpace(s[3:]) + m.LastSet = HeaderTo + } else if strings.HasPrefix(s, "Cc:") { + m.Cc = strings.TrimSpace(s[3:]) + m.LastSet = HeaderCc + } else if strings.HasPrefix(s, "Message-ID:") { + m.MessageID = strings.TrimSpace(s[11:]) + m.LastSet = HeaderMessageID + } else if strings.HasPrefix(s, "Content-Type:") { + m.ContentType = strings.TrimSpace(s[13:]) + m.LastSet = HeaderContentType + } +} + +func (m *MessageHeader) append_last_set(s string) { + s = strings.TrimSpace(s) + switch m.LastSet { + case HeaderSubject: + m.Subject += " " + s + case HeaderDate: + m.Date += " " + s + case HeaderFrom: + m.From += " " + s + case HeaderTo: + m.To += " " + s + case HeaderCc: + m.Cc += " " + s + case HeaderMessageID: + m.MessageID += " " + s + case HeaderContentType: + m.ContentType += " " + s + } +} + +// Message part headers +type MessagePartHeader struct { + ContentType string + ContentEncoding string + LastSet string +} + +func NewPartHeader() *MessagePartHeader { + return &MessagePartHeader{"", "", ""} +} + +func (m *MessagePartHeader) SetHeader(s string) { + if strings.HasPrefix(s, "\t") { + m.append_last_set(s) + } else if strings.HasPrefix(s, "Content-Type:") { + m.ContentType = strings.TrimSpace(s[13:]) + m.LastSet = HeaderContentType + } else if strings.HasPrefix(s, "Content-Transfer-Encoding:") { + m.ContentEncoding = strings.TrimSpace(s[26:]) + m.LastSet = HeaderContentEncoding + } +} + +func (m *MessagePartHeader) append_last_set(s string) { + s = strings.TrimSpace(s) + switch m.LastSet { + case HeaderContentType: + m.ContentType += " " + s + case HeaderContentEncoding: + m.ContentEncoding += " " + s + } +} + diff --git a/message/message.go b/message/message.go new file mode 100644 index 0000000..b159902 --- /dev/null +++ b/message/message.go @@ -0,0 +1,105 @@ +package message + +import ( + "fmt" + "strings" + "regexp" + + textwrap "git.dominic-ricottone.com/textwrap/common" + + "git.dominic-ricottone.com/digestion/decoder" +) + +type Message struct { + Header *MessageHeader + Parts []*MessagePart + PartBoundary *regexp.Regexp +} + +func NewMessage() *Message { + return &Message{NewHeader(), []*MessagePart{NewPart()}, nil} +} + +func (m *Message) SetHeader(s string) { + m.Header.SetHeader(s) +} + +func (m *Message) SetPartHeader(s string) { + m.Parts[len(m.Parts)-1].Header.SetHeader(s) +} + +func (m *Message) AppendPart() { + m.Parts = append(m.Parts, NewPart()) +} + +func (m *Message) AppendContent(s string) { + i := len(m.Parts)-1 + m.Parts[i].Content = append(m.Parts[i].Content, s) +} + +func (m *Message) FindBoundary(re *regexp.Regexp) { + match := re.FindStringSubmatch(m.Header.ContentType) + if match != nil { + boundary := strings.Replace(match[1], " ", "", -1) + m.PartBoundary, _ = regexp.Compile(".*" + boundary + ".*") + } +} + +func (m *Message) MatchBoundary(line string) bool { + if m.PartBoundary != nil { + return m.PartBoundary.MatchString(line) + } else { + return false + } +} + +func (m *Message) DetermineBestPart() int { + // Handle cases with obvious best part + number_parts := len(m.Parts) + if number_parts == 0 { + return -1 + } else if number_parts == 1 { + return 0 + } + + // Evaluate each part--lower is better + evaluations := []int{} + for i := 0; i < number_parts; i++ { + value := m.Parts[i].evaluateContentType() + value += m.Parts[i].evaluateContentEncoding() + evaluations = append(evaluations, value) + } + + // Find minimum value and return that part index + best_part_index := 0 + for i := 1; i < number_parts; i++ { + if evaluations[i] < evaluations[best_part_index] { + best_part_index = i + } + } + return best_part_index +} + +func (m *Message) Dump() { + fmt.Printf("Subject: %s\n", m.Header.Subject) + fmt.Printf("Date: %s\n", m.Header.Date) + fmt.Printf("From: %s\n", m.Header.From) + fmt.Printf("To: %s\n", m.Header.To) + fmt.Printf("Cc: %s\n", m.Header.Cc) + fmt.Printf("MessageID: %s\n", m.Header.MessageID) + fmt.Printf("ContentType: %s\n", m.Header.ContentType) + + if index := m.DetermineBestPart(); index != -1 { + fmt.Printf("ContentType: %s\n", m.Parts[index].Header.ContentType) + fmt.Printf("ContentEncoding: %s\n", m.Parts[index].Header.ContentEncoding) + + decoded, _ := decoder.DecodeArray(m.Parts[index].Content, m.Parts[index].Header.ContentEncoding) + + wrapped, _ := textwrap.WrapArray(decoded, 80) + + for _, line := range wrapped { + fmt.Printf("%s\n", line) + } + } +} + diff --git a/message/part.go b/message/part.go new file mode 100644 index 0000000..a49e8ad --- /dev/null +++ b/message/part.go @@ -0,0 +1,24 @@ +package message + +import ( + "git.dominic-ricottone.com/digestion/decoder" +) + +type MessagePart struct { + Header *MessagePartHeader + Content []string +} + +func NewPart() *MessagePart { + return &MessagePart{NewPartHeader(), []string{""}} +} + +func (m *MessagePart) evaluateContentType() int { + return 0 +} + +func (m *MessagePart) evaluateContentEncoding() int { + return decoder.EvaluateEncoding(m.Header.ContentEncoding) +} + + -- 2.45.2