From ce5b7f835e1ffb0d4aed72bbaf9920a025293312 Mon Sep 17 00:00:00 2001 From: Dominic Ricottone Date: Wed, 19 Aug 2020 18:23:57 -0400 Subject: [PATCH] Refactored and refined API --- decoder/encodings.go | 41 +++++------------- main.go | 19 +++++++- message/format.go | 101 +++++++++++++++++++++++++++++++++++++++++++ message/message.go | 79 +-------------------------------- message/part.go | 20 ++++++--- 5 files changed, 145 insertions(+), 115 deletions(-) create mode 100644 message/format.go diff --git a/decoder/encodings.go b/decoder/encodings.go index b7ab551..20bb069 100644 --- a/decoder/encodings.go +++ b/decoder/encodings.go @@ -5,46 +5,29 @@ import ( ) const ( - UTF8 = "UTF8" - Base64 = "Base64" - QuotedPrintable = "QuotedPrintable" - Unknown = "Unknown" + EncodedUTF8 = "EncodedUTF8" + EncodedBase64 = "EncodedBase64" + EncodedQuotedPrintable = "EncodedQuotedPrintable" + EncodedUnknown = "EncodedUnknown" ) -func determine_encoding(encoding string) string { +func DetermineEncoding(encoding string) string { if strings.Contains(encoding, "base64") { - return Base64 + return EncodedBase64 } else if strings.Contains(encoding, "quoted-printable") { - return QuotedPrintable + return EncodedQuotedPrintable } else if strings.Contains(encoding, "utf-8") { - return UTF8 + return EncodedUTF8 } else { - return Unknown - } -} - -// Translate an encoding label into a numeric value according to preference -// of use in processing. Preference tiers are: -// 1. UTF-8 -// 2. base64, quoted-printable -func EvaluateEncoding(encoding string) int { - switch determine_encoding(encoding) { - case UTF8: - return 0 - case Base64: - return 1 - case QuotedPrintable: - return 1 - default: - return 10 + return EncodedUnknown } } func DecodeArray(lines []string, encoding string) ([]string, error) { - switch determine_encoding(encoding) { - case Base64: + switch DetermineEncoding(encoding) { + case EncodedBase64: return decode_base64(lines) - case QuotedPrintable: + case EncodedQuotedPrintable: return decode_quotedprintable(lines) default: return lines, nil diff --git a/main.go b/main.go index 4949f3c..0e85ce4 100644 --- a/main.go +++ b/main.go @@ -18,6 +18,19 @@ const ( ParsingContent = "ParsingContent" ) +func contains_nonempty(s1, s2 string) bool { + return s2 != "" && strings.Contains(s1, s2) +} + +func first_submatch(r regexp.Regexp, s string) string { + matches := r.FindStringSubmatch(s) + if matches != nil { + return strings.Replace(matches[1], " ", "", -1) + } else { + return "" + } +} + func parse_stream(reader io.Reader) { // Create scanner from reader input := bufio.NewScanner(reader) @@ -44,6 +57,7 @@ func parse_stream(reader io.Reader) { parsing := ParsingPreHeader current_message := message.NewMessage() + current_boundary := "" for input.Scan() { line := input.Text() @@ -57,7 +71,7 @@ func parse_stream(reader io.Reader) { } else if parsing == ParsingHeader { if tline == "" { parsing = ParsingContent - current_message.FindBoundary(re_multipart) + current_boundary = first_submatch(*re_multipart, current_message.Header.ContentType) } else { current_message.SetHeader(line) } @@ -72,7 +86,8 @@ func parse_stream(reader io.Reader) { parsing = ParsingPreHeader current_message.Dump() current_message = message.NewMessage() - } else if current_message.MatchBoundary(tline) { + current_boundary = "" + } else if contains_nonempty(tline, current_boundary) { parsing = ParsingPartHeader current_message.AppendPart() } else { diff --git a/message/format.go b/message/format.go new file mode 100644 index 0000000..4d0f713 --- /dev/null +++ b/message/format.go @@ -0,0 +1,101 @@ +package message + +import ( + "fmt" + "strings" + + textwrap "git.dominic-ricottone.com/textwrap/common" + + "git.dominic-ricottone.com/digestion/decoder" +) + +func (m *Message) determine_best_part() int { + // Handle cases with obvious best part + number_parts := len(m.Parts) + if number_parts == 0 { + return -1 + } else if number_parts == 1 { + return 0 + } + + // Evaluate each part--lower is better + evaluations := []int{} + for i := 0; i < number_parts; i++ { + value := m.Parts[i].evaluate_type() + value += m.Parts[i].evaluate_encoding() + evaluations = append(evaluations, value) + } + + // Find minimum value and return that part index + best_part_index := 0 + for i := 1; i < number_parts; i++ { + if evaluations[i] < evaluations[best_part_index] { + best_part_index = i + } + } + return best_part_index +} + +func (m *Message) format_header() []string { + buffer := []string{} + if m.Header.Subject != "" { + buffer = append(buffer, fmt.Sprintf("Subject: %s", m.Header.Subject)) + } + if m.Header.Date != "" { + buffer = append(buffer, fmt.Sprintf("Date: %s", m.Header.Date)) + } + if m.Header.From != "" { + buffer = append(buffer, fmt.Sprintf("From: %s", m.Header.From)) + } + if m.Header.To != "" { + buffer = append(buffer, fmt.Sprintf("To: %s", m.Header.To)) + } + if m.Header.Cc != "" { + buffer = append(buffer, fmt.Sprintf("Cc: %s", m.Header.Cc)) + } + //if m.Header.MessageID != "" { + // buffer = append(buffer, fmt.Sprintf("MessageID: %s", m.Header.MessageID)) + //} + //if m.Header.ContentType != "" { + // buffer = append(buffer, fmt.Sprintf("ContentType: %s", m.Header.ContentType)) + //} + return buffer +} + +func (m *Message) format_content(length int) ([]string, error) { + best_part := m.determine_best_part() + buffer := []string{} + + // Handle messages with no content + if best_part == -1 { + return buffer, nil + } + + // Decode best part's content + decoded, err := decoder.DecodeArray(m.Parts[best_part].Content, m.Parts[best_part].Header.ContentEncoding) + if err != nil { + return buffer, err + } + + // Wrap text content + wrapped, err := textwrap.WrapArray(decoded, length) + if err != nil { + return decoded, err + } + + return wrapped, nil +} + +func (m *Message) Dump() { + header := m.format_header() + content, err := m.format_content(80) + if err != nil { + fmt.Printf("error: %s", err) + } + + for _, line := range append(header, content...) { + fmt.Printf("%s\n", line) + } + fmt.Printf("\n%s\n\n", strings.Repeat("-", 80)) +} + diff --git a/message/message.go b/message/message.go index b159902..9a25a5c 100644 --- a/message/message.go +++ b/message/message.go @@ -1,23 +1,12 @@ package message -import ( - "fmt" - "strings" - "regexp" - - textwrap "git.dominic-ricottone.com/textwrap/common" - - "git.dominic-ricottone.com/digestion/decoder" -) - type Message struct { Header *MessageHeader Parts []*MessagePart - PartBoundary *regexp.Regexp } func NewMessage() *Message { - return &Message{NewHeader(), []*MessagePart{NewPart()}, nil} + return &Message{NewHeader(), []*MessagePart{NewPart()}} } func (m *Message) SetHeader(s string) { @@ -37,69 +26,3 @@ func (m *Message) AppendContent(s string) { m.Parts[i].Content = append(m.Parts[i].Content, s) } -func (m *Message) FindBoundary(re *regexp.Regexp) { - match := re.FindStringSubmatch(m.Header.ContentType) - if match != nil { - boundary := strings.Replace(match[1], " ", "", -1) - m.PartBoundary, _ = regexp.Compile(".*" + boundary + ".*") - } -} - -func (m *Message) MatchBoundary(line string) bool { - if m.PartBoundary != nil { - return m.PartBoundary.MatchString(line) - } else { - return false - } -} - -func (m *Message) DetermineBestPart() int { - // Handle cases with obvious best part - number_parts := len(m.Parts) - if number_parts == 0 { - return -1 - } else if number_parts == 1 { - return 0 - } - - // Evaluate each part--lower is better - evaluations := []int{} - for i := 0; i < number_parts; i++ { - value := m.Parts[i].evaluateContentType() - value += m.Parts[i].evaluateContentEncoding() - evaluations = append(evaluations, value) - } - - // Find minimum value and return that part index - best_part_index := 0 - for i := 1; i < number_parts; i++ { - if evaluations[i] < evaluations[best_part_index] { - best_part_index = i - } - } - return best_part_index -} - -func (m *Message) Dump() { - fmt.Printf("Subject: %s\n", m.Header.Subject) - fmt.Printf("Date: %s\n", m.Header.Date) - fmt.Printf("From: %s\n", m.Header.From) - fmt.Printf("To: %s\n", m.Header.To) - fmt.Printf("Cc: %s\n", m.Header.Cc) - fmt.Printf("MessageID: %s\n", m.Header.MessageID) - fmt.Printf("ContentType: %s\n", m.Header.ContentType) - - if index := m.DetermineBestPart(); index != -1 { - fmt.Printf("ContentType: %s\n", m.Parts[index].Header.ContentType) - fmt.Printf("ContentEncoding: %s\n", m.Parts[index].Header.ContentEncoding) - - decoded, _ := decoder.DecodeArray(m.Parts[index].Content, m.Parts[index].Header.ContentEncoding) - - wrapped, _ := textwrap.WrapArray(decoded, 80) - - for _, line := range wrapped { - fmt.Printf("%s\n", line) - } - } -} - diff --git a/message/part.go b/message/part.go index a49e8ad..7039d9c 100644 --- a/message/part.go +++ b/message/part.go @@ -5,20 +5,28 @@ import ( ) type MessagePart struct { - Header *MessagePartHeader - Content []string + Header *MessagePartHeader + Content []string } func NewPart() *MessagePart { return &MessagePart{NewPartHeader(), []string{""}} } -func (m *MessagePart) evaluateContentType() int { +func (m *MessagePart) evaluate_type() int { return 0 } -func (m *MessagePart) evaluateContentEncoding() int { - return decoder.EvaluateEncoding(m.Header.ContentEncoding) +func (m *MessagePart) evaluate_encoding() int { + switch decoder.DetermineEncoding(m.Header.ContentEncoding) { + case decoder.EncodedUTF8: + return 0 + case decoder.EncodedBase64: + return 1 + case decoder.EncodedQuotedPrintable: + return 1 + default: + return 10 + } } - -- 2.45.2