A decoder/base64.go => decoder/base64.go +18 -0
@@ 0,0 1,18 @@
+package decoder
+
+import (
+ "encoding/base64"
+)
+
+// decode_base64 decodes every element of lines as an independent chunk of
+// standard base64 and returns the decoded chunks, in input order, as strings.
+//
+// If any element is not valid base64 the function returns a nil slice and the
+// error reported by encoding/base64. Nothing that was decoded before the bad
+// element is returned, so callers never see a half-decoded body.
+func decode_base64(lines []string) ([]string, error) {
+	// Exactly one output entry is produced per input line.
+	decoded := make([]string, 0, len(lines))
+	for _, line := range lines {
+		decoded_line, err := base64.StdEncoding.DecodeString(line)
+		if err != nil {
+			return nil, err
+		}
+		decoded = append(decoded, string(decoded_line))
+	}
+	return decoded, nil
+}
+
A decoder/encodings.go => decoder/encodings.go +53 -0
@@ 0,0 1,53 @@
+package decoder
+
+import (
+ "strings"
+)
+
+// Names of the encodings this package tells apart. determine_encoding
+// returns one of these values; Unknown is its result for any label that
+// matches none of the others.
+const (
+ UTF8 = "UTF8"
+ Base64 = "Base64"
+ QuotedPrintable = "QuotedPrintable"
+ Unknown = "Unknown"
+)
+
+// determine_encoding maps a raw encoding label onto one of the encoding
+// constants (Base64, QuotedPrintable, UTF8 or Unknown).
+//
+// The label is searched for the substrings "base64", "quoted-printable" and
+// "utf-8", in that order, so surrounding text in the label does not matter.
+// The search ignores letter case: encoding names in mail headers are
+// case-insensitive, so "Base64" and "QUOTED-PRINTABLE" must be recognised.
+func determine_encoding(encoding string) string {
+	label := strings.ToLower(encoding)
+	switch {
+	case strings.Contains(label, "base64"):
+		return Base64
+	case strings.Contains(label, "quoted-printable"):
+		return QuotedPrintable
+	case strings.Contains(label, "utf-8"):
+		return UTF8
+	default:
+		return Unknown
+	}
+}
+
+// EvaluateEncoding scores an encoding label by how preferable it is for
+// processing. The tiers, from most to least preferred, are:
+//
+//	0   UTF-8
+//	1   base64, quoted-printable
+//	10  anything else
+func EvaluateEncoding(encoding string) int {
+	kind := determine_encoding(encoding)
+	if kind == UTF8 {
+		return 0
+	}
+	if kind == Base64 || kind == QuotedPrintable {
+		return 1
+	}
+	return 10
+}
+
+// DecodeArray decodes lines according to the encoding named by the label
+// encoding. Base64 and quoted-printable labels are handed to the matching
+// decoder, whose result and error are returned unchanged. For every other
+// label the input slice itself is returned with a nil error.
+func DecodeArray(lines []string, encoding string) ([]string, error) {
+	kind := determine_encoding(encoding)
+	if kind == Base64 {
+		return decode_base64(lines)
+	}
+	if kind == QuotedPrintable {
+		return decode_quotedprintable(lines)
+	}
+	return lines, nil
+}
+
A decoder/quotedprintable.go => decoder/quotedprintable.go +20 -0
@@ 0,0 1,20 @@
+package decoder
+
+import (
+ "io/ioutil"
+ "mime/quotedprintable"
+ "strings"
+)
+
+// decode_quotedprintable decodes lines, the consecutive lines of one
+// quoted-printable body, and returns the decoded text split back into lines.
+//
+// The lines are decoded together rather than one at a time so that a soft
+// line break (a trailing "=") joins a line to the one that follows it.
+// Decoding each line on its own would leave words, and multi-byte characters
+// whose escapes straddle the break, split across two output entries. As a
+// result the returned slice can be shorter than the input.
+//
+// On a decoding error the function returns a nil slice and the error.
+func decode_quotedprintable(lines []string) ([]string, error) {
+	// strings.Split would turn an empty body into one empty line; keep the
+	// "no lines in, no lines out" behaviour instead.
+	if len(lines) == 0 {
+		return []string{}, nil
+	}
+
+	body := strings.Join(lines, "\n")
+	decoded, err := ioutil.ReadAll(quotedprintable.NewReader(strings.NewReader(body)))
+	if err != nil {
+		return nil, err
+	}
+	return strings.Split(string(decoded), "\n"), nil
+}
+
M main.go => main.go +11 -192
@@ 8,186 8,9 @@ import (
"bufio"
"regexp"
- "git.dominic-ricottone.com/textwrap/common"
+ "git.dominic-ricottone.com/digestion/message"
)
-// An enumeration of header parts
-const(
- HeaderSubject = "HeaderSubject"
- HeaderDate = "HeaderDate"
- HeaderFrom = "HeaderFrom"
- HeaderTo = "HeaderTo"
- HeaderCc = "HeaderCc"
- HeaderMessageID = "HeaderMessageID"
- HeaderContentType = "HeaderContentType"
- HeaderContentEncoding = "HeaderContentEncoding"
-)
-
-// A message header container, used within message containers
-type MessageHeader struct {
- Subject string
- Date string
- From string
- To string
- Cc string
- MessageID string
- ContentType string
- LastSet string
-}
-
-// Builder for a message header
-func NewHeader() *MessageHeader {
- return &MessageHeader{"", "", "", "", "", "", "", ""}
-}
-
-// A message part header container, used within message part containers
-type MessagePartHeader struct {
- ContentType string
- ContentEncoding string
- LastSet string
-}
-
-// Builder for a message part header
-func NewPartHeader() *MessagePartHeader {
- return &MessagePartHeader{"", "", ""}
-}
-
-// A message part container, used within message containers
-type MessagePart struct {
- Header *MessagePartHeader
- Content []string
-}
-
-// Builder for a message part
-func NewPart() *MessagePart {
- return &MessagePart{NewPartHeader(), []string{""}}
-}
-
-// A message container
-type Message struct {
- Header *MessageHeader
- Parts []*MessagePart
- PartBoundary *regexp.Regexp
-}
-
-// Builder for a message
-func NewMessage() *Message {
- return &Message{NewHeader(), []*MessagePart{NewPart()}, nil}
-}
-
-// Message setters
-func (m *Message) SetHeader(line string) {
- if strings.HasPrefix(line, "Subject:") {
- m.Header.Subject = line[8:]
- m.Header.LastSet = HeaderSubject
- } else if strings.HasPrefix(line, "Date:") {
- m.Header.Date = line[5:]
- m.Header.LastSet = HeaderDate
- } else if strings.HasPrefix(line, "From:") {
- m.Header.From = line[5:]
- m.Header.LastSet = HeaderFrom
- } else if strings.HasPrefix(line, "To:") {
- m.Header.To = line[3:]
- m.Header.LastSet = HeaderTo
- } else if strings.HasPrefix(line, "Cc:") {
- m.Header.Cc = line[3:]
- m.Header.LastSet = HeaderCc
- } else if strings.HasPrefix(line, "Message-ID:") {
- m.Header.MessageID = line[11:]
- m.Header.LastSet = HeaderMessageID
- } else if strings.HasPrefix(line, "Content-Type:") {
- m.Header.ContentType = line[13:]
- m.Header.LastSet = HeaderContentType
- }
-}
-
-func (m *Message) AppendLastHeader(s string) {
- switch m.Header.LastSet {
- case HeaderSubject:
- m.Header.Subject += " " + s
- case HeaderDate:
- m.Header.Date += " " + s
- case HeaderFrom:
- m.Header.From += " " + s
- case HeaderTo:
- m.Header.To += " " + s
- case HeaderCc:
- m.Header.Cc += " " + s
- case HeaderMessageID:
- m.Header.MessageID += " " + s
- case HeaderContentType:
- m.Header.ContentType += " " + s
- }
-}
-
-func (m *Message) SetPartHeader(line string) {
- if strings.HasPrefix(line, "Content-Type:") {
- m.Parts[len(m.Parts)-1].Header.ContentType = line[13:]
- m.Parts[len(m.Parts)-1].Header.LastSet = HeaderContentType
- } else if strings.HasPrefix(line, "Content-Transfer-Encoding:") {
- m.Parts[len(m.Parts)-1].Header.ContentEncoding = line[26:]
- m.Parts[len(m.Parts)-1].Header.LastSet = HeaderContentEncoding
- }
-}
-
-func (m *Message) AppendLastPartHeader(s string) {
- switch m.Parts[len(m.Parts)-1].Header.LastSet {
- case HeaderContentType:
- m.Parts[len(m.Parts)-1].Header.ContentType += " " + s
- case HeaderContentEncoding:
- m.Parts[len(m.Parts)-1].Header.ContentEncoding += " " + s
- }
-}
-
-func (m *Message) AppendPart() {
- m.Parts = append(m.Parts, NewPart())
-}
-
-func (m *Message) AppendContent(s string) {
- i := len(m.Parts)-1
- m.Parts[i].Content = append(m.Parts[i].Content, s)
-}
-
-func (m *Message) FindBoundary(re *regexp.Regexp) {
- match := re.FindStringSubmatch(m.Header.ContentType)
- if match != nil {
- boundary := strings.Replace(match[1], " ", "", -1)
- m.PartBoundary, _ = regexp.Compile(".*" + boundary + ".*")
- }
-}
-
-// Message logic
-func (m *Message) MatchBoundary(line string) bool {
- if m.PartBoundary != nil {
- return m.PartBoundary.MatchString(line)
- } else {
- return false
- }
-}
-
-// A message printer
-func (m *Message) Dump() {
- fmt.Printf("Subject: %s\n", m.Header.Subject)
- fmt.Printf("Date: %s\n", m.Header.Date)
- fmt.Printf("From: %s\n", m.Header.From)
- fmt.Printf("To: %s\n", m.Header.To)
- fmt.Printf("Cc: %s\n", m.Header.Cc)
- fmt.Printf("MessageID: %s\n", m.Header.MessageID)
- fmt.Printf("ContentType: %s\n", m.Header.ContentType)
- for i := 0; i < len(m.Parts); i++ {
- fmt.Printf("Part %d:\n", i)
- fmt.Printf("ContentType: %s\n", m.Parts[i].Header.ContentType)
- fmt.Printf("ContentEncoding: %s\n", m.Parts[i].Header.ContentEncoding)
-
- wrapped, _ := common.WrapArray(m.Parts[i].Content, 80)
- for j := 0; j < len(wrapped); j++ {
- fmt.Printf("%s\n", wrapped[j])
- }
- fmt.Println("EOF")
- }
-}
-
-// Parser statuses
const (
ParsingPreHeader = "ParsingPreHeader"
ParsingHeader = "ParsingHeader"
@@ 220,7 43,7 @@ func parse_stream(reader io.Reader) {
}
parsing := ParsingPreHeader
- message := NewMessage()
+ current_message := message.NewMessage()
for input.Scan() {
line := input.Text()
@@ 229,35 52,31 @@ func parse_stream(reader io.Reader) {
if parsing == ParsingPreHeader {
if re_header.MatchString(tline) {
parsing = ParsingHeader
- message.SetHeader(tline)
+ current_message.SetHeader(tline)
}
} else if parsing == ParsingHeader {
if tline == "" {
parsing = ParsingContent
- message.FindBoundary(re_multipart)
- } else if strings.HasPrefix(line, "\t") {
- message.AppendLastHeader(tline)
+ current_message.FindBoundary(re_multipart)
} else {
- message.SetHeader(tline)
+ current_message.SetHeader(line)
}
} else if parsing == ParsingPartHeader {
if tline == "" {
parsing = ParsingContent
- } else if strings.HasPrefix(line, "\t") {
- message.AppendLastPartHeader(tline)
} else {
- message.SetPartHeader(tline)
+ current_message.SetPartHeader(line)
}
} else if parsing == ParsingContent {
if re_message_break.MatchString(tline) {
parsing = ParsingPreHeader
- message.Dump()
- message = NewMessage()
- } else if message.MatchBoundary(tline) {
+ current_message.Dump()
+ current_message = message.NewMessage()
+ } else if current_message.MatchBoundary(tline) {
parsing = ParsingPartHeader
- message.AppendPart()
+ current_message.AppendPart()
} else {
- message.AppendContent(tline)
+ current_message.AppendContent(tline)
}
}
}
A => +113 -0
@@ 0,0 1,113 @@
package message
import (
"strings"
)
// Identifiers for the header fields tracked by this package. A header stores
// one of them in its LastSet field to record which field was written most
// recently, so that a folded continuation line can be appended to it.
const (
	HeaderSubject         = "HeaderSubject"
	HeaderDate            = "HeaderDate"
	HeaderFrom            = "HeaderFrom"
	HeaderTo              = "HeaderTo"
	HeaderCc              = "HeaderCc"
	HeaderMessageID       = "HeaderMessageID"
	HeaderContentType     = "HeaderContentType"
	HeaderContentEncoding = "HeaderContentEncoding"
)

// MessageHeader holds the message-level header fields extracted from a
// message.
type MessageHeader struct {
	Subject     string
	Date        string
	From        string
	To          string
	Cc          string
	MessageID   string
	ContentType string
	// LastSet is the Header* identifier of the field most recently stored by
	// SetHeader, or "" when there is no such field.
	LastSet string
}

// NewHeader returns a MessageHeader whose fields are all empty.
func NewHeader() *MessageHeader {
	return &MessageHeader{}
}

// SetHeader consumes one raw header line.
//
// A line that starts with a space or a tab is a folded continuation: its
// trimmed text is appended, after a single space, to the field named by
// LastSet. Any other line is split at its first colon. If the name before
// the colon is Subject, Date, From, To, Cc, Message-ID or Content-Type
// (compared without regard to letter case), the trimmed text after the colon
// replaces that field and LastSet is updated.
//
// A line that names any other field, or has no colon at all, stores nothing
// and clears LastSet, so that continuation lines belonging to an ignored
// field are not glued onto the previously stored one.
func (m *MessageHeader) SetHeader(s string) {
	// Folding may use either kind of whitespace, not only a tab.
	if strings.HasPrefix(s, "\t") || strings.HasPrefix(s, " ") {
		m.append_last_set(s)
		return
	}

	colon := strings.Index(s, ":")
	if colon < 0 {
		m.LastSet = ""
		return
	}
	value := strings.TrimSpace(s[colon+1:])

	// Field names are case-insensitive, e.g. "Message-Id" is common.
	switch strings.ToLower(s[:colon]) {
	case "subject":
		m.Subject, m.LastSet = value, HeaderSubject
	case "date":
		m.Date, m.LastSet = value, HeaderDate
	case "from":
		m.From, m.LastSet = value, HeaderFrom
	case "to":
		m.To, m.LastSet = value, HeaderTo
	case "cc":
		m.Cc, m.LastSet = value, HeaderCc
	case "message-id":
		m.MessageID, m.LastSet = value, HeaderMessageID
	case "content-type":
		m.ContentType, m.LastSet = value, HeaderContentType
	default:
		m.LastSet = ""
	}
}

// append_last_set appends the trimmed text of s, preceded by a single space,
// to the field named by LastSet. It does nothing when LastSet names no field
// of this header or when s is blank.
func (m *MessageHeader) append_last_set(s string) {
	s = strings.TrimSpace(s)
	if s == "" {
		return
	}

	var field *string
	switch m.LastSet {
	case HeaderSubject:
		field = &m.Subject
	case HeaderDate:
		field = &m.Date
	case HeaderFrom:
		field = &m.From
	case HeaderTo:
		field = &m.To
	case HeaderCc:
		field = &m.Cc
	case HeaderMessageID:
		field = &m.MessageID
	case HeaderContentType:
		field = &m.ContentType
	default:
		return
	}
	*field += " " + s
}
// MessagePartHeader holds the header fields extracted for a single part of a
// message.
type MessagePartHeader struct {
	ContentType     string
	ContentEncoding string
	// LastSet is the Header* identifier of the field most recently stored by
	// SetHeader, or "" when there is no such field.
	LastSet string
}

// NewPartHeader returns a MessagePartHeader whose fields are all empty.
func NewPartHeader() *MessagePartHeader {
	return &MessagePartHeader{}
}

// SetHeader consumes one raw part-header line.
//
// A line that starts with a space or a tab is a folded continuation: its
// trimmed text is appended, after a single space, to the field named by
// LastSet. Any other line is split at its first colon. If the name before
// the colon is Content-Type or Content-Transfer-Encoding (compared without
// regard to letter case), the trimmed text after the colon replaces that
// field and LastSet is updated.
//
// A line that names any other field, or has no colon at all, stores nothing
// and clears LastSet, so that continuation lines belonging to an ignored
// field are not glued onto the previously stored one.
func (m *MessagePartHeader) SetHeader(s string) {
	// Folding may use either kind of whitespace, not only a tab.
	if strings.HasPrefix(s, "\t") || strings.HasPrefix(s, " ") {
		m.append_last_set(s)
		return
	}

	colon := strings.Index(s, ":")
	if colon < 0 {
		m.LastSet = ""
		return
	}
	value := strings.TrimSpace(s[colon+1:])

	// Field names are case-insensitive.
	switch strings.ToLower(s[:colon]) {
	case "content-type":
		m.ContentType, m.LastSet = value, HeaderContentType
	case "content-transfer-encoding":
		m.ContentEncoding, m.LastSet = value, HeaderContentEncoding
	default:
		m.LastSet = ""
	}
}

// append_last_set appends the trimmed text of s, preceded by a single space,
// to the field named by LastSet. It does nothing when LastSet names no field
// of this header or when s is blank.
func (m *MessagePartHeader) append_last_set(s string) {
	s = strings.TrimSpace(s)
	if s == "" {
		return
	}
	switch m.LastSet {
	case HeaderContentType:
		m.ContentType += " " + s
	case HeaderContentEncoding:
		m.ContentEncoding += " " + s
	}
}
A message/message.go => message/message.go +105 -0
@@ 0,0 1,105 @@
+package message
+
+import (
+ "fmt"
+ "strings"
+ "regexp"
+
+ textwrap "git.dominic-ricottone.com/textwrap/common"
+
+ "git.dominic-ricottone.com/digestion/decoder"
+)
+
+// Message is one parsed message: its header, its parts in the order they
+// were appended, and the compiled pattern used to recognise the lines that
+// separate parts (nil until FindBoundary sets it).
+type Message struct {
+	Header       *MessageHeader
+	Parts        []*MessagePart
+	PartBoundary *regexp.Regexp
+}
+
+// NewMessage returns a Message with a fresh header, exactly one fresh part,
+// and no part boundary.
+func NewMessage() *Message {
+	return &Message{
+		Header: NewHeader(),
+		Parts:  []*MessagePart{NewPart()},
+	}
+}
+
+// SetHeader passes one raw header line to the message-level header.
+func (m *Message) SetHeader(s string) {
+	header := m.Header
+	header.SetHeader(s)
+}
+
+// SetPartHeader passes one raw header line to the header of the last part
+// in m.Parts.
+func (m *Message) SetPartHeader(s string) {
+	last := m.Parts[len(m.Parts)-1]
+	last.Header.SetHeader(s)
+}
+
+// AppendPart adds a fresh, empty part to the end of m.Parts.
+func (m *Message) AppendPart() {
+	fresh := NewPart()
+	m.Parts = append(m.Parts, fresh)
+}
+
+// AppendContent adds s as a new content line of the last part in m.Parts.
+func (m *Message) AppendContent(s string) {
+	last := m.Parts[len(m.Parts)-1]
+	last.Content = append(last.Content, s)
+}
+
+// FindBoundary applies re to the message's Content-Type header and, when it
+// matches, takes the first capture group (with all spaces removed) as the
+// part boundary. m.PartBoundary is then set to a pattern that matches any
+// line containing that boundary text.
+//
+// The boundary is matched literally: it is escaped before being compiled, so
+// characters such as '+', '.', '(' or '?' in it are not treated as regular
+// expression operators. m.PartBoundary is left untouched when re does not
+// match, has no capture group, or captures an empty boundary (which would
+// otherwise match every line).
+func (m *Message) FindBoundary(re *regexp.Regexp) {
+	match := re.FindStringSubmatch(m.Header.ContentType)
+	if len(match) < 2 {
+		return
+	}
+
+	boundary := strings.Replace(match[1], " ", "", -1)
+	if boundary == "" {
+		return
+	}
+
+	// MatchString is unanchored, so the escaped text alone is equivalent to
+	// wrapping it in ".*".
+	pattern, err := regexp.Compile(regexp.QuoteMeta(boundary))
+	if err != nil {
+		return
+	}
+	m.PartBoundary = pattern
+}
+
+// MatchBoundary reports whether line matches the message's part boundary
+// pattern. It is always false while no boundary pattern has been set.
+func (m *Message) MatchBoundary(line string) bool {
+	return m.PartBoundary != nil && m.PartBoundary.MatchString(line)
+}
+
+// DetermineBestPart returns the index of the part whose combined
+// content-type and content-encoding score is lowest; on a tie the earliest
+// part wins. It returns -1 when the message has no parts, and 0 without
+// scoring anything when it has exactly one.
+func (m *Message) DetermineBestPart() int {
+	switch len(m.Parts) {
+	case 0:
+		return -1
+	case 1:
+		return 0
+	}
+
+	// Lower scores are better; keep the first part that holds the minimum.
+	winner, lowest := -1, 0
+	for idx, part := range m.Parts {
+		score := part.evaluateContentType() + part.evaluateContentEncoding()
+		if winner == -1 || score < lowest {
+			winner, lowest = idx, score
+		}
+	}
+	return winner
+}
+
+// Dump prints the message to standard output: the message-level header
+// fields, then the content type, content encoding and body of the part
+// chosen by DetermineBestPart. Nothing is printed after the header fields
+// when the message has no parts.
+//
+// The body is decoded according to the part's content encoding and then
+// passed through textwrap.WrapArray with a width argument of 80. Dump has no
+// way to report failure, so it degrades instead of dropping the body: if
+// decoding fails the undecoded lines are used, and if wrapping fails the
+// unwrapped lines are printed.
+func (m *Message) Dump() {
+	fmt.Printf("Subject: %s\n", m.Header.Subject)
+	fmt.Printf("Date: %s\n", m.Header.Date)
+	fmt.Printf("From: %s\n", m.Header.From)
+	fmt.Printf("To: %s\n", m.Header.To)
+	fmt.Printf("Cc: %s\n", m.Header.Cc)
+	fmt.Printf("MessageID: %s\n", m.Header.MessageID)
+	fmt.Printf("ContentType: %s\n", m.Header.ContentType)
+
+	index := m.DetermineBestPart()
+	if index == -1 {
+		return
+	}
+	part := m.Parts[index]
+
+	fmt.Printf("ContentType: %s\n", part.Header.ContentType)
+	fmt.Printf("ContentEncoding: %s\n", part.Header.ContentEncoding)
+
+	decoded, err := decoder.DecodeArray(part.Content, part.Header.ContentEncoding)
+	if err != nil {
+		// Showing the raw body is more useful than showing nothing.
+		decoded = part.Content
+	}
+
+	wrapped, err := textwrap.WrapArray(decoded, 80)
+	if err != nil {
+		for _, line := range decoded {
+			fmt.Printf("%s\n", line)
+		}
+		return
+	}
+	for _, line := range wrapped {
+		fmt.Printf("%s\n", line)
+	}
+}
+
A message/part.go => message/part.go +24 -0
@@ 0,0 1,24 @@
+package message
+
+import (
+ "git.dominic-ricottone.com/digestion/decoder"
+)
+
+// MessagePart is one part of a message: the part's own header and its
+// content lines.
+type MessagePart struct {
+	Header  *MessagePartHeader
+	Content []string
+}
+
+// NewPart returns a part with a fresh header and a content slice that holds
+// a single empty string.
+func NewPart() *MessagePart {
+	return &MessagePart{
+		Header:  NewPartHeader(),
+		Content: []string{""},
+	}
+}
+
+// evaluateContentType scores the part by its content type. Every part
+// currently receives the same score, 0.
+func (m *MessagePart) evaluateContentType() int {
+	const score = 0
+	return score
+}
+
+// evaluateContentEncoding scores the part by its content transfer encoding,
+// as rated by decoder.EvaluateEncoding.
+func (m *MessagePart) evaluateContentEncoding() int {
+	encoding := m.Header.ContentEncoding
+	return decoder.EvaluateEncoding(encoding)
+}
+
+