~dricottone/digestion

62eccd92f6655185139d9015022c8b9953a51521 — Dominic Ricottone 4 years ago 6664dff
Refactored into subdirectories; Added common decoders
A decoder/base64.go => decoder/base64.go +18 -0
@@ 0,0 1,18 @@
package decoder

import (
	"encoding/base64"
)

// decode_base64 decodes every element of lines independently as standard
// (padded) base64 and returns the decoded strings in the same order.
//
// Decoding stops at the first element that fails to decode. In that case the
// strings decoded before the failing element are returned together with the
// error.
func decode_base64(lines []string) ([]string, error) {
	out := make([]string, 0, len(lines))
	for i := 0; i < len(lines); i++ {
		raw, err := base64.StdEncoding.DecodeString(lines[i])
		if err != nil {
			// Hand back what was decoded so far alongside the error.
			return out, err
		}
		out = append(out, string(raw))
	}
	return out, nil
}


A decoder/encodings.go => decoder/encodings.go +53 -0
@@ 0,0 1,53 @@
package decoder

import (
	"strings"
)

// Canonical labels for the encodings this package recognizes.
const (
	// UTF8 marks content that is passed through without decoding.
	UTF8 = "UTF8"
	// Base64 marks content that is decoded as base64.
	Base64 = "Base64"
	// QuotedPrintable marks content that is decoded as quoted-printable.
	QuotedPrintable = "QuotedPrintable"
	// Unknown marks any value that matches none of the labels above.
	Unknown = "Unknown"
)

// determine_encoding maps a raw header value (such as the value of a
// Content-Transfer-Encoding header) onto one of the canonical labels.
//
// Matching is by substring and ignores letter case, because mail software
// writes these tokens in varying case ("base64", "BASE64",
// "Quoted-Printable"). The checks run in the order base64, quoted-printable,
// utf-8; the first one that matches wins. Anything else yields Unknown.
func determine_encoding(encoding string) string {
	lowered := strings.ToLower(encoding)
	switch {
	case strings.Contains(lowered, "base64"):
		return Base64
	case strings.Contains(lowered, "quoted-printable"):
		return QuotedPrintable
	case strings.Contains(lowered, "utf-8"):
		return UTF8
	default:
		return Unknown
	}
}

// EvaluateEncoding scores an encoding label by how preferable it is for
// processing. A lower score is more preferable:
//
//   - UTF-8 scores 0
//   - base64 and quoted-printable both score 1
//   - anything unrecognized scores 10
func EvaluateEncoding(encoding string) int {
	kind := determine_encoding(encoding)
	if kind == UTF8 {
		return 0
	}
	if kind == Base64 || kind == QuotedPrintable {
		return 1
	}
	return 10
}

// DecodeArray decodes lines according to the given encoding label.
//
// Base64 and quoted-printable content is decoded element by element by the
// matching decoder, whose result and error are returned as-is. For every
// other encoding, lines is returned unchanged with a nil error.
func DecodeArray(lines []string, encoding string) ([]string, error) {
	kind := determine_encoding(encoding)
	if kind == Base64 {
		return decode_base64(lines)
	}
	if kind == QuotedPrintable {
		return decode_quotedprintable(lines)
	}
	return lines, nil
}


A decoder/quotedprintable.go => decoder/quotedprintable.go +20 -0
@@ 0,0 1,20 @@
package decoder

import (
	"io/ioutil"
	"mime/quotedprintable"
	"strings"
)

// decode_quotedprintable decodes every element of lines independently as
// quoted-printable text and returns the decoded strings in the same order.
//
// If any element fails to decode, nil is returned together with the error.
func decode_quotedprintable(lines []string) ([]string, error) {
	out := make([]string, 0, len(lines))
	for i := range lines {
		src := strings.NewReader(lines[i])
		raw, err := ioutil.ReadAll(quotedprintable.NewReader(src))
		if err != nil {
			return nil, err
		}
		out = append(out, string(raw))
	}
	return out, nil
}


M main.go => main.go +11 -192
@@ 8,186 8,9 @@ import (
	"bufio"
	"regexp"

	"git.dominic-ricottone.com/textwrap/common"
	"git.dominic-ricottone.com/digestion/message"
)

// An enumeration of header parts. Each value names one header field; the
// setters store it in a header's LastSet to record which field was assigned
// most recently, and the append methods switch on it to pick the field that a
// continuation line extends.
const(
	HeaderSubject         = "HeaderSubject"
	HeaderDate            = "HeaderDate"
	HeaderFrom            = "HeaderFrom"
	HeaderTo              = "HeaderTo"
	HeaderCc              = "HeaderCc"
	HeaderMessageID       = "HeaderMessageID"
	HeaderContentType     = "HeaderContentType"
	HeaderContentEncoding = "HeaderContentEncoding"
)

// MessageHeader holds the header fields tracked for a whole message. It is
// used within message containers.
type MessageHeader struct {
	Subject     string
	Date        string
	From        string
	To          string
	Cc          string
	MessageID   string
	ContentType string
	// LastSet names the field that was assigned most recently.
	LastSet string
}

// NewHeader builds a message header with every field empty.
func NewHeader() *MessageHeader {
	return new(MessageHeader)
}

// MessagePartHeader holds the header fields tracked for a single message
// part. It is used within message part containers.
type MessagePartHeader struct {
	ContentType     string
	ContentEncoding string
	// LastSet names the field that was assigned most recently.
	LastSet string
}

// NewPartHeader builds a message part header with every field empty.
func NewPartHeader() *MessagePartHeader {
	return new(MessagePartHeader)
}

// MessagePart is one part of a message: its header plus its content lines.
// It is used within message containers.
type MessagePart struct {
	Header  *MessagePartHeader
	Content []string
}

// NewPart builds a message part with an empty header and a content slice
// holding a single empty string.
func NewPart() *MessagePart {
	part := new(MessagePart)
	part.Header = NewPartHeader()
	part.Content = []string{""}
	return part
}

// Message is a message container: the message header, the message parts, and
// the compiled pattern used to recognize part boundaries (nil until one has
// been found).
type Message struct {
	Header       *MessageHeader
	Parts        []*MessagePart
	PartBoundary *regexp.Regexp
}

// NewMessage builds a message with an empty header, exactly one empty part,
// and no part boundary.
func NewMessage() *Message {
	msg := &Message{Header: NewHeader()}
	msg.Parts = []*MessagePart{NewPart()}
	return msg
}

// Message setters

// SetHeader stores the value of one header line in the matching field of the
// message header and records that field in LastSet.
//
// The tracked headers are Subject, Date, From, To, Cc, Message-ID and
// Content-Type. Header names are compared without regard to letter case, so
// variants such as "Message-Id:" or "CC:" are recognized. The stored value is
// everything after the colon, unmodified.
//
// A line that matches none of the tracked headers clears LastSet, so that a
// following continuation line is not appended to an unrelated earlier field.
func (m *Message) SetHeader(line string) {
	// named reports whether line starts with the given header name,
	// ignoring letter case.
	named := func(name string) bool {
		return len(line) >= len(name) && strings.EqualFold(line[:len(name)], name)
	}

	switch {
	case named("Subject:"):
		m.Header.Subject = line[len("Subject:"):]
		m.Header.LastSet = HeaderSubject
	case named("Date:"):
		m.Header.Date = line[len("Date:"):]
		m.Header.LastSet = HeaderDate
	case named("From:"):
		m.Header.From = line[len("From:"):]
		m.Header.LastSet = HeaderFrom
	case named("To:"):
		m.Header.To = line[len("To:"):]
		m.Header.LastSet = HeaderTo
	case named("Cc:"):
		m.Header.Cc = line[len("Cc:"):]
		m.Header.LastSet = HeaderCc
	case named("Message-ID:"):
		m.Header.MessageID = line[len("Message-ID:"):]
		m.Header.LastSet = HeaderMessageID
	case named("Content-Type:"):
		m.Header.ContentType = line[len("Content-Type:"):]
		m.Header.LastSet = HeaderContentType
	default:
		// Untracked header: forget the previous field so its value is not
		// extended by this header's continuation lines.
		m.Header.LastSet = ""
	}
}

// AppendLastHeader appends s, preceded by a single space, to whichever
// message header field LastSet names. It does nothing when LastSet names no
// message header field.
func (m *Message) AppendLastHeader(s string) {
	var field *string
	switch m.Header.LastSet {
	case HeaderSubject:
		field = &m.Header.Subject
	case HeaderDate:
		field = &m.Header.Date
	case HeaderFrom:
		field = &m.Header.From
	case HeaderTo:
		field = &m.Header.To
	case HeaderCc:
		field = &m.Header.Cc
	case HeaderMessageID:
		field = &m.Header.MessageID
	case HeaderContentType:
		field = &m.Header.ContentType
	default:
		return
	}
	*field += " " + s
}

// SetPartHeader stores the value of one header line in the header of the
// most recently appended part and records the field in that header's LastSet.
//
// The tracked headers are Content-Type and Content-Transfer-Encoding. Header
// names are compared without regard to letter case. The stored value is
// everything after the colon, unmodified.
//
// A line that matches neither header clears LastSet, so that a following
// continuation line is not appended to an unrelated earlier field.
func (m *Message) SetPartHeader(line string) {
	header := m.Parts[len(m.Parts)-1].Header

	// named reports whether line starts with the given header name,
	// ignoring letter case.
	named := func(name string) bool {
		return len(line) >= len(name) && strings.EqualFold(line[:len(name)], name)
	}

	switch {
	case named("Content-Type:"):
		header.ContentType = line[len("Content-Type:"):]
		header.LastSet = HeaderContentType
	case named("Content-Transfer-Encoding:"):
		header.ContentEncoding = line[len("Content-Transfer-Encoding:"):]
		header.LastSet = HeaderContentEncoding
	default:
		// Untracked header: forget the previous field so its value is not
		// extended by this header's continuation lines.
		header.LastSet = ""
	}
}

// AppendLastPartHeader appends s, preceded by a single space, to whichever
// field of the most recently appended part's header LastSet names. It does
// nothing when LastSet names neither ContentType nor ContentEncoding.
func (m *Message) AppendLastPartHeader(s string) {
	header := m.Parts[len(m.Parts)-1].Header
	if header.LastSet == HeaderContentType {
		header.ContentType += " " + s
		return
	}
	if header.LastSet == HeaderContentEncoding {
		header.ContentEncoding += " " + s
	}
}

// AppendPart adds a new, empty part to the end of the message's parts.
func (m *Message) AppendPart() {
	fresh := NewPart()
	m.Parts = append(m.Parts, fresh)
}

// AppendContent adds s as a new content line of the most recently appended
// part.
func (m *Message) AppendContent(s string) {
	last := m.Parts[len(m.Parts)-1]
	last.Content = append(last.Content, s)
}

// FindBoundary applies re to the message's Content-Type value and, when it
// matches, compiles a pattern that recognizes any line containing the
// captured boundary text and stores it in PartBoundary.
//
// The boundary is taken from the first capture group of re, with spaces
// removed. It is escaped before compilation so that characters which are
// regular expression metacharacters (for example '+', '?', '(' or '.') are
// matched literally.
//
// PartBoundary is left untouched when re does not match, has no capture
// group, or captures an empty boundary; an empty boundary would otherwise
// produce a pattern that matches every line. If compilation fails,
// PartBoundary is set to nil.
func (m *Message) FindBoundary(re *regexp.Regexp) {
	match := re.FindStringSubmatch(m.Header.ContentType)
	if len(match) < 2 {
		return
	}

	boundary := strings.Replace(match[1], " ", "", -1)
	if boundary == "" {
		return
	}

	compiled, err := regexp.Compile(".*" + regexp.QuoteMeta(boundary) + ".*")
	if err != nil {
		// Compilation can still fail, e.g. on invalid UTF-8; behave as if
		// no boundary were known.
		m.PartBoundary = nil
		return
	}
	m.PartBoundary = compiled
}

// Message logic

// MatchBoundary reports whether line matches the message's part boundary
// pattern. It reports false when no boundary pattern has been set.
func (m *Message) MatchBoundary(line string) bool {
	return m.PartBoundary != nil && m.PartBoundary.MatchString(line)
}

// Dump is a message printer. It writes the message header fields to standard
// output, followed by every part: the part index, the part's content type and
// encoding, its content wrapped to 80 columns, and a closing "EOF" line.
func (m *Message) Dump() {
	header := m.Header
	fmt.Printf("Subject: %s\n", header.Subject)
	fmt.Printf("Date: %s\n", header.Date)
	fmt.Printf("From: %s\n", header.From)
	fmt.Printf("To: %s\n", header.To)
	fmt.Printf("Cc: %s\n", header.Cc)
	fmt.Printf("MessageID: %s\n", header.MessageID)
	fmt.Printf("ContentType: %s\n", header.ContentType)

	for i, part := range m.Parts {
		fmt.Printf("Part %d:\n", i)
		fmt.Printf("ContentType: %s\n", part.Header.ContentType)
		fmt.Printf("ContentEncoding: %s\n", part.Header.ContentEncoding)

		// The wrapping error is discarded; whatever was returned is printed.
		wrapped, _ := common.WrapArray(part.Content, 80)
		for _, line := range wrapped {
			fmt.Printf("%s\n", line)
		}
		fmt.Println("EOF")
	}
}

// Parser statuses
const (
	ParsingPreHeader  = "ParsingPreHeader"
	ParsingHeader     = "ParsingHeader"


@@ 220,7 43,7 @@ func parse_stream(reader io.Reader) {
	}

	parsing := ParsingPreHeader
	message := NewMessage()
	current_message := message.NewMessage()

	for input.Scan() {
		line := input.Text()


@@ 229,35 52,31 @@ func parse_stream(reader io.Reader) {
		if parsing == ParsingPreHeader {
			if re_header.MatchString(tline) {
				parsing = ParsingHeader
				message.SetHeader(tline)
				current_message.SetHeader(tline)
			}
		} else if parsing == ParsingHeader {
			if tline == "" {
				parsing = ParsingContent
				message.FindBoundary(re_multipart)
			} else if strings.HasPrefix(line, "\t") {
				message.AppendLastHeader(tline)
				current_message.FindBoundary(re_multipart)
			} else {
				message.SetHeader(tline)
				current_message.SetHeader(line)
			}
		} else if parsing == ParsingPartHeader {
			if tline == "" {
				parsing = ParsingContent
			} else if strings.HasPrefix(line, "\t") {
				message.AppendLastPartHeader(tline)
			} else {
				message.SetPartHeader(tline)
				current_message.SetPartHeader(line)
			}
		} else if parsing == ParsingContent {
			if re_message_break.MatchString(tline) {
				parsing = ParsingPreHeader
				message.Dump()
				message = NewMessage()
			} else if message.MatchBoundary(tline) {
				current_message.Dump()
				current_message = message.NewMessage()
			} else if current_message.MatchBoundary(tline) {
				parsing = ParsingPartHeader
				message.AppendPart()
				current_message.AppendPart()
			} else {
				message.AppendContent(tline)
				current_message.AppendContent(tline)
			}
		}
	}

A message/header.go => message/header.go +113 -0
@@ 0,0 1,113 @@
package message

import (
	"strings"
)

// Labels naming the header fields. The setters store one of them in LastSet
// to record which field was assigned most recently, so that a continuation
// line can be appended to the right field.
const (
	HeaderSubject         = "HeaderSubject"
	HeaderDate            = "HeaderDate"
	HeaderFrom            = "HeaderFrom"
	HeaderTo              = "HeaderTo"
	HeaderCc              = "HeaderCc"
	HeaderMessageID       = "HeaderMessageID"
	HeaderContentType     = "HeaderContentType"
	HeaderContentEncoding = "HeaderContentEncoding"
)

// MessageHeader holds the header fields tracked for a whole message.
type MessageHeader struct {
	Subject     string
	Date        string
	From        string
	To          string
	Cc          string
	MessageID   string
	ContentType string
	// LastSet names the field that was assigned most recently; it is empty
	// when the most recent header line was not a tracked header.
	LastSet string
}

// NewHeader returns a message header with every field empty.
func NewHeader() *MessageHeader {
	return &MessageHeader{}
}

// SetHeader consumes one raw header line.
//
// A line starting with a tab or a space is a folded continuation of the
// previous header and is appended to the field named by LastSet. Any other
// line is matched against the tracked headers (Subject, Date, From, To, Cc,
// Message-ID, Content-Type) without regard to letter case; on a match the
// text after the colon is stored with surrounding whitespace trimmed, and
// LastSet is updated.
//
// A line that is neither a continuation nor a tracked header clears LastSet,
// so that its own continuation lines are not appended to an unrelated
// earlier field.
func (m *MessageHeader) SetHeader(s string) {
	// named reports whether s starts with the given header name, ignoring
	// letter case.
	named := func(name string) bool {
		return len(s) >= len(name) && strings.EqualFold(s[:len(name)], name)
	}

	switch {
	case strings.HasPrefix(s, "\t"), strings.HasPrefix(s, " "):
		m.append_last_set(s)
	case named("Subject:"):
		m.Subject = strings.TrimSpace(s[len("Subject:"):])
		m.LastSet = HeaderSubject
	case named("Date:"):
		m.Date = strings.TrimSpace(s[len("Date:"):])
		m.LastSet = HeaderDate
	case named("From:"):
		m.From = strings.TrimSpace(s[len("From:"):])
		m.LastSet = HeaderFrom
	case named("To:"):
		m.To = strings.TrimSpace(s[len("To:"):])
		m.LastSet = HeaderTo
	case named("Cc:"):
		m.Cc = strings.TrimSpace(s[len("Cc:"):])
		m.LastSet = HeaderCc
	case named("Message-ID:"):
		m.MessageID = strings.TrimSpace(s[len("Message-ID:"):])
		m.LastSet = HeaderMessageID
	case named("Content-Type:"):
		m.ContentType = strings.TrimSpace(s[len("Content-Type:"):])
		m.LastSet = HeaderContentType
	default:
		// Untracked header: forget the previous field.
		m.LastSet = ""
	}
}

// append_last_set appends s, trimmed of surrounding whitespace and preceded
// by a single space, to the field named by LastSet. It does nothing when
// LastSet names no field of this header.
func (m *MessageHeader) append_last_set(s string) {
	s = strings.TrimSpace(s)
	switch m.LastSet {
	case HeaderSubject:
		m.Subject += " " + s
	case HeaderDate:
		m.Date += " " + s
	case HeaderFrom:
		m.From += " " + s
	case HeaderTo:
		m.To += " " + s
	case HeaderCc:
		m.Cc += " " + s
	case HeaderMessageID:
		m.MessageID += " " + s
	case HeaderContentType:
		m.ContentType += " " + s
	}
}

// MessagePartHeader holds the header fields tracked for a single message
// part.
type MessagePartHeader struct {
	ContentType     string
	ContentEncoding string
	// LastSet names the field that was assigned most recently.
	LastSet string
}

// NewPartHeader returns a message part header with every field empty.
func NewPartHeader() *MessagePartHeader {
	return new(MessagePartHeader)
}

// SetHeader consumes one raw header line of a message part.
//
// A line starting with a tab or a space is a folded continuation of the
// previous header and is appended to the field named by LastSet. Any other
// line is matched against Content-Type and Content-Transfer-Encoding without
// regard to letter case; on a match the text after the colon is stored with
// surrounding whitespace trimmed, and LastSet is updated.
//
// A line that is neither a continuation nor a tracked header clears LastSet,
// so that its own continuation lines are not appended to an unrelated
// earlier field.
func (m *MessagePartHeader) SetHeader(s string) {
	// named reports whether s starts with the given header name, ignoring
	// letter case.
	named := func(name string) bool {
		return len(s) >= len(name) && strings.EqualFold(s[:len(name)], name)
	}

	switch {
	case strings.HasPrefix(s, "\t"), strings.HasPrefix(s, " "):
		m.append_last_set(s)
	case named("Content-Type:"):
		m.ContentType = strings.TrimSpace(s[len("Content-Type:"):])
		m.LastSet = HeaderContentType
	case named("Content-Transfer-Encoding:"):
		m.ContentEncoding = strings.TrimSpace(s[len("Content-Transfer-Encoding:"):])
		m.LastSet = HeaderContentEncoding
	default:
		// Untracked header: forget the previous field.
		m.LastSet = ""
	}
}

// append_last_set appends s, trimmed of surrounding whitespace and preceded
// by a single space, to the field named by LastSet. It does nothing when
// LastSet names neither ContentType nor ContentEncoding.
func (m *MessagePartHeader) append_last_set(s string) {
	addition := " " + strings.TrimSpace(s)
	if m.LastSet == HeaderContentType {
		m.ContentType += addition
		return
	}
	if m.LastSet == HeaderContentEncoding {
		m.ContentEncoding += addition
	}
}


A message/message.go => message/message.go +105 -0
@@ 0,0 1,105 @@
package message

import (
	"fmt"
	"strings"
	"regexp"

	textwrap "git.dominic-ricottone.com/textwrap/common"

	"git.dominic-ricottone.com/digestion/decoder"
)

// Message is a parsed message: its header, its parts, and the compiled
// pattern used to recognize part boundaries (nil until one has been found).
type Message struct {
	Header       *MessageHeader
	Parts        []*MessagePart
	PartBoundary *regexp.Regexp
}

// NewMessage returns a message with an empty header, exactly one empty part,
// and no part boundary.
func NewMessage() *Message {
	msg := &Message{Header: NewHeader()}
	msg.Parts = []*MessagePart{NewPart()}
	return msg
}

// SetHeader passes one raw header line on to the message header.
func (m *Message) SetHeader(s string) {
	header := m.Header
	header.SetHeader(s)
}

// SetPartHeader passes one raw header line on to the header of the most
// recently appended part.
func (m *Message) SetPartHeader(s string) {
	last := m.Parts[len(m.Parts)-1]
	last.Header.SetHeader(s)
}

// AppendPart adds a new, empty part to the end of the message's parts.
func (m *Message) AppendPart() {
	fresh := NewPart()
	m.Parts = append(m.Parts, fresh)
}

// AppendContent adds s as a new content line of the most recently appended
// part.
func (m *Message) AppendContent(s string) {
	last := m.Parts[len(m.Parts)-1]
	last.Content = append(last.Content, s)
}

// FindBoundary applies re to the message's Content-Type value and, when it
// matches, compiles a pattern that recognizes any line containing the
// captured boundary text and stores it in PartBoundary.
//
// The boundary is taken from the first capture group of re, with spaces
// removed. It is escaped before compilation so that characters which are
// regular expression metacharacters (for example '+', '?', '(' or '.') are
// matched literally.
//
// PartBoundary is left untouched when re does not match, has no capture
// group, or captures an empty boundary; an empty boundary would otherwise
// produce a pattern that matches every line. If compilation fails,
// PartBoundary is set to nil.
func (m *Message) FindBoundary(re *regexp.Regexp) {
	match := re.FindStringSubmatch(m.Header.ContentType)
	if len(match) < 2 {
		return
	}

	boundary := strings.Replace(match[1], " ", "", -1)
	if boundary == "" {
		return
	}

	compiled, err := regexp.Compile(".*" + regexp.QuoteMeta(boundary) + ".*")
	if err != nil {
		// Compilation can still fail, e.g. on invalid UTF-8; behave as if
		// no boundary were known.
		m.PartBoundary = nil
		return
	}
	m.PartBoundary = compiled
}

// MatchBoundary reports whether line matches the message's part boundary
// pattern. It reports false when no boundary pattern has been set.
func (m *Message) MatchBoundary(line string) bool {
	return m.PartBoundary != nil && m.PartBoundary.MatchString(line)
}

// DetermineBestPart returns the index of the part with the lowest combined
// content-type and content-encoding score (lower is better). When several
// parts share the lowest score, the earliest one wins.
//
// It returns -1 when the message has no parts, and 0 without scoring when it
// has exactly one.
func (m *Message) DetermineBestPart() int {
	switch len(m.Parts) {
	case 0:
		return -1
	case 1:
		return 0
	}

	// Single pass: score each part and remember the first index that holds
	// the lowest score seen so far.
	best, bestScore := 0, 0
	for idx, part := range m.Parts {
		score := part.evaluateContentType()
		score += part.evaluateContentEncoding()
		if idx == 0 || score < bestScore {
			best, bestScore = idx, score
		}
	}
	return best
}

// Dump writes the message header fields to standard output, followed by the
// best part (as chosen by DetermineBestPart): its content type and encoding,
// then its content decoded and wrapped to 80 columns. Nothing beyond the
// message header is printed when there is no best part.
func (m *Message) Dump() {
	header := m.Header
	fmt.Printf("Subject: %s\n", header.Subject)
	fmt.Printf("Date: %s\n", header.Date)
	fmt.Printf("From: %s\n", header.From)
	fmt.Printf("To: %s\n", header.To)
	fmt.Printf("Cc: %s\n", header.Cc)
	fmt.Printf("MessageID: %s\n", header.MessageID)
	fmt.Printf("ContentType: %s\n", header.ContentType)

	best := m.DetermineBestPart()
	if best == -1 {
		return
	}

	part := m.Parts[best]
	fmt.Printf("ContentType: %s\n", part.Header.ContentType)
	fmt.Printf("ContentEncoding: %s\n", part.Header.ContentEncoding)

	// NOTE(review): decoding and wrapping errors are discarded here;
	// whatever the calls returned is printed as-is.
	decoded, _ := decoder.DecodeArray(part.Content, part.Header.ContentEncoding)
	wrapped, _ := textwrap.WrapArray(decoded, 80)

	for _, line := range wrapped {
		fmt.Printf("%s\n", line)
	}
}


A message/part.go => message/part.go +24 -0
@@ 0,0 1,24 @@
package message

import (
	"git.dominic-ricottone.com/digestion/decoder"
)

// MessagePart is one part of a message: its header plus its content lines.
type MessagePart struct {
	Header  *MessagePartHeader
	Content []string
}

// NewPart returns a message part with an empty header and a content slice
// holding a single empty string.
func NewPart() *MessagePart {
	part := new(MessagePart)
	part.Header = NewPartHeader()
	part.Content = []string{""}
	return part
}

// evaluateContentType scores this part's content type. It returns 0 for
// every part, so the content type does not currently differentiate between
// parts.
func (m *MessagePart) evaluateContentType() int {
	return 0
}

// evaluateContentEncoding scores this part's content transfer encoding using
// decoder.EvaluateEncoding.
func (m *MessagePart) evaluateContentEncoding() int {
	encoding := m.Header.ContentEncoding
	return decoder.EvaluateEncoding(encoding)
}