~dricottone/digestion

ce5b7f835e1ffb0d4aed72bbaf9920a025293312 — Dominic Ricottone 4 years ago 62eccd9
Refactored and refined API
5 files changed, 145 insertions(+), 115 deletions(-)

M decoder/encodings.go
M main.go
A message/format.go
M message/message.go
M message/part.go
M decoder/encodings.go => decoder/encodings.go +12 -29
@@ 5,46 5,29 @@ import (
)

const (
	UTF8            = "UTF8"
	Base64          = "Base64"
	QuotedPrintable = "QuotedPrintable"
	Unknown         = "Unknown"
	EncodedUTF8            = "EncodedUTF8"
	EncodedBase64          = "EncodedBase64"
	EncodedQuotedPrintable = "EncodedQuotedPrintable"
	EncodedUnknown         = "EncodedUnknown"
)

func determine_encoding(encoding string) string {
func DetermineEncoding(encoding string) string {
	if strings.Contains(encoding, "base64") {
		return Base64
		return EncodedBase64
	} else if strings.Contains(encoding, "quoted-printable") {
		return QuotedPrintable
		return EncodedQuotedPrintable
	} else if strings.Contains(encoding, "utf-8") {
		return UTF8
		return EncodedUTF8
	} else {
		return Unknown
	}
}

// Translate an encoding label into a numeric value according to preference
// of use in processing. Preference tiers are:
//  1. UTF-8
//  2. base64, quoted-printable
func EvaluateEncoding(encoding string) int {
	switch determine_encoding(encoding) {
	case UTF8:
		return 0
	case Base64:
		return 1
	case QuotedPrintable:
		return 1
	default:
		return 10
		return EncodedUnknown
	}
}

func DecodeArray(lines []string, encoding string) ([]string, error) {
	switch determine_encoding(encoding) {
	case Base64:
	switch DetermineEncoding(encoding) {
	case EncodedBase64:
		return decode_base64(lines)
	case QuotedPrintable:
	case EncodedQuotedPrintable:
		return decode_quotedprintable(lines)
	default:
		return lines, nil

M main.go => main.go +17 -2
@@ 18,6 18,19 @@ const (
	ParsingContent    = "ParsingContent"
)

// contains_nonempty reports whether s2 is a non-empty substring of s1.
// An empty s2 never matches, unlike strings.Contains, which reports true
// for an empty substring.
func contains_nonempty(s1, s2 string) bool {
	if s2 == "" {
		return false
	}
	return strings.Contains(s1, s2)
}

// first_submatch returns the text captured by the first group of r in s,
// with every space character removed. It returns "" when r does not match s
// or when r declares no capture group.
func first_submatch(r regexp.Regexp, s string) string {
	matches := r.FindStringSubmatch(s)
	// matches[0] is the whole match; index 1 exists only when r has a group,
	// so check the length rather than just nil to avoid an out-of-range panic.
	if len(matches) < 2 {
		return ""
	}
	return strings.Replace(matches[1], " ", "", -1)
}

func parse_stream(reader io.Reader) {
	// Create scanner from reader
	input := bufio.NewScanner(reader)


@@ 44,6 57,7 @@ func parse_stream(reader io.Reader) {

	parsing := ParsingPreHeader
	current_message := message.NewMessage()
	current_boundary := ""

	for input.Scan() {
		line := input.Text()


@@ 57,7 71,7 @@ func parse_stream(reader io.Reader) {
		} else if parsing == ParsingHeader {
			if tline == "" {
				parsing = ParsingContent
				current_message.FindBoundary(re_multipart)
				current_boundary = first_submatch(*re_multipart, current_message.Header.ContentType)
			} else {
				current_message.SetHeader(line)
			}


@@ 72,7 86,8 @@ func parse_stream(reader io.Reader) {
				parsing = ParsingPreHeader
				current_message.Dump()
				current_message = message.NewMessage()
			} else if current_message.MatchBoundary(tline) {
				current_boundary = ""
			} else if contains_nonempty(tline, current_boundary) {
				parsing = ParsingPartHeader
				current_message.AppendPart()
			} else {

A message/format.go => message/format.go +101 -0
@@ 0,0 1,101 @@
package message

import (
	"fmt"
	"strings"

	textwrap "git.dominic-ricottone.com/textwrap/common"

	"git.dominic-ricottone.com/digestion/decoder"
)

// determine_best_part returns the index into m.Parts of the part with the
// lowest combined type and encoding score (lower is better), or -1 when the
// message has no parts. When several parts tie, the earliest one is returned.
func (m *Message) determine_best_part() int {
	switch len(m.Parts) {
	case 0:
		return -1
	case 1:
		// A lone part is trivially the best; no scoring is needed.
		return 0
	}

	// Score each part in order, remembering the lowest score seen so far.
	winner, lowest := 0, 0
	for idx, part := range m.Parts {
		score := part.evaluate_type() + part.evaluate_encoding()
		if idx == 0 || score < lowest {
			winner, lowest = idx, score
		}
	}
	return winner
}

// format_header renders the message header as display lines, one per
// non-empty field, in the order Subject, Date, From, To, Cc. Fields whose
// value is the empty string are omitted. The result is never nil.
func (m *Message) format_header() []string {
	fields := []struct {
		layout string
		value  string
	}{
		{"Subject: %s", m.Header.Subject},
		{"Date: %s", m.Header.Date},
		{"From: %s", m.Header.From},
		{"To: %s", m.Header.To},
		{"Cc: %s", m.Header.Cc},
		// Currently not emitted:
		// {"MessageID: %s", m.Header.MessageID},
		// {"ContentType: %s", m.Header.ContentType},
	}

	lines := []string{}
	for _, field := range fields {
		if field.value == "" {
			continue
		}
		lines = append(lines, fmt.Sprintf(field.layout, field.value))
	}
	return lines
}

// format_content decodes the content of the part chosen by
// determine_best_part and passes the decoded lines through
// textwrap.WrapArray with the given length.
//
// It returns an empty slice and a nil error when the message has no parts,
// an empty slice and the error when decoding fails, and the decoded but
// unwrapped lines together with the error when wrapping fails.
func (m *Message) format_content(length int) ([]string, error) {
	idx := m.determine_best_part()
	if idx == -1 {
		// No parts at all: nothing to decode or wrap.
		return []string{}, nil
	}

	part := m.Parts[idx]
	decoded, err := decoder.DecodeArray(part.Content, part.Header.ContentEncoding)
	if err != nil {
		return []string{}, err
	}

	wrapped, err := textwrap.WrapArray(decoded, length)
	if err != nil {
		// Hand back the unwrapped text alongside the error.
		return decoded, err
	}
	return wrapped, nil
}

// Dump prints the message to standard output: the formatted header lines,
// then the formatted content from format_content(80), then a blank line, a
// rule of 80 dashes and another blank line.
//
// If formatting the content fails, the error is printed on its own line
// first, and whatever content format_content returned is still printed.
func (m *Message) Dump() {
	header := m.format_header()
	content, err := m.format_content(80)
	if err != nil {
		// End the report with a newline so it is not fused with the first
		// header line printed below.
		fmt.Printf("error: %s\n", err)
	}

	for _, line := range append(header, content...) {
		fmt.Printf("%s\n", line)
	}
	fmt.Printf("\n%s\n\n", strings.Repeat("-", 80))
}


M message/message.go => message/message.go +1 -78
@@ 1,23 1,12 @@
package message

import (
	"fmt"
	"strings"
	"regexp"

	textwrap "git.dominic-ricottone.com/textwrap/common"

	"git.dominic-ricottone.com/digestion/decoder"
)

type Message struct {
	Header       *MessageHeader
	Parts        []*MessagePart
	PartBoundary *regexp.Regexp
}

func NewMessage() *Message {
	return &Message{NewHeader(), []*MessagePart{NewPart()}, nil}
	return &Message{NewHeader(), []*MessagePart{NewPart()}}
}

func (m *Message) SetHeader(s string) {


@@ 37,69 26,3 @@ func (m *Message) AppendContent(s string) {
	m.Parts[i].Content = append(m.Parts[i].Content, s)
}

// FindBoundary applies re to m.Header.ContentType and, when it matches,
// stores in m.PartBoundary a pattern that matches any line containing the
// first captured group with its spaces removed.
//
// The captured text is treated as a literal: regexp metacharacters in it
// (for example '+', '(', ')', '?' or '.') are escaped before compiling, so
// they neither change what the pattern matches nor make compilation fail.
// m.PartBoundary is left untouched when re does not match or has no capture
// group.
func (m *Message) FindBoundary(re *regexp.Regexp) {
	match := re.FindStringSubmatch(m.Header.ContentType)
	// match[0] is the whole match; index 1 exists only if re has a group.
	if len(match) < 2 {
		return
	}

	boundary := strings.Replace(match[1], " ", "", -1)
	pattern, err := regexp.Compile(".*" + regexp.QuoteMeta(boundary) + ".*")
	if err != nil {
		// Not expected for a quoted literal; keep the previous boundary
		// rather than storing a nil pattern.
		return
	}
	m.PartBoundary = pattern
}

// MatchBoundary reports whether line matches m.PartBoundary. It returns
// false when no boundary pattern has been set.
func (m *Message) MatchBoundary(line string) bool {
	return m.PartBoundary != nil && m.PartBoundary.MatchString(line)
}

// DetermineBestPart returns the index into m.Parts of the part whose
// combined content-type and content-encoding score is lowest (lower is
// better), or -1 when the message has no parts. Ties go to the earliest part.
func (m *Message) DetermineBestPart() int {
	count := len(m.Parts)
	if count == 0 {
		return -1
	}
	if count == 1 {
		// Nothing to compare against.
		return 0
	}

	// Compute one score per part, in part order.
	scores := make([]int, 0, count)
	for _, part := range m.Parts {
		scores = append(scores, part.evaluateContentType()+part.evaluateContentEncoding())
	}

	// Pick the first index holding the minimum score.
	best := 0
	for idx, score := range scores {
		if score < scores[best] {
			best = idx
		}
	}
	return best
}

// Dump prints the message header fields to standard output, followed by the
// content type, content encoding and body of the part chosen by
// DetermineBestPart. The body is decoded with decoder.DecodeArray and passed
// through textwrap.WrapArray with a length of 80.
//
// Nothing beyond the header is printed when the message has no parts. A
// decoding error is reported and the body is skipped. A wrapping error is
// reported and the decoded lines are printed unwrapped.
func (m *Message) Dump() {
	fmt.Printf("Subject: %s\n", m.Header.Subject)
	fmt.Printf("Date: %s\n", m.Header.Date)
	fmt.Printf("From: %s\n", m.Header.From)
	fmt.Printf("To: %s\n", m.Header.To)
	fmt.Printf("Cc: %s\n", m.Header.Cc)
	fmt.Printf("MessageID: %s\n", m.Header.MessageID)
	fmt.Printf("ContentType: %s\n", m.Header.ContentType)

	index := m.DetermineBestPart()
	if index == -1 {
		return
	}

	part := m.Parts[index]
	fmt.Printf("ContentType: %s\n", part.Header.ContentType)
	fmt.Printf("ContentEncoding: %s\n", part.Header.ContentEncoding)

	decoded, err := decoder.DecodeArray(part.Content, part.Header.ContentEncoding)
	if err != nil {
		// Report the failure instead of silently printing a bad body.
		fmt.Printf("error: %s\n", err)
		return
	}

	wrapped, err := textwrap.WrapArray(decoded, 80)
	if err != nil {
		// Fall back to the unwrapped text so the body is still shown.
		fmt.Printf("error: %s\n", err)
		wrapped = decoded
	}

	for _, line := range wrapped {
		fmt.Printf("%s\n", line)
	}
}


M message/part.go => message/part.go +14 -6
@@ 5,20 5,28 @@ import (
)

type MessagePart struct {
	Header      *MessagePartHeader
	Content     []string
	Header             *MessagePartHeader
	Content            []string
}

// NewPart returns a MessagePart whose header comes from NewPartHeader and
// whose content holds a single empty string.
func NewPart() *MessagePart {
	return &MessagePart{
		Header:  NewPartHeader(),
		Content: []string{""},
	}
}

func (m *MessagePart) evaluateContentType() int {
func (m *MessagePart) evaluate_type() int {
	return 0
}

func (m *MessagePart) evaluateContentEncoding() int {
	return decoder.EvaluateEncoding(m.Header.ContentEncoding)
func (m *MessagePart) evaluate_encoding() int {
	switch decoder.DetermineEncoding(m.Header.ContentEncoding) {
	case decoder.EncodedUTF8:
		return 0
	case decoder.EncodedBase64:
		return 1
	case decoder.EncodedQuotedPrintable:
		return 1
	default:
		return 10
	}
}