~dricottone/epub2html

ref: f7d5416bafe786b9eed8459efee8f52d09385d2f epub2html/xml.go -rw-r--r-- 1.9 KiB
f7d5416bDominic Ricottone Fixing blockquotes 2 years ago
                                                                                
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
// Structs for parsing X(HT)?ML files in e-pub archives

package main

import (
	"io"
	"encoding/xml"
)

// Head models the <head> element of an XHTML document. Only the text of its
// <title> child is captured.
type Head struct {
	Title string `xml:"title"`
}

// Paragraph holds one <p> element.
//
// Text is the element's raw inner XML (",innerxml"), so nested markup is kept
// verbatim instead of being parsed. Order is excluded from the XML mapping
// (`xml:"-"`); Division.UnmarshalXML assigns it the element's position among
// the <p> and <blockquote> elements it collects for a division.
type Paragraph struct {
	Text  string `xml:",innerxml"`
	Order int    `xml:"-"`
}

// BlockQuote holds one <blockquote> element together with the <p> children
// decoded from it.
//
// Order is excluded from the XML mapping (`xml:"-"`); Division.UnmarshalXML
// assigns it from the same counter it uses for Paragraph.Order. That method
// does not set Order on the paragraphs inside a block quote.
type BlockQuote struct {
	Paragraphs []Paragraph `xml:"p"`
	Order      int         `xml:"-"`
}

// Division holds one <div> element: its nested <div> elements, its <p>
// elements and its <blockquote> elements, each kept in a separate slice.
//
// Because *Division implements xml.Unmarshaler, decoding is performed by
// UnmarshalXML, which fills these slices itself and records the relative
// position of paragraphs and block quotes in their Order fields.
type Division struct {
	Divisions   []Division   `xml:"div"`
	Paragraphs  []Paragraph  `xml:"p"`
	BlockQuotes []BlockQuote `xml:"blockquote"`
}

// UnmarshalXML implements xml.Unmarshaler for Division. It reads the tokens
// inside the element opened by start and appends every <p>, <blockquote> and
// <div> it meets to d.Paragraphs, d.BlockQuotes and d.Divisions.
//
// Paragraphs and block quotes share one counter, stored in their Order
// fields, that records their relative position within this division. The
// counter starts at zero on every call and nested divisions do not advance
// it.
//
// Elements with any other name are not decoded as a unit: their tokens are
// read by this same loop, so a <p>, <blockquote> or <div> nested inside them
// is collected as if it were a direct child.
//
// The loop ends when decoder.Token reports io.EOF. Any other token error and
// any error from decoding a child element is returned; a child that fails to
// decode is not appended.
func (d *Division) UnmarshalXML(decoder *xml.Decoder, start xml.StartElement) error {
	counter := 0

	for {
		token, err := decoder.Token()
		if err == io.EOF {
			// When called by the xml package, Token reports io.EOF once the
			// element opened by start has been consumed.
			return nil
		}
		if err != nil {
			return err
		}

		child, ok := token.(xml.StartElement)
		if !ok {
			// Character data, comments and end elements carry nothing to keep.
			continue
		}

		switch child.Name.Local {
		case "p":
			var target Paragraph
			if err := decoder.DecodeElement(&target, &child); err != nil {
				return err
			}
			target.Order = counter
			counter++
			d.Paragraphs = append(d.Paragraphs, target)
		case "blockquote":
			var target BlockQuote
			if err := decoder.DecodeElement(&target, &child); err != nil {
				return err
			}
			target.Order = counter
			counter++
			d.BlockQuotes = append(d.BlockQuotes, target)
		case "div":
			// Nested divisions recurse through this method and keep their own
			// counter; they do not take a slot in this division's ordering.
			var target Division
			if err := decoder.DecodeElement(&target, &child); err != nil {
				return err
			}
			d.Divisions = append(d.Divisions, target)
		}
	}
}

// Body models the <body> element of an XHTML document. Title is taken from
// an <h3> child and Division from a <div> child.
type Body struct {
	Title    string   `xml:"h3"`
	Division Division `xml:"div"`
}

// Xhtml is the top-level value for decoding an XHTML document. XMLName ties
// it to an <html> root element; Head and Body map to the <head> and <body>
// children.
type Xhtml struct {
	XMLName xml.Name `xml:"html"`
	Head    Head     `xml:"head"`
	Body    Body     `xml:"body"`
}

// Content captures the src attribute of an element. NavPoint uses it for its
// <content> child.
type Content struct {
	Src string `xml:"src,attr"`
}

// NavPoint is one <navPoint> entry of an NCX navigation map. Label is the
// text of its <navLabel><text> descendant, Content maps to its <content>
// child, and Order is read from its playOrder attribute.
type NavPoint struct {
	Label   string  `xml:"navLabel>text"`
	Content Content `xml:"content"`
	Order   int     `xml:"playOrder,attr"`
}

// Ncx is the top-level value for decoding an NCX document rooted at <ncx>.
// Title is the text of <docTitle><text>, and NavPoints collects every
// <navPoint> found under <navMap>.
type Ncx struct {
	XMLName   xml.Name   `xml:"ncx"`
	Title     string     `xml:"docTitle>text"`
	NavPoints []NavPoint `xml:"navMap>navPoint"`
}