package main
import (
	"archive/zip"
	"encoding/xml"
	"fmt"
	"io"
	"os"
	"sort"
	"strings"
)
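// NOTE: The Paragraph, BlockQuote, Division, Body, and Xhtml types used below
// are assumed to be defined elsewhere in this package. A minimal sketch that
// would satisfy the field accesses in this file might look like the following
// (the field names and xml struct tags here are assumptions, not the actual
// definitions):
//
//	type Paragraph struct {
//		Content string `xml:",innerxml"`
//	}
//
//	type BlockQuote struct {
//		Paragraphs []Paragraph `xml:"p"`
//	}
//
//	type Division struct {
//		Divisions   []Division   `xml:"div"`
//		BlockQuotes []BlockQuote `xml:"blockquote"`
//		Paragraphs  []Paragraph  `xml:"p"`
//	}
//
//	type Body struct {
//		Division Division `xml:"div"`
//	}
//
//	type Xhtml struct {
//		XMLName xml.Name `xml:"html"`
//		Body    Body     `xml:"body"`
//	}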
// EPUB XHTML allows arbitrary nesting of divisions. To strip the excess div
// tags, we recursively extract the paragraphs from nested divisions.
// Recommended usage:
// Xhtml.Body.Division.Paragraphs = normalize_division(Xhtml.Body.Division)
// Xhtml.Body.Division.Divisions = []Division{}
func normalize_division(div Division) []Paragraph {
	// If the div contains p tags directly, return those
	if len(div.Paragraphs) != 0 {
		return div.Paragraphs
	}
	var pars []Paragraph
	// If the div contains blockquote tags, return their nested p tags
	if len(div.BlockQuotes) != 0 {
		for _, quote := range div.BlockQuotes {
			pars = append(pars, quote.Paragraphs...)
		}
		return pars
	}
	// Otherwise recurse into the nested div tags
	for _, nested_div := range div.Divisions {
		pars = append(pars, normalize_division(nested_div)...)
	}
	return pars
}
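// NOTE: The Ncx and NavPoint types used below are likewise assumed to be
// defined elsewhere. A sketch that matches how this file reads the toc.ncx
// document might look like the following (field names and xml struct tags
// are assumptions):
//
//	type NavPoint struct {
//		Order int    `xml:"playOrder,attr"`
//		Label string `xml:"navLabel>text"`
//	}
//
//	type Ncx struct {
//		XMLName   xml.Name   `xml:"ncx"`
//		Title     string     `xml:"docTitle>text"`
//		NavPoints []NavPoint `xml:"navMap>navPoint"`
//	}
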
func dump_archive(filename string) error {
	// Open archive
	areader, err := zip.OpenReader(filename)
	if err != nil {
		return err
	}
	defer areader.Close()
	// Loop over files in archive
	for _, file := range areader.File {
		// Skip these less useful files
		if file.Name == "mimetype" || strings.HasPrefix(file.Name, "META-INF") ||
			strings.HasSuffix(file.Name, ".css") || file.Name == "content.opf" {
			fmt.Printf("Skipping %s...\n", file.Name)
			continue
		}
		// Open the file and copy its contents into a string builder
		fmt.Printf("Contents of %s:\n", file.Name)
		freader, err := file.Open()
		if err != nil {
			fmt.Printf("error: %s\n", err)
			continue
		}
		buffer := new(strings.Builder)
		_, err = io.Copy(buffer, freader)
		freader.Close()
		if err != nil {
			fmt.Printf("error: %s\n", err)
			continue
		}
		if file.Name == "toc.ncx" {
			// Parse the table of contents and print its entries in play order
			target := Ncx{}
			if err := xml.Unmarshal([]byte(buffer.String()), &target); err != nil {
				fmt.Printf("error: %s\n", err)
				continue
			}
			fmt.Println(target.Title)
			sort.Slice(target.NavPoints, func(i, j int) bool {
				return target.NavPoints[i].Order < target.NavPoints[j].Order
			})
			html, err := xml.MarshalIndent(&target, "", " ")
			if err != nil {
				fmt.Printf("error: %s\n", err)
				continue
			}
			fmt.Println(string(html))
		} else {
			// Parse an XHTML content document and flatten its nested divisions
			target := Xhtml{}
			if err := xml.Unmarshal([]byte(buffer.String()), &target); err != nil {
				fmt.Printf("error: %s\n", err)
				continue
			}
			target.Body.Division.Paragraphs = normalize_division(target.Body.Division)
			target.Body.Division.Divisions = []Division{}
			html, err := xml.MarshalIndent(&target, "", " ")
			if err != nil {
				fmt.Printf("error: %s\n", err)
				continue
			}
			fmt.Println(string(html))
		}
	}
	return nil
}
func main() {
	// Process arguments: use the first argument as the EPUB path if one is given
	filename := "the_future_is_female.epub"
	if len(os.Args) > 1 {
		filename = os.Args[1]
	}
	if err := dump_archive(filename); err != nil {
		fmt.Printf("fatal error: %s\n", err)
	}
}
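// Example invocation, assuming the supporting types compile alongside this
// file (the sample EPUB name is the one hard-coded above):
//
//	go run . the_future_is_female.epub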