~dricottone/parcels

ref: 92dafdc0448b6e46e77d10b613c3e7ebb0c077c7 parcels/common/parcels.go -rw-r--r-- 3.5 KiB
92dafdc0Dominic Ricottone Bug fix 2 years ago
                                                                                
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
package common

import (
	"fmt"
	"io"
	"bufio"
	"strings"
	"regexp"
)

// urlPattern is the compiled form of UrlPattern, built once when the package
// is initialized and reused by the scanner helpers below. UrlPattern is not
// declared in the visible part of this file (presumably elsewhere in the
// package); regexp.MustCompile panics if it is not a valid expression.
var urlPattern = regexp.MustCompile(UrlPattern)

// replace returns a copy of str in which the bytes in the half-open range
// [beginning, end) are swapped for a bracketed reference marker such as
// "[3]", where the number shown is replacement. The indices are byte offsets
// into str and must satisfy the usual slicing bounds, otherwise the slice
// expressions panic.
func replace(str string, beginning int, end int, replacement int) string {
	marker := fmt.Sprintf("[%d]", replacement)

	var out strings.Builder
	out.WriteString(str[:beginning])
	out.WriteString(marker)
	out.WriteString(str[end:])
	return out.String()
}

// pullFromScanner returns the URL at zero-based position target among all
// matches of urlPattern in the scanner's input, counting matches line by line
// in reading order.
//
// It returns an empty string and a nil error when no such URL exists: either
// the input holds target or fewer URLs, or target is negative. Scanning stops
// as soon as the requested URL is found. Any error reported by the scanner is
// returned alongside whatever was found.
func pullFromScanner(scanner *bufio.Scanner, target int) (string, error) {
	// A negative target cannot name any URL in this input. Without this
	// guard the comparison below would succeed on the very first line and
	// index matches with a negative value, which panics.
	if target < 0 {
		return "", nil
	}

	found := ""
	skipped := 0 // number of URLs seen on the lines already consumed

	for scanner.Scan() {
		line := scanner.Text()
		matches := urlPattern.FindAllStringIndex(line, -1)

		// The target is on this line when its position relative to the
		// start of the line falls inside this line's matches.
		if idx := target - skipped; idx < len(matches) {
			found = line[matches[idx][0]:matches[idx][1]]
			break
		}

		skipped += len(matches)
	}

	// scanner.Err is nil on a clean end of input.
	return found, scanner.Err()
}

// parseFromScanner reads the scanner line by line, swaps every match of
// urlPattern for a numbered marker of the form "[N]", and collects the
// matched URLs.
//
// It returns the rewritten text (each line terminated by "\n"), a listing
// with one "[N] url" line per URL, and any error reported by the scanner.
// Numbering starts at offset, so a caller processing several inputs in
// sequence can pass the count of URLs that came before this one. When the
// scanner fails, the text and listing gathered so far are still returned
// together with the error.
func parseFromScanner(scanner *bufio.Scanner, offset int) (string, string, error) {
	var (
		body    strings.Builder
		listing strings.Builder
	)
	next := offset // number given to the first URL on the current line

	for scanner.Scan() {
		text := scanner.Text()
		spans := urlPattern.FindAllStringIndex(text, -1)

		// Record the URLs in reading order while the spans still refer to
		// the unmodified line.
		for i, span := range spans {
			fmt.Fprintf(&listing, "[%d] %s\n", next+i, text[span[0]:span[1]])
		}

		// Substitute markers from right to left so the byte offsets of the
		// spans not yet processed remain valid.
		for i := len(spans) - 1; i >= 0; i-- {
			text = replace(text, spans[i][0], spans[i][1], next+i)
		}

		body.WriteString(text)
		body.WriteString("\n")

		next += len(spans)
	}

	// scanner.Err is nil on a clean end of input.
	return body.String(), listing.String(), scanner.Err()
}

// PullFromReader returns a single URL found in reader. target is the number
// of the wanted URL and offset is how many URLs preceded this reader, so the
// URL looked up is the one at position target-offset within reader. The
// lookup itself is delegated to pullFromScanner over a line scanner.
func PullFromReader(reader io.Reader, target int, offset int) (string, error) {
	local := target - offset
	lines := bufio.NewScanner(reader)
	return pullFromScanner(lines, local)
}

// PullFromString is the string counterpart of PullFromReader: it wraps str in
// a reader and forwards target and offset unchanged. offset is how many URLs
// preceded this string.
func PullFromString(str string, target int, offset int) (string, error) {
	src := strings.NewReader(str)
	return PullFromReader(src, target, offset)
}

// ParseFromReader extracts the URLs found in reader. It returns two strings,
// the reader's content after modification and a list of the URLs, plus any
// error. offset is how many URLs preceded this reader. The work is delegated
// to parseFromScanner over a line scanner.
func ParseFromReader(reader io.Reader, offset int) (string, string, error) {
	lines := bufio.NewScanner(reader)
	return parseFromScanner(lines, offset)
}

// ParseFromString is the string counterpart of ParseFromReader: it wraps str
// in a reader and forwards offset unchanged. It returns the modified content
// of str, a list of the URLs, and any error. offset is how many URLs preceded
// str.
func ParseFromString(str string, offset int) (string, string, error) {
	src := strings.NewReader(str)
	return ParseFromReader(src, offset)
}