-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathhtml.go
81 lines (73 loc) · 2.02 KB
/
html.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
package tokeq
import (
"io"
"net/http"
"strings"
"golang.org/x/net/html"
"golang.org/x/net/html/charset"
)
// IsTextHTML reports whether a Content-Type header value names the
// "text/html" media type.
//
// The comparison is case-insensitive because media types are
// case-insensitive ("Text/HTML; charset=UTF-8" is a valid HTML content
// type). Parameters such as "; charset=utf-8" are ignored: the check is a
// substring match on the lowercased header value.
//
// NOTE: a stricter comparison based on mime.ParseMediaType is possible; the
// lenient substring match is kept so that every header value accepted
// before is still accepted.
func IsTextHTML(contentType string) bool {
	return strings.Contains(strings.ToLower(contentType), "text/html")
}
// FindNodes walks every descendant of input in document (pre-order) order.
// For each descendant, match is called with the node's Type and DataAtom;
// when it returns true, callback is invoked with that node. The node's own
// subtree is visited afterwards, regardless of whether it matched.
//
// input itself is never tested against match, only its descendants.
func FindNodes(input *html.Node, match Matcher, callback MatcherCallback) {
	child := input.FirstChild
	for child != nil {
		if match(child.Type, child.DataAtom) {
			callback(child)
		}
		// Descend before moving on so that nested matches are reported
		// ahead of later siblings.
		FindNodes(child, match, callback)
		child = child.NextSibling
	}
}
// ParseResponse checks that response carries an HTML body and then runs
// toks over the parsed document.
//
// It returns ErrResponseBodyIsNotHTML when the Content-Type header is not
// accepted by IsTextHTML, and ErrResponseBodyIsEmpty when response.Body is
// nil. Otherwise the body is wrapped with charset.NewReader (using the
// Content-Type header) and handed to ParseReader; errors from either step
// are returned unchanged.
//
// The body is not closed here: the caller is responsible for calling
// response.Body.Close().
func ParseResponse(response *http.Response, toks ...Tok) error {
	ct := response.Header.Get("Content-Type")
	switch {
	case !IsTextHTML(ct):
		return ErrResponseBodyIsNotHTML
	case response.Body == nil:
		return ErrResponseBodyIsEmpty
	}

	decoded, err := charset.NewReader(response.Body, ct)
	if err != nil {
		return err
	}
	return ParseReader(decoded, toks...)
}
// ParseResponseWithDefer behaves like ParseResponse but also closes
// response.Body before returning.
//
// The body is closed on every return path, including the early returns for
// a non-HTML Content-Type or a charset.NewReader failure, so callers never
// need to close it themselves. A nil Body is left alone.
//
// It returns ErrResponseBodyIsNotHTML when the Content-Type header is not
// accepted by IsTextHTML, and ErrResponseBodyIsEmpty when response.Body is
// nil. Errors from charset.NewReader and ParseReader are returned unchanged.
func ParseResponseWithDefer(response *http.Response, toks ...Tok) error {
	// Register the close first: previously the non-HTML early return ran
	// before the defer was installed and leaked the body.
	if response.Body != nil {
		defer response.Body.Close()
	}

	contentType := response.Header.Get("Content-Type")
	if !IsTextHTML(contentType) {
		return ErrResponseBodyIsNotHTML
	}
	if response.Body == nil {
		return ErrResponseBodyIsEmpty
	}

	r, err := charset.NewReader(response.Body, contentType)
	if err != nil {
		return err
	}
	return ParseReader(r, toks...)
}
// ParseReader parses input as an HTML page with html.Parse and, on success,
// passes the resulting document root to DissectNodes together with toks.
// A parse error is returned as-is and no tok is run in that case.
func ParseReader(input io.Reader, toks ...Tok) error {
	root, parseErr := html.Parse(input)
	if parseErr == nil {
		DissectNodes(root, toks...)
	}
	return parseErr
}
// DissectNodes runs each tok over the tree rooted at input, in the order
// the toks were given. Every tok triggers its own full FindNodes traversal
// using that tok's Match and Callback.
func DissectNodes(input *html.Node, toks ...Tok) {
	for i := 0; i < len(toks); i++ {
		current := toks[i]
		FindNodes(input, current.Match, current.Callback)
	}
}