-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathvalid.go
119 lines (101 loc) · 2.4 KB
/
valid.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
package htmlValidator
import (
"github.com/tdewolff/parse/v2"
"github.com/tdewolff/parse/v2/html"
"io"
"unicode/utf8"
)
// ValidateObj is the set of statistics that Validate collects for one HTML
// stream.
type ValidateObj struct {
	// Size accumulates the volume of text tokens.
	Size struct {
		// Bytes is the summed byte length of every text token.
		Bytes uint64
		// Symbols is the summed UTF-8 rune count of every text token.
		Symbols uint64
	}

	// Tags accumulates counters for start tags.
	Tags struct {
		// NOTE(review): Validate in this file never writes Global; it is
		// presumably meant to be a total tag count — confirm with the caller.
		Global uint64

		// Errors counts start tags that match none of the known Tag*
		// constants, keyed by the tag name as reported by the lexer.
		Errors map[string]uint32

		// Delimiter counts TagDelimiter start tags.
		Delimiter uint32

		// Counters for TagBold, TagItalic, TagUnderline and TagLineThrough
		// start tags respectively.
		Bold, Italic, Underline, LineThrough uint32

		// Quote counts TagQuote start tags.
		Quote uint32

		// Counters for TagSubScript and TagSuperScript start tags.
		SubScript, SuperScript uint32

		// Paragraphs breaks TagParagraph start tags down by alignment.
		Paragraphs struct {
			// Global counts every TagParagraph start tag.
			Global uint32
			// Def counts paragraphs that were not classified as left, right
			// or center aligned.
			Def uint32
			// Counters for paragraphs classified through the AttrLeft,
			// AttrRight and AttrCenter attribute keys respectively.
			Left, Right, Center uint32
		}
	}
}
// Validate is the general-purpose check: it lexes htmlText and returns the
// statistics gathered over the whole stream — text size in bytes and runes, a
// counter per known tag, the alignment breakdown of paragraphs and a per-name
// count of start tags that match none of the known Tag* constants.
//
// Every TagParagraph start tag is assigned to exactly one alignment bucket.
// The attributes of the paragraph tag itself are scanned in order; the first
// one whose key equals AttrLeft, AttrRight or AttrCenter selects the bucket.
// If the tag closes (or the input ends) without such an attribute, the
// paragraph is counted in Def. Attributes of any other tag are ignored.
//
// The scan stops at the first error token produced by the lexer. The signature
// has no error result, so if the lexer stops because reading htmlText failed,
// the caller still receives whatever was counted up to that point.
//
// NOTE(review): obj.Tags.Global is never updated here and is always returned
// as zero.
func Validate(htmlText io.Reader) ValidateObj {
	obj := ValidateObj{}
	obj.Tags.Errors = make(map[string]uint32)

	// pendingParagraph is true from a paragraph start tag until that
	// paragraph has been put into an alignment bucket.
	pendingParagraph := false

	lexer := html.NewLexer(parse.NewInput(htmlText))
	for {
		tokenType, data := lexer.Next()
		switch tokenType {
		case html.ErrorToken:
			// Input ended inside an unfinished paragraph tag: still classify
			// it so that Global stays equal to Def+Left+Right+Center.
			if pendingParagraph {
				obj.Tags.Paragraphs.Def++
			}
			return obj

		case html.StartTagToken:
			// The tag name is read through AttrKey, as the original code did.
			// NOTE(review): presumably equivalent to lexer.Text() for start
			// tags — confirm against the lexer documentation.
			tag := string(lexer.AttrKey())
			switch tag {
			case TagParagraph:
				obj.Tags.Paragraphs.Global++
				pendingParagraph = true
			case TagDelimiter:
				obj.Tags.Delimiter++
			case TagBold:
				obj.Tags.Bold++
			case TagItalic:
				obj.Tags.Italic++
			case TagUnderline:
				obj.Tags.Underline++
			case TagLineThrough:
				obj.Tags.LineThrough++
			case TagQuote:
				obj.Tags.Quote++
			case TagSubScript:
				obj.Tags.SubScript++
			case TagSuperScript:
				obj.Tags.SuperScript++
			default:
				obj.Tags.Errors[tag]++
			}

		case html.AttributeToken:
			// Only attributes of a still-unclassified paragraph matter.
			if !pendingParagraph {
				continue
			}
			// An unrelated attribute keeps the paragraph pending so that the
			// result does not depend on attribute order.
			switch string(lexer.AttrKey()) {
			case AttrLeft:
				obj.Tags.Paragraphs.Left++
				pendingParagraph = false
			case AttrRight:
				obj.Tags.Paragraphs.Right++
				pendingParagraph = false
			case AttrCenter:
				obj.Tags.Paragraphs.Center++
				pendingParagraph = false
			}

		case html.StartTagCloseToken, html.StartTagVoidToken:
			// The paragraph tag ended without an alignment attribute. Settling
			// it here keeps attributes of a following tag from being mistaken
			// for the paragraph's own.
			if pendingParagraph {
				obj.Tags.Paragraphs.Def++
				pendingParagraph = false
			}

		case html.TextToken:
			obj.Size.Bytes += uint64(len(data))
			// RuneCount works on the byte slice directly, avoiding a string
			// copy of every text token.
			obj.Size.Symbols += uint64(utf8.RuneCount(data))
		}
	}
}
//###################################################################//