-
Notifications
You must be signed in to change notification settings - Fork 5
/
Copy pathvectorspace.go
70 lines (49 loc) · 1.1 KB
/
vectorspace.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
// SPDX-License-Identifier: MIT
package golangvectorspace
import (
"math"
"regexp"
"strings"
)
// Concordance maps a word to a floating-point weight (its occurrence count
// when produced by BuildConcordance).
type Concordance map[string]float64

// Magnitude returns the Euclidean length of the concordance: the square root
// of the sum of the squared weights. An empty or nil Concordance yields 0.
func (c Concordance) Magnitude() float64 {
	var sumSquares float64
	for _, weight := range c {
		sumSquares += math.Pow(weight, 2)
	}
	return math.Sqrt(sumSquares)
}
// nonAlphanumericRegex matches runs of characters that are neither letters
// (\p{L}), numbers (\p{N}), ASCII whitespace (\s) nor Unicode separators
// (\p{Z}).
//
// Whitespace and separators are deliberately left out of the match. If only
// the plain space were preserved, tabs and line breaks would be deleted and
// the words on either side of them would be glued into a single token before
// the text is split into words.
var nonAlphanumericRegex = regexp.MustCompile(`[^\p{L}\p{N}\s\p{Z}]+`)

// clearString returns str with every run matched by nonAlphanumericRegex
// removed (punctuation, symbols, control characters and the like). Letters,
// numbers and whitespace are returned unchanged, so word boundaries survive.
func clearString(str string) string {
	return nonAlphanumericRegex.ReplaceAllString(str, "")
}
// BuildConcordance counts how often each word occurs in document.
//
// The text is lower-cased, filtered through clearString, and split into
// words with strings.Fields; every word is mapped to its number of
// occurrences. A document that yields no words produces an empty, non-nil
// Concordance.
func BuildConcordance(document string) Concordance {
	words := strings.Fields(clearString(strings.ToLower(document)))

	con := make(Concordance)
	for _, word := range words {
		// strings.Fields never returns empty or whitespace-padded tokens, so
		// no trimming or emptiness check is needed. A missing key reads as
		// 0, so the first occurrence becomes 1.
		con[word]++
	}
	return con
}
// Relation returns the cosine similarity of con1 and con2: the dot product
// taken over the words present in both concordances, divided by the product
// of their magnitudes. When that product is zero, Relation returns 0 instead
// of dividing.
func Relation(con1 Concordance, con2 Concordance) float64 {
	var dot float64
	for word, weight := range con1 {
		// Only words that appear in both concordances contribute.
		if other, shared := con2[word]; shared {
			dot += weight * other
		}
	}

	norm := con1.Magnitude() * con2.Magnitude()
	if norm == 0 {
		return 0
	}
	return dot / norm
}