-
Notifications
You must be signed in to change notification settings - Fork 5
/
Copy pathbuilder.go
84 lines (67 loc) · 1.59 KB
/
builder.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
package randtxt
import (
	"fmt"
	"strings"

	"github.com/pboyd/markov"
)
// ModelBuilder builds a model that Generator can use.
//
// Create instances with NewModelBuilder, then pass tag channels to Feed.
type ModelBuilder struct {
// chain is the destination the model is written to by Feed.
chain markov.WriteChain
// ngramSize is the number of words joined into each ngram.
ngramSize int
// TagSet normalizes each incoming tag before it is added to the model.
// NewModelBuilder initializes it to PennTreebankTagSet.
TagSet TagSet
}
// NewModelBuilder returns a ModelBuilder that writes its model to chain.
//
// ngramSize is the number of words joined together to form each ngram. It
// must be greater than 0; the value is stored as given and is not checked
// here.
//
// The returned builder's TagSet is PennTreebankTagSet. Assign a different
// TagSet to the exported field before calling Feed to change how tags are
// normalized.
//
// See cmd/readtsv for an example.
func NewModelBuilder(chain markov.WriteChain, ngramSize int) *ModelBuilder {
	builder := new(ModelBuilder)
	builder.chain = chain
	builder.ngramSize = ngramSize
	builder.TagSet = PennTreebankTagSet
	return builder
}
// Feed reads tags from one or more channels and writes them to the output
// chain.
//
// Every source is converted into a stream of ngrams by joinTags, each in its
// own goroutine, and the resulting streams are handed to markov.Feed together
// with the builder's chain.
//
// Feed returns an error, without reading from any source, when the builder's
// ngram size is less than 1. Otherwise it returns whatever markov.Feed
// returns.
func (b *ModelBuilder) Feed(sources ...<-chan Tag) error {
	// joinTags cannot build ngrams of fewer than one word: it can panic inside
	// its goroutine, which would take the whole process down. Reject the
	// configuration up front instead.
	if b.ngramSize < 1 {
		return fmt.Errorf("randtxt: ngram size must be greater than 0, got %d", b.ngramSize)
	}

	ngrams := make([]<-chan interface{}, len(sources))
	for i, source := range sources {
		ngrams[i] = b.joinTags(source)
	}

	return markov.Feed(b.chain, ngrams...)
}
// joinTags starts a goroutine that turns a stream of tags into the stream of
// values fed to the chain, and returns the channel those values arrive on.
// The channel is closed once tags has been drained.
//
// Each tag is first passed through b.TagSet.Normalize together with the
// previously accepted tag; tags whose normalized Text is empty are dropped
// and do not become the "previous" tag.
//
// With an ngram size of 1, the string form of every accepted tag is emitted
// on its own. For larger sizes nothing is emitted until the first ngramSize
// tags have been collected, at which point they are emitted joined by single
// spaces. Every later tag produces two values: the tag's own string, followed
// by the window shifted one word forward and joined by single spaces.
func (b *ModelBuilder) joinTags(tags <-chan Tag) <-chan interface{} {
	out := make(chan interface{})

	go func() {
		defer close(out)

		size := b.ngramSize
		window := make([]string, 0, size)
		var last Tag

		for t := range tags {
			t = b.TagSet.Normalize(t, last)
			if t.Text == "" {
				continue
			}
			last = t
			word := t.String()

			switch {
			case size == 1:
				// Unigrams need no window; each word is its own ngram.
				out <- word
				continue

			case len(window) < size:
				// Still filling the first window; stay silent until it is
				// complete.
				window = append(window, word)
				if len(window) < size {
					continue
				}

			default:
				// Window is full: emit the new word by itself, then slide
				// the window forward by one so the word becomes its last
				// element.
				out <- word
				copy(window, window[1:])
				window[size-1] = word
			}

			out <- strings.Join(window, " ")
		}
	}()

	return out
}