-
Notifications
You must be signed in to change notification settings - Fork 50
/
Copy pathaggregator.go
107 lines (95 loc) · 2.62 KB
/
aggregator.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
package producer
import (
"bytes"
"crypto/md5"
"github.com/aws/aws-sdk-go-v2/aws"
ktypes "github.com/aws/aws-sdk-go-v2/service/kinesis/types"
"google.golang.org/protobuf/proto"
)
// magicNumber is the 4-byte marker placed at the start of every aggregated
// payload built by Drain, and the prefix isAggregated looks for.
var magicNumber = []byte{0xf3, 0x89, 0x9a, 0xc2}
// Aggregator accumulates user records so that they can be emitted as a
// single aggregated Kinesis record by Drain.
type Aggregator struct {
	// buf holds the records added by Put since the last Drain.
	buf []*Record
	// pkeys is the partition key table of the pending aggregated record.
	// Put only ever stores the first key it sees, so it has at most one entry.
	pkeys []string
	// nbytes is the running size counter: the stored partition key's length,
	// plus partitionKeyIndexSize and the data length for every record.
	nbytes int
}
// Size reports the number of bytes currently accounted for by the
// aggregator. The figure covers record data, the stored partition key and
// the per-record partition key index overhead.
func (a *Aggregator) Size() int {
	size := a.nbytes
	return size
}
// Count reports how many records are currently buffered in the aggregator.
func (a *Aggregator) Count() int {
	pending := len(a.buf)
	return pending
}
// Put buffers one record made of data and partitionKey.
//
// Only the partition key of the first record added after a Drain is kept;
// every record in the aggregated record shares it, and the partitionKey
// argument of later calls is ignored. A shard mapper like the one the KPL
// uses may be added later, see:
// https://github.com/a8m/kinesis-producer/issues/1
//
// Put takes no lock itself; callers that share an Aggregator between
// goroutines must serialize access.
func (a *Aggregator) Put(data []byte, partitionKey string) {
	if len(a.pkeys) == 0 {
		// First record of this batch: its key becomes the key of the batch.
		a.pkeys = []string{partitionKey}
		a.nbytes += len(partitionKey)
	}

	// Every record points at the last (currently the only) table entry.
	idx := uint64(len(a.pkeys) - 1)
	rec := &Record{
		Data:              data,
		PartitionKeyIndex: &idx,
	}
	a.buf = append(a.buf, rec)

	// Account for the key index overhead and the payload itself.
	a.nbytes += partitionKeyIndexSize
	a.nbytes += len(data)
}
// Drain builds an aggregated `kinesis.PutRecordsRequestEntry` from the
// buffered records, in a format meant to be compatible with the KCL's
// deaggregation logic, and then empties the aggregator.
//
// The entry's Data is laid out as:
//
//	magicNumber | protobuf-encoded AggregatedRecord | MD5 of the protobuf bytes
//
// and its PartitionKey is the first key in the partition key table.
//
// Drain returns (nil, nil) when the aggregator holds nothing. If protobuf
// marshalling fails the error is returned and the aggregator is left
// untouched.
//
// If you are interested to know more about the format, see:
// aggregation-format.md
func (a *Aggregator) Drain() (*ktypes.PutRecordsRequestEntry, error) {
	if a.nbytes == 0 {
		return nil, nil
	}
	data, err := proto.Marshal(&AggregatedRecord{
		PartitionKeyTable: a.pkeys,
		Records:           a.buf,
	})
	if err != nil {
		return nil, err
	}
	checkSum := md5.Sum(data)

	// Build the payload in a single, exactly-sized buffer. Appending directly
	// onto the package-level magicNumber slice would only be safe for as long
	// as that slice has no spare capacity.
	aggData := make([]byte, 0, len(magicNumber)+len(data)+md5.Size)
	aggData = append(aggData, magicNumber...)
	aggData = append(aggData, data...)
	aggData = append(aggData, checkSum[:]...)

	entry := &ktypes.PutRecordsRequestEntry{
		Data:         aggData,
		PartitionKey: aws.String(a.pkeys[0]),
	}
	a.clear()
	return entry, nil
}
// clear resets the aggregator to its empty state. The slices are replaced
// with fresh empty (non-nil) ones rather than truncated, so the Aggregator
// no longer references the previous backing arrays.
func (a *Aggregator) clear() {
	a.nbytes = 0
	a.pkeys = []string{}
	a.buf = []*Record{}
}
// isAggregated reports whether entry's data starts with the aggregation
// magic number.
func isAggregated(entry *ktypes.PutRecordsRequestEntry) bool {
	payload := entry.Data
	if len(payload) < len(magicNumber) {
		return false
	}
	return bytes.Equal(payload[:len(magicNumber)], magicNumber)
}
// extractRecords splits an aggregated entry back into one
// PutRecordsRequestEntry per contained record.
//
// The leading magic number and trailing MD5 checksum are stripped without
// being verified, and the remaining bytes are decoded as an AggregatedRecord.
// Each resulting entry carries the record's data and the partition key that
// the record's index selects in the partition key table.
//
// It returns nil when the entry cannot be decoded: the data is too short to
// hold the magic number and checksum, protobuf decoding fails, or a record
// refers to a partition key index outside the table.
func extractRecords(entry *ktypes.PutRecordsRequestEntry) (out []ktypes.PutRecordsRequestEntry) {
	// Guard the slice expression below: anything shorter than the framing
	// bytes cannot be a valid aggregated payload.
	if len(entry.Data) < len(magicNumber)+md5.Size {
		return nil
	}
	src := entry.Data[len(magicNumber) : len(entry.Data)-md5.Size]
	dest := new(AggregatedRecord)
	if err := proto.Unmarshal(src, dest); err != nil {
		return nil
	}
	keys := dest.PartitionKeyTable
	for _, r := range dest.Records {
		idx := r.GetPartitionKeyIndex()
		if idx >= uint64(len(keys)) {
			// Corrupt key index: treat the whole entry as undecodable
			// instead of panicking or returning a partial result.
			return nil
		}
		out = append(out, ktypes.PutRecordsRequestEntry{
			Data:         r.GetData(),
			PartitionKey: &keys[idx],
		})
	}
	return out
}