diff --git a/cmd/dataset_tokenizer/dataset_tokenizer.go b/cmd/dataset_tokenizer/dataset_tokenizer.go index 29eda96..a162eae 100644 --- a/cmd/dataset_tokenizer/dataset_tokenizer.go +++ b/cmd/dataset_tokenizer/dataset_tokenizer.go @@ -631,11 +631,17 @@ func getAndCheckToken( s = strings.ReplaceAll(s, "\\n", "\n") token := t.Get(s) if token == nil { - tokens := t.Encode(&s) - if len(*tokens) != 1 { - return 0, fmt.Errorf("'%s' is not a valid token for %s", s, id) + tokens := *t.Encode(&s) + // Also allow a single "real" token surrounded by an EosToken and/or a BosToken + if len(tokens) == 1 || + len(tokens) == 2 && tokens[1] == t.EosToken && tokens[0] != t.BosToken { + return tokens[0], nil + } else if len(tokens) == 3 && + tokens[0] == t.BosToken && tokens[2] == t.EosToken || + len(tokens) == 2 && tokens[0] == t.BosToken && tokens[1] != t.EosToken { + return tokens[1], nil } else { - return (*tokens)[0], nil + return 0, fmt.Errorf("'%s' is not a valid token for %s", s, id) } } else { return *token, nil @@ -758,11 +764,14 @@ func (tt TextsTokenizer) handleExclusions( func (tt TextsTokenizer) TokenizeTexts( texts chan namedRuneReader, indexPath string, + tokenizerPtr *gpt_bpe.GPTEncoder, ) (chan gpt_bpe.Tokens, error) { - tokenizerPtr, tokErr := tt.InitTokenizer() - - if tokErr != nil { - return nil, tokErr + var tokErr error + if tokenizerPtr == nil { + tokenizerPtr, tokErr = tt.InitTokenizer() + if tokErr != nil { + return nil, tokErr + } } tokenizer := *tokenizerPtr var endOfText gpt_bpe.Token @@ -853,11 +862,14 @@ func (tt TextsTokenizer) TokenizeTexts( // that returns tokenized contexts that are fixed and padded out to // `contextSize`. func (tt TextsTokenizer) TokenizeTextsToContexts( - texts chan namedRuneReader, + texts chan namedRuneReader, tokenizerPtr *gpt_bpe.GPTEncoder, ) (chan gpt_bpe.Tokens, error) { - tokenizerPtr, tokErr := tt.InitTokenizer() - if tokErr != nil { - return nil, tokErr + var tokErr error + if tokenizerPtr == nil { + tokenizerPtr, tokErr = tt.InitTokenizer() + if tokErr != nil { + return nil, tokErr + } } tokenizer := *tokenizerPtr var padToken, endOfText gpt_bpe.Token @@ -888,7 +900,7 @@ func (tt TextsTokenizer) TokenizeTextsToContexts( var boundary gpt_bpe.Token if tt.Boundary == "" { - boundary = 65535 + boundary = 0xFFFFFFFF } else { var boundaryErr error boundary, boundaryErr = getAndCheckToken( @@ -1046,7 +1058,7 @@ func (tt TextsTokenizer) TokenizeTextsToContexts( // We were given a hard index to use as the chunk boundary, // and it may not be a complete unicode character, so we // need to align it to a valid unicode character. - if boundary == 65535 && doUnitrim { + if boundary == 0xFFFFFFFF && doUnitrim { // Ensure that our next chunk is aligned to valid // unicode. _, offset := tokenizer.AlignAndSizeTokens( @@ -1116,16 +1128,23 @@ func WriteContexts( sampling int, shuffle bool, enforceUint32 bool, + showContexts bool, ) (int, error) { totalTokens := 0 - var useUint32 bool - // We only use uint32 if we're enforcing it and vocab size is greater than - // 65536. - if encoder != nil { - if enforceUint32 && len(encoder.Encoder) > 65536 { + useUint32 := enforceUint32 + // Use uint32 if explicitly requested or if the vocab size is greater than 65536. 
+ if !useUint32 { + if encoder == nil { + return 0, fmt.Errorf("WriteContexts called with unknown encoder; cannot determine output byte width") + } else if len(encoder.Encoder) > 65536 { useUint32 = true + log.Println("warning: tokenizer vocab too large for 16-bit, outputting as 32-bit") } } + if showContexts && encoder == nil { + showContexts = false + log.Println("warning: no encoder info, cannot show contexts") + } // create file AND filepath if not exists if err := os.MkdirAll(filepath.Dir(outPath), os.ModePerm); err != nil { @@ -1158,6 +1177,11 @@ func WriteContexts( doKeepSampling := sampling == 100 || (samplingIdx%lcd < skipEveryX) if doKeepSampling { sampledContexts <- context + if showContexts { + fmt.Println(len(context)) + fmt.Println("======================================") + fmt.Println(encoder.Decode(&context)) + } } samplingIdx += 1 } @@ -1187,7 +1211,10 @@ func WriteContexts( if !ok { break } - binContext := context.ToBin(useUint32) + binContext, err := context.ToBin(useUint32) + if err != nil { + return totalTokens, err + } // We keep track of the final file position if endpos == 0 { // On the first context, we discern the context size and make the @@ -1421,7 +1448,7 @@ func main() { ) enforceUint32 := flag.Bool( "uint32_enforce", false, - "enforce uint32 tokenization if needed (vocab size > 65535)", + "output tokens as uint32 instead of uint16 (required for vocabs with over 2^16 tokens)", ) flag.Parse() @@ -1513,7 +1540,9 @@ func main() { ) } } - if _, tokErr := textsTokenizer.InitTokenizer(); tokErr != nil { + + encoder, tokErr := textsTokenizer.InitTokenizer() + if tokErr != nil { log.Fatal(tokErr) } @@ -1591,13 +1620,19 @@ func main() { contexts, tokErr = textsTokenizer.TokenizeTexts( textReaders, indexFilePath, + encoder, ) if tokErr != nil { log.Fatal(tokErr) } total, writeErr := WriteContexts( - outputFilePath, contexts, - nil, sampling, false, *enforceUint32, + outputFilePath, + contexts, + encoder, + sampling, + false, + *enforceUint32, + *showContexts, ) if writeErr != nil { log.Fatal(writeErr) @@ -1611,20 +1646,20 @@ func main() { var contexts chan gpt_bpe.Tokens var tokErr error contexts, tokErr = textsTokenizer.TokenizeTextsToContexts( - textReaders, + textReaders, encoder, ) if tokErr != nil { log.Fatal(tokErr) } - var enc *gpt_bpe.GPTEncoder - if *showContexts { - enc, _ = textsTokenizer.InitTokenizer() - } var writeErr error numTokens, writeErr = WriteContexts( - *outputFile, contexts, enc, + *outputFile, + contexts, + encoder, sampling, - *reorderPaths == "shuffle", *enforceUint32, + *reorderPaths == "shuffle", + *enforceUint32, + *showContexts, ) if writeErr != nil { log.Fatal(writeErr) diff --git a/cmd/dataset_tokenizer/dataset_tokenizer_test.go b/cmd/dataset_tokenizer/dataset_tokenizer_test.go index fb60744..99e78cf 100644 --- a/cmd/dataset_tokenizer/dataset_tokenizer_test.go +++ b/cmd/dataset_tokenizer/dataset_tokenizer_test.go @@ -228,9 +228,7 @@ func TestEncodeText1(t *testing.T) { textsTokenizer.BoundaryBegin = false var enc *gpt_bpe.GPTEncoder - reorderPaths := "" sampling := 100 - enforceUint32 := true // Only if needed outputFile := "base.chunk" enc, tokErr := textsTokenizer.InitTokenizer() @@ -252,14 +250,19 @@ func TestEncodeText1(t *testing.T) { }() begin := time.Now() - contexts, tokErr := textsTokenizer.TokenizeTexts(reader, "./test") + contexts, tokErr := textsTokenizer.TokenizeTexts(reader, "./test", enc) if tokErr != nil { log.Fatal("Error tokenizing texts: ", tokErr) } total, writeErr := WriteContexts( - outputFile, contexts, enc, 
sampling, reorderPaths == "shuffle", - enforceUint32, + outputFile, + contexts, + enc, + sampling, + false, + false, + false, ) if writeErr != nil { log.Fatal("Error writing contexts: ", writeErr) @@ -310,7 +313,8 @@ func TestSampling50(t *testing.T) { sampling := 100 outputFile := "base.chunk" - if _, tokErr := textsTokenizer.InitTokenizer(); tokErr != nil { + enc, tokErr := textsTokenizer.InitTokenizer() + if tokErr != nil { log.Fatal(tokErr) } @@ -323,18 +327,22 @@ func TestSampling50(t *testing.T) { } else { begin := time.Now() contexts, tokErr := textsTokenizer.TokenizeTexts( - texts, "./test", + texts, "./test", enc, ) if tokErr != nil { log.Fatal(tokErr) } - var enc *gpt_bpe.GPTEncoder // *showContexts = true total, writeErr := WriteContexts( - outputFile, contexts, enc, sampling, - reorderPaths == "shuffle", false, + outputFile, + contexts, + enc, + sampling, + false, + false, + false, ) all1 += total if writeErr != nil { @@ -361,7 +369,8 @@ func TestSampling50(t *testing.T) { sampling = 50 outputFile = "samp50.chunk" - if _, tokErr := textsTokenizer.InitTokenizer(); tokErr != nil { + enc, tokErr = textsTokenizer.InitTokenizer() + if tokErr != nil { log.Fatal(tokErr) } @@ -374,16 +383,20 @@ func TestSampling50(t *testing.T) { } else { begin := time.Now() contexts, tokErr := textsTokenizer.TokenizeTexts( - texts2, "./test", + texts2, "./test", enc, ) if tokErr != nil { log.Fatal(tokErr) } - var enc *gpt_bpe.GPTEncoder // *showContexts = true total2, writeErr := WriteContexts( - outputFile, contexts, enc, sampling, reorderPaths == "shuffle", + outputFile, + contexts, + enc, + sampling, + reorderPaths == "shuffle", + false, false, ) all2 += total2 @@ -430,7 +443,8 @@ func TestShuffle(t *testing.T) { sampling := 100 outputFile := "noshuffle.chunk" - if _, tokErr := textsTokenizer.InitTokenizer(); tokErr != nil { + enc, tokErr := textsTokenizer.InitTokenizer() + if tokErr != nil { log.Fatal(tokErr) } @@ -442,16 +456,20 @@ func TestShuffle(t *testing.T) { } else { begin := time.Now() contexts, tokErr := textsTokenizer.TokenizeTexts( - texts, "./test", + texts, "./test", enc, ) if tokErr != nil { log.Fatal(tokErr) } - var enc *gpt_bpe.GPTEncoder // *showContexts = true total, writeErr := WriteContexts( - outputFile, contexts, enc, sampling, reorderPaths == "shuffle", + outputFile, + contexts, + enc, + sampling, + false, + false, false, ) all1 += total @@ -479,7 +497,8 @@ func TestShuffle(t *testing.T) { sampling = 100 outputFile = "shuffle.chunk" - if _, tokErr := textsTokenizer.InitTokenizer(); tokErr != nil { + enc2, tokErr := textsTokenizer.InitTokenizer() + if tokErr != nil { log.Fatal(tokErr) } @@ -491,16 +510,20 @@ func TestShuffle(t *testing.T) { } else { begin := time.Now() contexts2, tokErr := textsTokenizer.TokenizeTexts( - texts2, "./test", + texts2, "./test", enc2, ) if tokErr != nil { log.Fatal(tokErr) } - var enc2 *gpt_bpe.GPTEncoder // *showContexts = true total2, writeErr := WriteContexts( - outputFile, contexts2, enc2, sampling, reorderPaths == "shuffle", + outputFile, + contexts2, + enc2, + sampling, + true, + false, false, ) all2 += total2 diff --git a/cmd/detokenizer/detokenizer.go b/cmd/detokenizer/detokenizer.go index 61feb9c..d9a7e41 100644 --- a/cmd/detokenizer/detokenizer.go +++ b/cmd/detokenizer/detokenizer.go @@ -12,6 +12,7 @@ func main() { "input tokenizer id [gpt2, pile, clip, huggingface-id]") inputFile := flag.String("input", "", "input file to retokenize") + in32 := flag.Bool("in32", false, "force input tokens to be read as 32-bit") outputFile := 
flag.String("output", "detokenized.txt", "output file to write retokenized data") flag.Parse() @@ -44,6 +45,7 @@ func main() { log.Fatal(inputErr) } } + input32Bit := *in32 || len(inputTokenizer.Encoder) > 65536 inputFileHandle, err := os.Open(*inputFile) if err != nil { @@ -56,20 +58,28 @@ func main() { log.Fatal(err) } + if input32Bit { + log.Println("Reading as 32-bit") + } else { + log.Println("Reading as 16-bit") + } + // Read 4096 bytes at a time from the input file. + // This is a bit arbitrary, but it's a good tradeoff + // between memory usage and speed. + bytes := make([]byte, 4096) for { - // Read 4096 bytes at a time from the input file. - // This is a bit arbitrary, but it's a good tradeoff - // between memory usage and speed. - bytes := make([]byte, 4096) - _, err := inputFileHandle.Read(bytes) + bytesRead, err := inputFileHandle.Read(bytes) if err != nil { break } // Decode the bytes into a string. - decoded := inputTokenizer.DecodeBuffer(&bytes) - if err != nil { - log.Fatal(err) + var decoded string + if bytesRead == 4096 { + decoded = inputTokenizer.DecodeBuffer(&bytes, input32Bit) + } else { + filledBytes := bytes[:bytesRead] + decoded = inputTokenizer.DecodeBuffer(&filledBytes, input32Bit) } // Write the decoded string to the output file. diff --git a/cmd/tokens_transformer/tokens_transformer.go b/cmd/tokens_transformer/tokens_transformer.go index fcb8b91..e6f5ced 100644 --- a/cmd/tokens_transformer/tokens_transformer.go +++ b/cmd/tokens_transformer/tokens_transformer.go @@ -20,6 +20,10 @@ func main() { "show contexts as they are retokenized") unitrimBool := flag.Bool("no_unitrim", false, "do not trim to valid unicode retokenized contexts") + in32 := flag.Bool("in32", false, + "force input tokens to be read as 32-bit") + out32 := flag.Bool("out32", false, + "force output tokens to be written as 32-bit") inputFile := flag.String("input", "", "input file to retokenize") outputFile := flag.String("output", "retokenized.tokens", @@ -53,14 +57,30 @@ func main() { if _, err := os.Stat(*inputFile); os.IsNotExist(err) { log.Fatal("Input file does not exist") } - inputTokenizer, inputErr := gpt_bpe.NewEncoder(*inputTokenizerId) + + // Check if it's an internal reference. If not, it's a file path. + inputTokenizer, inputErr := gpt_bpe.NewEncoder( + *inputTokenizerId + "-tokenizer") if inputErr != nil { - log.Fatal(inputErr) + // Fall back to path-like. + inputTokenizer, inputErr = gpt_bpe.NewEncoder(*inputTokenizerId) + if inputErr != nil { + log.Fatal(inputErr) + } } - outputTokenizer, outputErr := gpt_bpe.NewEncoder(*outputTokenizerId) + input32Bit := *in32 || len(inputTokenizer.Encoder) > 65536 + + outputTokenizer, outputErr := gpt_bpe.NewEncoder( + *outputTokenizerId + "-tokenizer") if outputErr != nil { - log.Fatal(outputErr) + // Fall back to path-like. 
+ outputTokenizer, outputErr = gpt_bpe.NewEncoder(*outputTokenizerId) + if outputErr != nil { + log.Fatal(outputErr) + } } + output32Bit := *out32 || len(outputTokenizer.Encoder) > 65536 + // open input file inputFileHandle, inputOpenErr := os.Open(*inputFile) if inputOpenErr != nil { @@ -94,8 +114,13 @@ func main() { break } + if input32Bit { + log.Println("Reading as 32-bit") + } else { + log.Println("Reading as 16-bit") + } context := contextBuffer[:bytesRead] - decoded := inputTokenizer.DecodeBuffer(&context) + decoded := inputTokenizer.DecodeBuffer(&context, input32Bit) encoded := outputTokenizer.Encode(&decoded) // trim encoded tokens to context size if len(*encoded) > *contextSize { @@ -115,7 +140,12 @@ func main() { encoded = &padded } // write encoded context to output file - bytesToWrite := encoded.ToBin(false) + if output32Bit { + log.Println("Writing as 32-bit") + } else { + log.Println("Writing as 16-bit") + } + bytesToWrite, _ := encoded.ToBin(output32Bit) bytesWritten, writeErr := outputFileHandle.Write(*bytesToWrite) if writeErr != nil { diff --git a/gpt_bpe.go b/gpt_bpe.go index 54edd06..a8a9b74 100644 --- a/gpt_bpe.go +++ b/gpt_bpe.go @@ -1216,19 +1216,21 @@ func (encoder *GPTEncoder) EncodeReader(reader io.RuneReader) *Tokens { // EncodeBuffer takes a byte array and encodes it into Tokens in another // byte array. -func (encoder *GPTEncoder) EncodeBuffer(buffer *[]byte) *[]byte { +func (encoder *GPTEncoder) EncodeBuffer(buffer *[]byte) (*[]byte, uint64) { runeReader := bytes.NewReader(*buffer) nextTokens := encoder.StreamingEncode(runeReader) buf := bytes.NewBuffer(make([]byte, 0, 4096)) + var count uint64 = 0 for { tokens := nextTokens(2048) if tokens == nil { break } _ = binary.Write(buf, binary.LittleEndian, tokens) + count += uint64(len(*tokens)) } bufBytes := buf.Bytes() - return &bufBytes + return &bufBytes, count } // Encode encodes a string into a sequence of tokens. @@ -1333,9 +1335,14 @@ func (encoder *GPTEncoder) Decode(encoded *Tokens) (text string) { // DecodeBuffer // Decode Tokens from a byte array into a string. -func (encoder *GPTEncoder) DecodeBuffer(encoded *[]byte) (text string) { +func (encoder *GPTEncoder) DecodeBuffer(encoded *[]byte, useUint32 bool) (text string) { // First convert our bytearray into uint32 `Token` array. - tokens := types.TokensFromBin(encoded) + var tokens *Tokens + if useUint32 { + tokens = types.TokensFromBin32(encoded) + } else { + tokens = types.TokensFromBin(encoded) + } // Decode our tokens into a string. 
return encoder.Decode(tokens) } diff --git a/gpt_bpe_test.go b/gpt_bpe_test.go index 7150104..88ef7b0 100644 --- a/gpt_bpe_test.go +++ b/gpt_bpe_test.go @@ -514,7 +514,7 @@ func BenchmarkGPTEncoder_Encode(b *testing.B) { func BenchmarkGPTEncoder_EncodeBuffer(b *testing.B) { corpusBytes := []byte(corpus) start := time.Now() - tokenCt := len(*gpt2Encoder.EncodeBuffer(&corpusBytes)) / 2 + _, tokenCt := gpt2Encoder.EncodeBuffer(&corpusBytes) duration := time.Since(start) b.Logf( "%v bytes into %v tokens over %v", diff --git a/lib/library.go b/lib/library.go index d09053c..fa0dfdd 100644 --- a/lib/library.go +++ b/lib/library.go @@ -6,11 +6,13 @@ package main import "C" import ( "fmt" + "os" "reflect" "time" "unsafe" "github.com/wbrown/gpt_bpe" + "github.com/wbrown/gpt_bpe/types" ) var tokenizers map[string]*gpt_bpe.GPTEncoder @@ -53,18 +55,18 @@ func tokenizeBuffer(vocabIdStr *C.char, buf *C.char, sz C.size_t) C.Tokens { encoder = tokenizers[tokenizerId] } goBuf := createBuffer(unsafe.Pointer(buf), int(sz)) - encoded := *encoder.EncodeBuffer(goBuf) - tokensArr := C.CBytes(encoded) + encoded, tokenCount := encoder.EncodeBuffer(goBuf) + tokensArr := C.CBytes(*encoded) tokens := C.Tokens{ tokens: (*C.uint32_t)(tokensArr), - len: (C.size_t)(len(encoded) / 2), + len: (C.size_t)(tokenCount), } return tokens } // tokenize accepts a vocabulary and text as a C string, and returns a C.Tokens -// that contains a malloc'ed array of uint32_t tokens along with the number of -// tokens. +// that contains a malloc'ed array of little-endian uint32_t tokens along with +// the number of tokens. // //export tokenize func tokenize(vocabIdStr *C.char, str *C.char) C.Tokens { @@ -78,7 +80,12 @@ func tokenize(vocabIdStr *C.char, str *C.char) C.Tokens { fmt.Printf("input: %s\n", s) encoded := *encoder.Encode(&s) fmt.Printf("Tokens: %v\n", encoded) - tokensArr := C.CBytes(*encoded.ToBin(false)) + encodedBinary, err := encoded.ToBin(true) + if err != nil || encodedBinary == nil { + _, _ = fmt.Fprintf(os.Stderr, "tokenize: failed to write tokens as uint32_t: %v\n", err) + return C.Tokens{tokens: nil, len: 0} + } + tokensArr := C.CBytes(*encodedBinary) tokens := C.Tokens{ tokens: (*C.uint32_t)(tokensArr), len: C.size_t(len(encoded)), } @@ -101,8 +108,8 @@ func decode(vocabIdStr *C.char, tokens C.Tokens) *C.char { initTokenizer(vocabIdStr) encoder = tokenizers[tokenizerId] } - tokensArr := C.GoBytes(unsafe.Pointer(tokens.tokens), C.int(tokens.len)*2) - goTokens := gpt_bpe.TokensFromBin(&tokensArr) + tokensArr := C.GoBytes(unsafe.Pointer(tokens.tokens), C.int(tokens.len)*4) + goTokens := types.TokensFromBin32(&tokensArr) fmt.Printf("goTokens: %v\n", goTokens) decoded := encoder.Decode(goTokens) fmt.Printf("Decoded: %s\n", decoded) diff --git a/resources/resolver.go b/resources/resolver.go index 38765ee..4780b39 100644 --- a/resources/resolver.go +++ b/resources/resolver.go @@ -553,7 +553,7 @@ func ResolveResources( ) } log.Printf( - "Added %s to resources via sentancepiece conversion\n", + "Added %s to resources via sentencepiece conversion\n", f.Name(), ) } diff --git a/types/methods.go b/types/methods.go index a72f3e5..c1256f6 100644 --- a/types/methods.go +++ b/types/methods.go @@ -3,9 +3,10 @@ package types import ( "bytes" "encoding/binary" + "fmt" ) -func (tokens *Tokens) ToBin(useUint32 bool) *[]byte { +func (tokens *Tokens) ToBin(useUint32 bool) (*[]byte, error) { if useUint32 { return tokens.ToBinUint32() } else { @@ -13,43 +14,50 @@ } -func (tokens *Tokens) ToBinUint16()
*[]byte { - buf := bytes.NewBuffer(make([]byte, 0, len(*tokens)*TokenSize)) +func (tokens *Tokens) ToBinUint16() (*[]byte, error) { + buf := bytes.NewBuffer(make([]byte, 0, len(*tokens)*2)) for idx := range *tokens { bs := (*tokens)[idx] - binary.Write(buf, binary.LittleEndian, uint16(bs)) + if bs > 65535 { + return nil, fmt.Errorf("integer overflow: tried to write token ID %d as unsigned 16-bit", bs) + } + err := binary.Write(buf, binary.LittleEndian, uint16(bs)) + if err != nil { + return nil, err + } } byt := buf.Bytes() - return &byt + return &byt, nil } -func (tokens *Tokens) ToBinUint32() *[]byte { - buf := bytes.NewBuffer(make([]byte, 0, len(*tokens)*TokenSize)) +func (tokens *Tokens) ToBinUint32() (*[]byte, error) { + buf := bytes.NewBuffer(make([]byte, 0, len(*tokens)*4)) for idx := range *tokens { bs := (*tokens)[idx] - binary.Write(buf, binary.LittleEndian, uint32(bs)) + err := binary.Write(buf, binary.LittleEndian, uint32(bs)) + if err != nil { + return nil, err + } } byt := buf.Bytes() - return &byt + return &byt, nil } func TokensFromBin(bin *[]byte) *Tokens { - type tokenuint16 uint16 - tokens := make(Tokens, 0) + tokens := make(Tokens, 0, len(*bin)/2) buf := bytes.NewReader(*bin) for { - var token tokenuint16 + var token uint16 if err := binary.Read(buf, binary.LittleEndian, &token); err != nil { break } - tu32 := Token(uint16(token)) - tokens = append(tokens, tu32) + tokens = append(tokens, Token(token)) } return &tokens } func TokensFromBin32(bin *[]byte) *Tokens { - tokens := make(Tokens, 0) + tokens := make(Tokens, 0, len(*bin)/4) buf := bytes.NewReader(*bin) for { var token Token diff --git a/types/shared.go b/types/shared.go index eb6c216..ffb622c 100644 --- a/types/shared.go +++ b/types/shared.go @@ -3,7 +3,3 @@ package types type Token uint32 type Tokens []Token type TokenMap map[string]Token - -const ( - TokenSize = 2 -)
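For context, here is a minimal sketch of how a caller might use the APIs changed by this patch: `ToBin` now returns an error instead of silently truncating token IDs, and `DecodeBuffer` must be told whether the buffer holds 16-bit or 32-bit tokens. This is illustrative only, not part of the patch; the `"gpt2-tokenizer"` resource id is an assumption (any id accepted by `gpt_bpe.NewEncoder` works the same way), and the width selection mirrors the `len(encoder.Encoder) > 65536` check used in `WriteContexts`.

```go
package main

import (
	"log"

	"github.com/wbrown/gpt_bpe"
)

func main() {
	// Assumed tokenizer id for illustration; substitute any resolvable id.
	encoder, err := gpt_bpe.NewEncoder("gpt2-tokenizer")
	if err != nil {
		log.Fatal(err)
	}

	text := "Hello, world!"
	tokens := encoder.Encode(&text)

	// Pick the byte width the same way WriteContexts now does:
	// 16-bit unless the vocabulary cannot fit in uint16.
	useUint32 := len(encoder.Encoder) > 65536

	// ToBin now reports overflow rather than truncating token IDs.
	bin, err := tokens.ToBin(useUint32)
	if err != nil {
		log.Fatal(err)
	}

	// DecodeBuffer must be told which width it is reading back.
	decoded := encoder.DecodeBuffer(bin, useUint32)
	log.Println(decoded)
}
```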