diff --git a/internal/luacode/funcstate.go b/internal/luacode/funcstate.go index 0666166..f165017 100644 --- a/internal/luacode/funcstate.go +++ b/internal/luacode/funcstate.go @@ -35,12 +35,11 @@ type funcState struct { numActiveVariables uint8 // firstFreeRegister is the first free register. firstFreeRegister registerIndex - // instructionsSinceLastAbsLineInfo is a counter - // of instructions added since the last [absLineInfo]. - instructionsSinceLastAbsLineInfo uint8 // needClose is true if the function needs to close upvalues and/or to-be-closed variables // when returning. needClose bool + + lineInfoWriter lineInfoWriter } // blockControl is a linked list of active blocks. @@ -130,55 +129,30 @@ func (fs *funcState) label() int { } // saveLineInfo save the line information for a new instruction. -// If difference from last line does not fit in a byte, -// of after that many instructions, -// save a new absolute line info; -// (in that case, the special value 'ABSLINEINFO' in 'lineinfo' -// signals the existence of this absolute information.) -// Otherwise, store the difference from last line in 'lineinfo'. // // Equivalent to `savelineinfo` in upstream Lua. 
func (fs *funcState) saveLineInfo(line int) { - const deltaLimit = 1 << 7 - delta := line - fs.previousLine - absDelta := delta - if delta < 0 { - absDelta = -delta - } - - pc := len(fs.Code) - 1 // last instruction coded - - if absDelta >= deltaLimit || fs.instructionsSinceLastAbsLineInfo >= maxInstructionsWithoutAbsLineInfo { + rel := fs.lineInfoWriter.next(line) + if rel == absMarker { fs.LineInfo.abs = append(fs.LineInfo.abs, absLineInfo{ - pc: pc, + pc: len(fs.LineInfo.rel), line: line, }) - delta = int(absMarker) - fs.instructionsSinceLastAbsLineInfo = 1 - } else { - fs.instructionsSinceLastAbsLineInfo++ } - - fs.LineInfo.rel = append(fs.LineInfo.rel, int8(delta)) - fs.previousLine = line + fs.LineInfo.rel = append(fs.LineInfo.rel, rel) } // removeLastLineInfo remove line information from the last instruction. // -// Equivalent to `removeLastLineInfo` in upstream Lua. +// Equivalent to `removelastlineinfo` in upstream Lua. func (fs *funcState) removeLastLineInfo() { lineInfo := &fs.LineInfo - - if lastDelta := lineInfo.rel[len(lineInfo.rel)-1]; lastDelta == absMarker { + lastDelta := lineInfo.rel[len(lineInfo.rel)-1] + lineInfo.rel = lineInfo.rel[:len(lineInfo.rel)-1] + if lastDelta == absMarker { lineInfo.abs = lineInfo.abs[:len(lineInfo.abs)-1] - // Force next line info to be absolute. - fs.instructionsSinceLastAbsLineInfo = maxInstructionsWithoutAbsLineInfo + 1 - } else { - fs.previousLine -= int(lastDelta) - fs.instructionsSinceLastAbsLineInfo-- } - - lineInfo.rel = lineInfo.rel[:len(lineInfo.rel)-1] + fs.lineInfoWriter.prev(lastDelta) } // fixLineInfo changes the line information associated with the last instruction. diff --git a/internal/luacode/lineinfo.go b/internal/luacode/lineinfo.go new file mode 100644 index 0000000..2aed970 --- /dev/null +++ b/internal/luacode/lineinfo.go @@ -0,0 +1,331 @@ +// Copyright (C) 1994-2024 Lua.org, PUC-Rio. 
// Copyright 2024 The zb Authors
// SPDX-License-Identifier: MIT

package luacode

import (
	"cmp"
	"fmt"
	"io"
	"iter"
	"slices"
)

// maxInstructionsWithoutAbsLineInfo is the number of consecutive entries
// a [lineInfoWriter] emits before it forces the next entry to be absolute.
const maxInstructionsWithoutAbsLineInfo = 128

const (
	// lineInfoRelativeLimit is the maximum value permitted
	// in elements of the rel slice of [LineInfo].
	lineInfoRelativeLimit = 1<<7 - 1

	// absMarker is the mark for entries in the rel slice of [LineInfo]
	// that have absolute information in the abs slice.
	absMarker int8 = -lineInfoRelativeLimit - 1
)

// LineInfo is a sequence of line numbers.
// The zero value is an empty sequence.
//
// The underlying data structure is optimized for a sequence of integers
// where the difference between adjacent values is relatively small (|Δ| < 128).
type LineInfo struct {
	// rel has one element per line number in the sequence.
	// Each element is either the difference from the previous line number
	// (the first element is relative to zero)
	// or absMarker, in which case the line number is stored in abs.
	rel []int8
	// abs holds one entry for every absMarker in rel,
	// in increasing order of pc.
	abs []absLineInfo
}

// absLineInfo is an absolute line number
// for the element of LineInfo.rel at index pc.
type absLineInfo struct {
	pc   int
	line int
}

// CollectLineInfo collects values from seq into a new [LineInfo] and returns it.
func CollectLineInfo(seq iter.Seq[int]) LineInfo {
	var info LineInfo
	var w lineInfoWriter
	for line := range seq {
		rel := w.next(line)
		info.rel = append(info.rel, rel)
		if rel == absMarker {
			// The entry that was just appended is the one that needs
			// absolute information, hence len(info.rel) - 1.
			info.abs = append(info.abs, absLineInfo{
				pc:   len(info.rel) - 1,
				line: line,
			})
		}
	}
	return info
}

// Len returns the number of line numbers in the sequence.
func (info LineInfo) Len() int {
	return len(info.rel)
}

// All returns an iterator over the sequence's line numbers.
// (The index is the instruction address.)
// The iterator panics if an absMarker in the sequence
// does not line up with the next absolute entry.
func (info LineInfo) All() iter.Seq2[int, int] {
	return func(yield func(int, int) bool) {
		absIndex := 0
		curr := 0
		for pc, delta := range info.rel {
			if delta != absMarker {
				curr += int(delta)
			} else {
				if info.abs[absIndex].pc != pc {
					panic("corrupted LineInfo")
				}
				curr = info.abs[absIndex].line
				absIndex++
			}

			if !yield(pc, curr) {
				return
			}
		}
	}
}

// At returns the i'th line number in the sequence.
// If i < 0 or i >= info.Len(), then At panics.
+func (info LineInfo) At(i int) int { + // Equivalent to luaG_getfuncline in upstream Lua. + + if i < 0 || i >= info.Len() { + panic("index out of range") + } + + absIndex, ok := slices.BinarySearchFunc(info.abs, i, func(a absLineInfo, pc int) int { + return cmp.Compare(a.pc, pc) + }) + if !ok { + // Binary search finds next largest, so go back one. + absIndex-- + } + + currPC := 0 + lineno := 0 + if absIndex >= 0 { + currPC = info.abs[absIndex].pc + 1 // Skip absMarker. + lineno = info.abs[absIndex].line + } + + for ; currPC <= i; currPC++ { + delta := info.rel[currPC] + if delta == absMarker { + // Search through info.abs should have brought us to closest absMarker + 1. + panic("corrupted LineInfo") + } + lineno += int(delta) + } + return lineno +} + +func dumpLineInfo(buf []byte, base int, info LineInfo) []byte { + if info.Len() == 0 { + buf = dumpVarint(buf, 0) + buf = dumpVarint(buf, 0) + return buf + } + + rel0, rel, abs := normalizeLineInfo(info, base) + buf = dumpVarint(buf, 1+len(rel)) + buf = append(buf, byte(rel0)) + for _, i := range rel { + buf = append(buf, byte(i)) + } + buf = dumpVarint(buf, len(abs)) + for _, a := range abs { + buf = dumpVarint(buf, a.pc) + buf = dumpVarint(buf, a.line) + } + return buf +} + +// normalizeLineInfo converts info to upstream Lua's algorithm. +// rel and abs may refer to info's underlying arrays. +// +// We store line info in-memory slightly differently from upstream Lua: +// rather than make Prototype.LineDefined be the base line number for the first offset, +// we have an implicit offset of zero to make [LineInfo] usable as a standalone data type. +// Frequently, the only difference between our representation and upstream Lua +// is info.rel[0] == absMarker instead of a relative offset, +// but that's easy to strip out without an allocation. +// We go through the whole exercise of verifying the entire array +// because [loadLineInfo] may import a well-formed but inefficient (or just different) packing. 
+func normalizeLineInfo(info LineInfo, base int) (rel0 int8, rel []int8, abs []absLineInfo) { + w := lineInfoWriter{previousLine: base} + relIdx := 0 + abs = info.abs + absIdx := 0 + + needsRewrite := false + for i, line := range info.All() { + if i == 0 { + rel0 = w.next(line) + isFirstAbsPC0 := len(info.abs) > 0 && info.abs[0].pc == 0 + if rel0 == absMarker && !isFirstAbsPC0 { + needsRewrite = true + break + } + if rel0 != absMarker && isFirstAbsPC0 { + // In the common case where we transformed the first element + // from an absolute line info to a line info relative to base, + // only use the subsequent absolute line entries. + abs = abs[1:] + } + } else { + want := w.next(line) + if info.rel[relIdx] != want { + needsRewrite = true + break + } + if want == absMarker { + if abs[absIdx].pc != i { + needsRewrite = true + break + } + absIdx++ + } + } + } + if !needsRewrite { + return rel0, info.rel[1:], abs + } + + // Reset writer and allocate new arrays. + w = lineInfoWriter{previousLine: base} + abs = nil + for pc, line := range info.All() { + delta := w.next(line) + if pc == 0 { + rel0 = delta + } else { + rel = append(rel, delta) + } + if delta == absMarker { + abs = append(abs, absLineInfo{ + pc: pc, + line: line, + }) + } + } + return rel0, rel, abs +} + +func loadLineInfo(r *chunkReader, base int) (LineInfo, error) { + n, err := r.readVarint() + if err != nil { + return LineInfo{}, fmt.Errorf("line info: %v", err) + } + info := LineInfo{ + rel: make([]int8, n), + } + nAbsolute := 0 // Counter for absMarker values read. + for i := range info.rel { + b, ok := r.readByte() + if !ok { + return LineInfo{}, fmt.Errorf("line info: %v", io.ErrUnexpectedEOF) + } + delta := int8(b) + if delta == absMarker { + info.rel[i] = absMarker + nAbsolute++ + } else if i > 0 { + info.rel[i] = delta + } else { + // Interpret the first element as relative to base, + // inserting an absMarker if needed. 
+ rebased := base + int(delta) + if newDelta, fitsRelative := lineInfoRelativeDelta(rebased); fitsRelative { + info.rel[i] = newDelta + } else { + info.rel[i] = absMarker + info.abs = append(info.abs, absLineInfo{ + pc: 0, + line: rebased, + }) + } + } + } + + if got, err := r.readVarint(); err != nil { + return LineInfo{}, fmt.Errorf("line info: %v", err) + } else if got != nAbsolute { + return LineInfo{}, fmt.Errorf("line info: absolute line info count incorrect (%d vs. %d markers)", got, nAbsolute) + } + info.abs = slices.Grow(info.abs, nAbsolute) + for i := range nAbsolute { + var newAbsInfo absLineInfo + newAbsInfo.pc, err = r.readVarint() + if err != nil { + return LineInfo{}, fmt.Errorf("line info: %v", err) + } + minPC := -1 + if len(info.abs) > 0 { + minPC = info.abs[len(info.abs)-1].pc + } + if newAbsInfo.pc <= minPC { + return LineInfo{}, fmt.Errorf("line info: absolute line info PCs not monotonically increasing") + } + if newAbsInfo.pc >= n { + return LineInfo{}, fmt.Errorf("line info: absolute line info PC %d out of range", newAbsInfo.pc) + } + if info.rel[newAbsInfo.pc] != absMarker { + return LineInfo{}, fmt.Errorf("line info: absolute line information not expected for pc %d", i) + } + + newAbsInfo.line, err = r.readVarint() + if err != nil { + return LineInfo{}, fmt.Errorf("line info: %v", err) + } + + info.abs = append(info.abs, newAbsInfo) + } + + return info, nil +} + +// A lineInfoWriter holds the state to construct a [LineInfo] a value at a time. +// This algorithm matches upstream Lua's. +type lineInfoWriter struct { + // previousLine is the last line number passed to next. + previousLine int + // instructionsSinceLastAbsLineInfo is a counter + // of instructions added since the last [absLineInfo]. + instructionsSinceLastAbsLineInfo uint8 +} + +// next returns the next value for the rel slice given the line. +// A new entry should be appended to LineInfo.abs +// if the returned value is [absMarker]. 
+func (w *lineInfoWriter) next(line int) int8 { + delta, fitsRelative := lineInfoRelativeDelta(line - w.previousLine) + w.previousLine = line + + if !fitsRelative || + w.instructionsSinceLastAbsLineInfo >= maxInstructionsWithoutAbsLineInfo { + w.instructionsSinceLastAbsLineInfo = 1 + return absMarker + } + + w.instructionsSinceLastAbsLineInfo++ + return delta +} + +// prev undoes the effects of a call to [*lineInfoWriter.next]. +func (w *lineInfoWriter) prev(lastDelta int8) { + if lastDelta == absMarker { + // Force next line info to be absolute. + w.instructionsSinceLastAbsLineInfo = maxInstructionsWithoutAbsLineInfo + 1 + } else { + w.previousLine -= int(lastDelta) + w.instructionsSinceLastAbsLineInfo-- + } +} + +func lineInfoRelativeDelta(delta int) (_ int8, ok bool) { + if delta > lineInfoRelativeLimit || delta < -lineInfoRelativeLimit { + return absMarker, false + } + return int8(delta), true +} diff --git a/internal/luacode/lineinfo_test.go b/internal/luacode/lineinfo_test.go new file mode 100644 index 0000000..c5d7dc2 --- /dev/null +++ b/internal/luacode/lineinfo_test.go @@ -0,0 +1,163 @@ +// Copyright 2024 The zb Authors +// SPDX-License-Identifier: MIT + +package luacode + +import ( + "slices" + "testing" + + "github.com/google/go-cmp/cmp" +) + +func TestEmptyLineInfo(t *testing.T) { + var info LineInfo + if got, want := info.Len(), 0; got != want { + t.Errorf("LineInfo{}.Len() = %d; want %d", got, want) + } + for pc, line := range info.All() { + t.Errorf("LineInfo{}.All() yielded %d, %d", pc, line) + } +} + +func TestLineInfo(t *testing.T) { + tests := [][]int{ + {}, + {1}, + {200}, + {1, 1, 2, 3}, + } + + t.Run("Len", func(t *testing.T) { + for _, test := range tests { + got := CollectLineInfo(slices.Values(test)) + + if got, want := got.Len(), len(test); got != want { + t.Errorf("CollectLineInfo(slices.Values(%v)).Len() = %d; want %d", test, got, want) + } + } + }) + + t.Run("At", func(t *testing.T) { + for _, test := range tests { + got := 
CollectLineInfo(slices.Values(test)) + + for i, want := range test { + if got := got.At(i); got != want { + t.Errorf("CollectLineInfo(slices.Values(%v)).At(%d) = %d; want %d", test, i, got, want) + } + } + } + }) + + t.Run("All", func(t *testing.T) { + for _, test := range tests { + got := CollectLineInfo(slices.Values(test)) + + gotAll := make([]int, 0, len(test)) + for i, line := range got.All() { + if want := len(gotAll); i != want { + t.Errorf("CollectLineInfo(slices.Values(%v)).All()[%d] has index %d", test, want, i) + } + gotAll = append(gotAll, line) + } + if diff := cmp.Diff(test, gotAll); diff != "" { + t.Errorf("CollectLineInfo(slices.Values(%v)).All() (-want +got):\n%s", test, diff) + } + } + }) +} + +func TestLineInfoWriter(t *testing.T) { + type nextCall struct { + line int + want int8 + } + + tests := []struct { + name string + base int + calls []nextCall + }{ + { + name: "AllSame", + calls: []nextCall{ + {100, 100}, + {100, 0}, + {100, 0}, + }, + }, + { + name: "AllRelative", + calls: []nextCall{ + {100, 100}, + {200, 100}, + {300, 100}, + }, + }, + { + name: "AllRelativeWithBase", + base: 99, + calls: []nextCall{ + {100, 1}, + {200, 100}, + {300, 100}, + }, + }, + { + name: "StartAbsolute", + calls: []nextCall{ + {200, absMarker}, + {300, 100}, + {400, 100}, + }, + }, + { + name: "InsertAbsoluteAfterLimit", + calls: append( + append( + []nextCall{{100, 100}}, + slices.Repeat([]nextCall{{100, 0}}, maxInstructionsWithoutAbsLineInfo-1)..., + ), + nextCall{100, absMarker}, + ), + }, + { + name: "InsertAbsoluteAfterSecondLimit", + calls: append( + append( + append( + append( + []nextCall{{100, 100}}, + slices.Repeat([]nextCall{{100, 0}}, maxInstructionsWithoutAbsLineInfo-1)..., + ), + nextCall{100, absMarker}, + ), + slices.Repeat([]nextCall{{100, 0}}, maxInstructionsWithoutAbsLineInfo-1)..., + ), + nextCall{100, absMarker}, + ), + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + w := lineInfoWriter{previousLine: 
test.base} + for i, call := range test.calls { + if got := w.next(call.line); got != call.want { + t.Errorf("[%d]: w.next(%d) = %d; want %d", i, call.line, got, call.want) + } + } + }) + } +} + +var lineInfoCompareOption = cmp.Transformer("lineInfoToSlice", lineInfoToSlice) + +func lineInfoToSlice(info LineInfo) []int { + s := make([]int, 0, info.Len()) + for _, line := range info.All() { + s = append(s, line) + } + return s +} diff --git a/internal/luacode/load.go b/internal/luacode/load.go index 19a502e..46260d8 100644 --- a/internal/luacode/load.go +++ b/internal/luacode/load.go @@ -151,7 +151,7 @@ func loadFunction(f *Prototype, r *chunkReader, parentSource Source) error { } // Debug - f.LineInfo, err = loadLineInfo(r) + f.LineInfo, err = loadLineInfo(r, f.LineDefined) if err != nil { return fmt.Errorf("load function: %v", err) } @@ -191,45 +191,6 @@ func loadFunction(f *Prototype, r *chunkReader, parentSource Source) error { return nil } -func loadLineInfo(r *chunkReader) (LineInfo, error) { - n, err := r.readVarint() - if err != nil { - return LineInfo{}, fmt.Errorf("line info: %v", err) - } - info := LineInfo{ - rel: make([]int8, n), - } - for i := range info.rel { - b, ok := r.readByte() - if !ok { - return LineInfo{}, fmt.Errorf("line info: %v", io.ErrUnexpectedEOF) - } - info.rel[i] = int8(b) - } - - n, err = r.readVarint() - if err != nil { - return LineInfo{}, fmt.Errorf("line info: %v", err) - } - info.abs = make([]absLineInfo, n) - for i := range info.abs { - info.abs[i].pc, err = r.readVarint() - if err != nil { - return LineInfo{}, fmt.Errorf("line info: %v", err) - } - if i > 0 && info.abs[i-1].pc >= info.abs[i].pc { - return LineInfo{}, fmt.Errorf("line info: absolute line information not monotonically increasing") - } - - info.abs[i].line, err = r.readVarint() - if err != nil { - return LineInfo{}, fmt.Errorf("line info: %v", err) - } - } - - return info, nil -} - type chunkReader struct { s []byte diff --git a/internal/luacode/prototype.go 
b/internal/luacode/prototype.go index e44a06b..a427109 100644 --- a/internal/luacode/prototype.go +++ b/internal/luacode/prototype.go @@ -209,7 +209,7 @@ func dumpFunction(buf []byte, f *Prototype, parentSource Source) ([]byte, error) } // Debug information - buf = dumpLineInfo(buf, f.LineInfo) + buf = dumpLineInfo(buf, f.LineDefined, f.LineInfo) buf = dumpVarint(buf, len(f.LocalVariables)) for _, v := range f.LocalVariables { buf = dumpString(buf, v.Name) @@ -393,41 +393,6 @@ func (source Source) String() string { return prefix + line + truncSignifier + suffix } -const maxInstructionsWithoutAbsLineInfo = 128 - -const ( - // lineInfoRelativeLimit is the limit for values in the rel slice - // of [LineInfo]. - lineInfoRelativeLimit = 1 << 7 - - // absMarker is the mark for entries in the rel slice of [LineInfo] - // that have absolute information in the abs slice. - absMarker int8 = -lineInfoRelativeLimit -) - -type LineInfo struct { - rel []int8 - abs []absLineInfo -} - -type absLineInfo struct { - pc int - line int -} - -func dumpLineInfo(buf []byte, info LineInfo) []byte { - buf = dumpVarint(buf, len(info.rel)) - for _, i := range info.rel { - buf = append(buf, byte(i)) - } - buf = dumpVarint(buf, len(info.abs)) - for _, a := range info.abs { - buf = dumpVarint(buf, a.pc) - buf = dumpVarint(buf, a.line) - } - return buf -} - // maxRegisters is the maximum number of registers in a Lua function. const maxRegisters = 255 diff --git a/internal/luacode/prototype_test.go b/internal/luacode/prototype_test.go index db21b57..35207ae 100644 --- a/internal/luacode/prototype_test.go +++ b/internal/luacode/prototype_test.go @@ -13,8 +13,7 @@ import ( ) var prototypeDiffOptions = cmp.Options{ - cmp.AllowUnexported(LineInfo{}), - cmp.AllowUnexported(absLineInfo{}), + lineInfoCompareOption, cmpopts.EquateEmpty(), }