Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add ChunkMatch.BestLineMatch to return the best-scoring line #884

Merged
merged 2 commits into from
Jan 8, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions api.go
Original file line number Diff line number Diff line change
Expand Up @@ -161,7 +161,13 @@ type ChunkMatch struct {
// beginning of a line (Column will always be 1).
ContentStart Location

// Score is the overall relevance score of this chunk.
Score float64

// BestLineMatch is the line number of the highest-scoring line match in this chunk.
// The line number is relative to the full file (not to the chunk) and is 1-based.
// If FileName is true, this number will be 0.
BestLineMatch uint32
}

func (cm *ChunkMatch) sizeBytes() (sz uint64) {
Expand Down
30 changes: 16 additions & 14 deletions api_proto.go
Original file line number Diff line number Diff line change
Expand Up @@ -96,13 +96,14 @@ func ChunkMatchFromProto(p *proto.ChunkMatch) ChunkMatch {
}

return ChunkMatch{
Content: p.GetContent(),
ContentStart: LocationFromProto(p.GetContentStart()),
FileName: p.GetFileName(),
Ranges: ranges,
SymbolInfo: symbols,
Score: p.GetScore(),
DebugScore: p.GetDebugScore(),
Content: p.GetContent(),
ContentStart: LocationFromProto(p.GetContentStart()),
FileName: p.GetFileName(),
Ranges: ranges,
SymbolInfo: symbols,
Score: p.GetScore(),
BestLineMatch: p.GetBestLineMatch(),
DebugScore: p.GetDebugScore(),
}
}

Expand All @@ -118,13 +119,14 @@ func (cm *ChunkMatch) ToProto() *proto.ChunkMatch {
}

return &proto.ChunkMatch{
Content: cm.Content,
ContentStart: cm.ContentStart.ToProto(),
FileName: cm.FileName,
Ranges: ranges,
SymbolInfo: symbolInfo,
Score: cm.Score,
DebugScore: cm.DebugScore,
Content: cm.Content,
ContentStart: cm.ContentStart.ToProto(),
FileName: cm.FileName,
Ranges: ranges,
SymbolInfo: symbolInfo,
Score: cm.Score,
BestLineMatch: cm.BestLineMatch,
DebugScore: cm.DebugScore,
}
}

Expand Down
2 changes: 1 addition & 1 deletion api_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -149,7 +149,7 @@ func TestMatchSize(t *testing.T) {
size: 256,
}, {
v: ChunkMatch{},
size: 112,
size: 120,
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I ran the fieldalignment tool as this test suggests and did not see a regression. Here is the output for api.go, where ChunkMatch lives; there is no mention of ChunkMatch or FileMatch:

/Users/jtibshirani/code/zoekt/api.go:232:16: struct with 136 pointer bytes could be 96
/Users/jtibshirani/code/zoekt/api.go:301:24: struct with 32 pointer bytes could be 8
/Users/jtibshirani/code/zoekt/api.go:503:19: struct with 216 pointer bytes could be 24
/Users/jtibshirani/code/zoekt/api.go:561:17: struct of size 224 could be 208
/Users/jtibshirani/code/zoekt/api.go:753:20: struct with 88 pointer bytes could be 56
/Users/jtibshirani/code/zoekt/api.go:833:27: struct with 16 pointer bytes could be 8
/Users/jtibshirani/code/zoekt/api.go:873:15: struct with 32 pointer bytes could be 16
/Users/jtibshirani/code/zoekt/api.go:929:20: struct of size 88 could be 80

}, {
v: candidateMatch{},
size: 80,
Expand Down
53 changes: 48 additions & 5 deletions build/scoring_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,11 +27,12 @@ import (
)

type scoreCase struct {
fileName string
content []byte
query query.Q
language string
wantScore float64
fileName string
content []byte
query query.Q
language string
wantScore float64
wantBestLineMatch uint32
}

func TestFileNameMatch(t *testing.T) {
Expand Down Expand Up @@ -79,6 +80,8 @@ func TestBM25(t *testing.T) {
language: "Java",
// bm25-score: 0.58 <- sum-termFrequencyScore: 14.00, length-ratio: 1.00
wantScore: 0.58,
// line 5: private final int exampleField;
wantBestLineMatch: 5,
}, {
// Matches only on content
fileName: "example.java",
Expand All @@ -91,6 +94,8 @@ func TestBM25(t *testing.T) {
language: "Java",
// bm25-score: 1.81 <- sum-termFrequencyScore: 116.00, length-ratio: 1.00
wantScore: 1.81,
// line 3: public class InnerClasses {
wantBestLineMatch: 3,
},
{
// Matches only on filename
Expand Down Expand Up @@ -130,6 +135,8 @@ func TestJava(t *testing.T) {
language: "Java",
// 5500 (partial symbol at boundary) + 1000 (Java class) + 50 (partial word)
wantScore: 6550,
// line 37: public class InnerClass implements InnerInterface<Integer, Integer> {
wantBestLineMatch: 37,
},
{
fileName: "example.java",
Expand All @@ -138,6 +145,8 @@ func TestJava(t *testing.T) {
language: "Java",
// 5500 (partial symbol at boundary) + 1000 (Java class) + 500 (word)
wantScore: 7000,
// line 32: public static class InnerStaticClass {
wantBestLineMatch: 32,
},
{
fileName: "example.java",
Expand All @@ -146,6 +155,8 @@ func TestJava(t *testing.T) {
language: "Java",
// 7000 (symbol) + 900 (Java enum) + 500 (word)
wantScore: 8400,
// line 16: public enum InnerEnum {
wantBestLineMatch: 16,
},
{
fileName: "example.java",
Expand All @@ -154,6 +165,8 @@ func TestJava(t *testing.T) {
language: "Java",
// 7000 (symbol) + 800 (Java interface) + 500 (word)
wantScore: 8300,
// line 22: public interface InnerInterface<A, B> {
wantBestLineMatch: 22,
},
{
fileName: "example.java",
Expand All @@ -162,6 +175,8 @@ func TestJava(t *testing.T) {
language: "Java",
// 7000 (symbol) + 700 (Java method) + 500 (word)
wantScore: 8200,
// line 44: public void innerMethod() {
wantBestLineMatch: 44,
},
{
fileName: "example.java",
Expand All @@ -170,6 +185,8 @@ func TestJava(t *testing.T) {
language: "Java",
// 7000 (symbol) + 600 (Java field) + 500 (word)
wantScore: 8100,
// line 38: private final int field;
wantBestLineMatch: 38,
},
{
fileName: "example.java",
Expand All @@ -178,6 +195,8 @@ func TestJava(t *testing.T) {
language: "Java",
// 7000 (symbol) + 500 (Java enum constant) + 500 (word)
wantScore: 8000,
// line 18: B,
wantBestLineMatch: 18,
},
// 2 Atoms (1x content and 1x filename)
{
Expand All @@ -187,6 +206,8 @@ func TestJava(t *testing.T) {
language: "Java",
// 5500 (edge symbol) + 600 (Java field) + 500 (word) + 200 (atom)
wantScore: 6800,
// line 5: private final int exampleField;
wantBestLineMatch: 5,
},
// 3 Atoms (2x content, 1x filename)
{
Expand All @@ -199,6 +220,8 @@ func TestJava(t *testing.T) {
language: "Java",
// 7000 (symbol) + 700 (Java method) + 500 (word) + 266.67 (atom)
wantScore: 8466,
// line 54: private static <A, B> B runInnerInterface(InnerInterface<A, B> fn, A a) {
wantBestLineMatch: 54,
},
// 4 Atoms (4x content)
{
Expand All @@ -213,6 +236,8 @@ func TestJava(t *testing.T) {
language: "Java",
// 7000 (symbol) + 900 (Java enum) + 500 (word) + 300 (atom)
wantScore: 8700,
// line 16: public enum InnerEnum {
wantBestLineMatch: 16,
},
{
fileName: "example.java",
Expand All @@ -221,6 +246,8 @@ func TestJava(t *testing.T) {
language: "Java",
// 4000 (overlap Symbol) + 700 (Java method) + 50 (partial word)
wantScore: 4750,
// line 54: private static <A, B> B runInnerInterface(InnerInterface<A, B> fn, A a) {
wantBestLineMatch: 54,
},
{
fileName: "example.java",
Expand All @@ -229,6 +256,8 @@ func TestJava(t *testing.T) {
language: "Java",
// 7000 (Symbol) + 900 (Java enum) + 500 (word)
wantScore: 8400,
// line 16: public enum InnerEnum {
wantBestLineMatch: 16,
},
{
fileName: "example.java",
Expand All @@ -237,6 +266,8 @@ func TestJava(t *testing.T) {
language: "Java",
// 5500 (edge Symbol) + 900 (Java enum) + 500 (word)
wantScore: 6900,
// line 16: public enum InnerEnum {
wantBestLineMatch: 16,
},
{
fileName: "example.java",
Expand All @@ -245,6 +276,8 @@ func TestJava(t *testing.T) {
language: "Java",
// 4000 (overlap Symbol) + 900 (Java enum) + 500 (word)
wantScore: 5400,
// line 16: public enum InnerEnum {
wantBestLineMatch: 16,
},
}

Expand Down Expand Up @@ -640,6 +673,16 @@ func checkScoring(t *testing.T, c scoreCase, useBM25 bool, parserType ctags.CTag
t.Fatalf("score: want %f, got %f\ndebug: %s\ndebugscore: %s", c.wantScore, got, srs.Files[0].Debug, srs.Files[0].ChunkMatches[0].DebugScore)
}

if c.wantBestLineMatch != 0 {
if len(srs.Files[0].ChunkMatches) == 0 {
t.Fatalf("want BestLineMatch %d, but no chunk matches were returned", c.wantBestLineMatch)
}
chunkMatch := srs.Files[0].ChunkMatches[0]
if chunkMatch.BestLineMatch != c.wantBestLineMatch {
t.Fatalf("want BestLineMatch %d, got %d", c.wantBestLineMatch, chunkMatch.BestLineMatch)
}
}

if got := srs.Files[0].Language; got != c.language {
t.Fatalf("want %s, got %s", c.language, got)
}
Expand Down
Loading
Loading