diff --git a/cmd/scip/stats.go b/cmd/scip/stats.go
index 8108d05d..89e2b9d5 100644
--- a/cmd/scip/stats.go
+++ b/cmd/scip/stats.go
@@ -9,7 +9,9 @@ import (
 	"path/filepath"
 
 	"github.com/hhatto/gocloc"
+	"github.com/montanaflynn/stats"
 	"github.com/urfave/cli/v2"
+	"google.golang.org/protobuf/proto"
 
 	"github.com/sourcegraph/sourcegraph/lib/errors"
 
@@ -59,35 +61,135 @@ func statsMain(flags statsFlags) error {
 	return nil
 }
 
+// Stats summarizes the distribution of a set of per-document measurements.
+// Every numeric field is the corresponding float64 statistic converted to
+// int32; Comment is free-form text describing the measurement.
+type Stats struct {
+	Percentiles struct {
+		Fifty               int32 `json:"50"`
+		Ninety              int32 `json:"90"`
+		NinetyFive          int32 `json:"95"`
+		NinetyNine          int32 `json:"99"`
+		NinetyNinePointNine int32 `json:"99.9"`
+	}
+	Mean    int32  `json:"mean"`
+	Stddev  int32  `json:"stddev"`
+	Max     int32  `json:"max"`
+	Sum     int32  `json:"sum"`
+	Comment string `json:"comment"`
+}
+
+// NewStats computes summary statistics over values. An empty input yields
+// the zero Stats: the stats package reports an error and NaN for empty
+// input, and NaN has no well-defined int32 conversion.
+func NewStats(values []float64) Stats {
+	s := Stats{}
+	if len(values) == 0 {
+		return s
+	}
+	s.Percentiles.Fifty = percentile(values, 50)
+	s.Percentiles.Ninety = percentile(values, 90)
+	s.Percentiles.NinetyFive = percentile(values, 95)
+	s.Percentiles.NinetyNine = percentile(values, 99)
+	s.Percentiles.NinetyNinePointNine = percentile(values, 99.9)
+	// Errors are discarded below: these calls fail on empty input, which
+	// the guard above has already excluded.
+	mean, _ := stats.Mean(values)
+	s.Mean = saturatingInt32(mean)
+	stddev, _ := stats.StandardDeviation(values)
+	s.Stddev = saturatingInt32(stddev)
+	maximum, _ := stats.Max(values)
+	s.Max = saturatingInt32(maximum)
+	sum, _ := stats.Sum(values)
+	s.Sum = saturatingInt32(sum)
+	return s
+}
+
+// indexStatistics holds index-wide totals alongside the per-document
+// distribution of document sizes, occurrence counts and definition counts.
 type indexStatistics struct {
-	Documents   int32 `json:"documents"`
-	LinesOfCode int32 `json:"linesOfCode"`
-	Occurrences int32 `json:"occurrences"`
-	Definitions int32 `json:"definitions"`
+	Documents        int32 `json:"documents"`
+	DocumentSizes    Stats `json:"documentSizes"`
+	LinesOfCode      int32 `json:"linesOfCode"`
+	Occurrences      int32 `json:"occurrences"`
+	OccurrenceCounts Stats `json:"occurrenceCounts"`
+	Definitions      int32 `json:"definitions"`
+	DefinitionCounts Stats `json:"definitionCounts"`
 }
 
+// countStatistics computes index-wide totals and per-document distributions
+// for index. A failure to count lines of code is logged and leaves
+// LinesOfCode at 0 instead of aborting the whole computation.
 func countStatistics(index *scip.Index, customProjectRoot string) (*indexStatistics, error) {
 	loc, err := countLinesOfCode(index, customProjectRoot)
+	var linesOfCode int32
 	if err != nil {
-		return nil, err
+		// Keep this a non-fatal error so that we can measure other index stats
+		// even if the project is not cloned locally (e.g. if it's a huge
+		// project like Chromium or the Linux kernel).
+		log.Printf("Couldn't count lines of code: %s", err)
+	} else {
+		linesOfCode = loc.Total.Code
 	}
 	stats := &indexStatistics{
 		Documents:   int32(len(index.Documents)),
-		LinesOfCode: loc.Total.Code,
+		LinesOfCode: linesOfCode,
 		Occurrences: 0,
 		Definitions: 0,
 	}
+	// One sample per document, so the final length is known up front.
+	documentSizes := make([]float64, 0, len(index.Documents))
+	occurrenceCounts := make([]float64, 0, len(index.Documents))
+	definitionCounts := make([]float64, 0, len(index.Documents))
 	for _, document := range index.Documents {
+		// proto.Size reports the wire-format size directly, so there is no
+		// serialized buffer to allocate and no marshal error to drop.
+		documentSizes = append(documentSizes, float64(proto.Size(document)))
+		stats.Occurrences += int32(len(document.Occurrences))
+		occurrenceCounts = append(occurrenceCounts, float64(len(document.Occurrences)))
+		documentDefinitions := 0.0
 		for _, occurrence := range document.Occurrences {
-			stats.Occurrences += 1
 			if scip.SymbolRole_Definition.Matches(occurrence) {
 				stats.Definitions += 1
+				documentDefinitions++
 			}
 		}
+		definitionCounts = append(definitionCounts, documentDefinitions)
 	}
+	stats.DocumentSizes = NewStats(documentSizes)
+	stats.DocumentSizes.Comment = "sizes are in bytes"
+	stats.OccurrenceCounts = NewStats(occurrenceCounts)
+	stats.DefinitionCounts = NewStats(definitionCounts)
+	stats.DefinitionCounts.Comment = "counted using occurrences"
 	return stats, nil
 }
 
+// percentile returns the given percentile of buf as an int32. The error from
+// stats.Percentile is dropped; a NaN result (e.g. for empty buf) becomes 0.
+func percentile(buf []float64, percent float64) int32 {
+	res, _ := stats.Percentile(buf, percent)
+	return saturatingInt32(res)
+}
+
+// saturatingInt32 converts v to int32, truncating toward zero. Values outside
+// the int32 range are clamped to the nearest bound and NaN maps to 0, because
+// Go leaves out-of-range float-to-integer conversions implementation-defined.
+func saturatingInt32(v float64) int32 {
+	const (
+		minInt32 = -1 << 31
+		maxInt32 = 1<<31 - 1
+	)
+	switch {
+	case v != v: // NaN is the only value that is not equal to itself.
+		return 0
+	case v >= maxInt32:
+		return maxInt32
+	case v <= minInt32:
+		return minInt32
+	}
+	return int32(v)
+}
+
 func countLinesOfCode(index *scip.Index, customProjectRoot string) (*gocloc.Result, error) {
 	var localSource string
 	root, err := url.Parse(index.Metadata.ProjectRoot)
diff --git a/go.mod b/go.mod
index 1b77a15c..5d2744ec 100644
--- a/go.mod
+++ b/go.mod
@@ -9,6 +9,7 @@ require (
 	github.com/hexops/gotextdiff v1.0.3
 	github.com/hhatto/gocloc v0.4.2
 	github.com/k0kubun/pp/v3 v3.1.0
+	github.com/montanaflynn/stats v0.7.1
 	github.com/pseudomuto/protoc-gen-doc v1.5.1
 	github.com/smacker/go-tree-sitter v0.0.0-20220209044044-0d3022e933c3
 	github.com/sourcegraph/sourcegraph/lib v0.0.0-20220511160847-5a43d3ea24eb
diff --git a/go.sum b/go.sum
index 27776a07..ae9422ff 100644
--- a/go.sum
+++ b/go.sum
@@ -282,6 +282,8 @@ github.com/modern-go/reflect2 v0.0.0-20180701023420-4b7aa43c6742/go.mod h1:bx2lN
 github.com/modern-go/reflect2 v1.0.1/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0=
 github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9Gz0M=
 github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk=
+github.com/montanaflynn/stats v0.7.1 h1:etflOAAHORrCC44V+aR6Ftzort912ZU+YLiSTuV8eaE=
+github.com/montanaflynn/stats v0.7.1/go.mod h1:etXPPgVO6n31NxCd9KQUMvCM+ve0ruNzt6R8Bnaayow=
 github.com/morikuni/aec v1.0.0 h1:nP9CBfwrvYnBRgY6qfDQkygYDmYwOilePFkwzv4dU8A=
 github.com/morikuni/aec v1.0.0/go.mod h1:BbKIizmSmc5MMPqRYbxO4ZU0S0+P200+tUnFx7PXmsc=
 github.com/moul/http2curl v1.0.0/go.mod h1:8UbvGypXm98wA/IqH45anm5Y2Z6ep6O31QGOAZ3H0fQ=