Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

cannier: implement parts (features, model, test set) #3930

Draft
wants to merge 15 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
The table of contents is too big for display.
Diff view
Diff view
  •  
  •  
  •  
2 changes: 2 additions & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ require (
github.com/jetstack/cert-manager v1.1.0
github.com/joshdk/go-junit v0.0.0-20190428045703-ad7e11aa49ff
github.com/lnquy/cron v1.1.1
github.com/malaschitz/randomForest v0.0.0-20240228214944-c64ffe1648e0
github.com/onsi/ginkgo/v2 v2.19.0
github.com/onsi/gomega v1.34.1
github.com/prometheus/client_golang v1.19.0
Expand Down Expand Up @@ -200,6 +201,7 @@ require (
golang.org/x/tools v0.23.0 // indirect
golang.org/x/xerrors v0.0.0-20240716161551-93cc26a95ae9 // indirect
gomodules.xyz/jsonpatch/v2 v2.4.0 // indirect
gonum.org/v1/gonum v0.12.0 // indirect
google.golang.org/genproto v0.0.0-20240812133136-8ffd90a71988 // indirect
google.golang.org/genproto/googleapis/api v0.0.0-20240812133136-8ffd90a71988 // indirect
google.golang.org/genproto/googleapis/rpc v0.0.0-20240812133136-8ffd90a71988 // indirect
Expand Down
129 changes: 129 additions & 0 deletions go.sum

Large diffs are not rendered by default.

13 changes: 13 additions & 0 deletions robots/cmd/cannier/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# CANNIER - implementations for the CANNIER method

## `extract`

Extracts a feature set as described in the [CANNIER] paper.

Usage:

```bash
go run ./... extract ...
```

[CANNIER]: https://www.gregorykapfhammer.com/download/research/papers/key/Parry2023-paper.pdf
32 changes: 32 additions & 0 deletions robots/cmd/cannier/cmd/cmd_suite_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
/*
* This file is part of the KubeVirt project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* Copyright the KubeVirt Authors.
*
*/

package cmd

import (
"testing"

. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
)

// TestCmd is the `go test` entry point for the specs of this package. It
// registers Fail as the fail handler and then runs the suite named
// "cannier cmd".
func TestCmd(t *testing.T) {
RegisterFailHandler(Fail)
RunSpecs(t, "cannier cmd")
}
89 changes: 89 additions & 0 deletions robots/cmd/cannier/cmd/extract-features.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
/*
* This file is part of the KubeVirt project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* Copyright the KubeVirt Authors.
*
*/

package cmd

import (
"encoding/json"
"fmt"
"kubevirt.io/project-infra/robots/pkg/cannier"
"kubevirt.io/project-infra/robots/pkg/ginkgo"
"os"

"github.com/spf13/cobra"
)

// Flag values of the `features` subcommand. The pointers are assigned in init,
// where the flags are registered on extractFeatureSetCmd.
var (
// testName is the value of --test-name/-t: name of the test to analyze.
testName *string
// fileName is the value of --filename/-f: file of the test to analyze.
fileName *string
// outputFileName is the value of --output-filename/-o: target file for the
// resulting feature set (JSON).
outputFileName *string
// overwriteOutputFile is the value of --overwrite/-F.
overwriteOutputFile *bool
// asRequest is the value of --as-request/-r; it defaults to true.
asRequest *bool
)

// init attaches the `features` subcommand to extractCmd and registers its
// command line flags. The pointers returned by the flag set are kept in the
// package-level flag variables, which the subcommand dereferences when run.
func init() {
	extractCmd.AddCommand(extractFeatureSetCmd)

	flags := extractFeatureSetCmd.Flags()
	testName = flags.StringP("test-name", "t", "", "name of the test to analyze")
	fileName = flags.StringP("filename", "f", "", "filename for the test to analyze")
	outputFileName = flags.StringP("output-filename", "o", "", "filename to write the resulting feature set into, format is json")
	overwriteOutputFile = flags.BoolP("overwrite", "F", false, "whether to overwrite the output file if it exists")
	asRequest = flags.BoolP("as-request", "r", true, "whether to output the bare data or the data suitable for a hosted model request")
}

// extractFeatureSetCmd is the `features` subcommand, which init registers
// below extractCmd. Running it forwards the parsed flag values to
// ExtractFeatures and returns that function's error, if any.
var extractFeatureSetCmd = &cobra.Command{
Use: "features",
Short: "features extracts a feature set from a single test",
Long: `Extracts a feature set as described in the CANNIER paper from a single test.`,
RunE: func(cmd *cobra.Command, args []string) error {
return ExtractFeatures(*testName, *fileName, *outputFileName, *overwriteOutputFile, *asRequest)
},
}

// ExtractFeatures extracts the CANNIER feature set for a single test and
// writes it JSON encoded into outputFileName.
//
// testName and fileName identify the test; both are handed to
// ginkgo.NewTestDescriptorForName. outputFileName is required. Unless
// overwriteOutputFile is set, an already existing output file is rejected.
// If asRequest is set, the features are wrapped into a RequestData value
// before encoding, otherwise the bare feature set is encoded.
//
// An error is returned when the test descriptor cannot be created, the output
// file name is empty, the output file exists (or cannot be checked) while
// overwriting is disabled, the feature extraction fails, or the output file
// cannot be created, written or closed.
func ExtractFeatures(testName string, fileName string, outputFileName string, overwriteOutputFile bool, asRequest bool) error {
	testDescriptor, err := ginkgo.NewTestDescriptorForName(testName, fileName)
	if err != nil {
		return err
	}
	if outputFileName == "" {
		return fmt.Errorf("output fileName is required")
	}
	if !overwriteOutputFile {
		_, statErr := os.Stat(outputFileName)
		switch {
		case statErr == nil:
			return fmt.Errorf("output file %q must not exist", outputFileName)
		case !os.IsNotExist(statErr):
			// Anything but "not exist" (e.g. permission denied) means we do
			// not know whether the file is there, so report the real cause
			// instead of claiming that the file exists.
			return fmt.Errorf("checking output file %q: %w", outputFileName, statErr)
		}
	}
	features, err := cannier.ExtractFeatures(testDescriptor)
	if err != nil {
		return fmt.Errorf("error extracting features: %w", err)
	}
	outputFile, err := os.Create(outputFileName)
	if err != nil {
		return fmt.Errorf("error writing output file: %w", err)
	}

	var payload interface{} = features
	if asRequest {
		payload = RequestData{Features: features}
	}
	encodeErr := json.NewEncoder(outputFile).Encode(payload)
	// Close explicitly rather than deferred: a failing close can mean that the
	// data never reached the disk, which must not go unnoticed.
	closeErr := outputFile.Close()
	if encodeErr != nil {
		return fmt.Errorf("error writing output file: %w", encodeErr)
	}
	if closeErr != nil {
		return fmt.Errorf("error closing output file: %w", closeErr)
	}
	return nil
}
161 changes: 161 additions & 0 deletions robots/cmd/cannier/cmd/extract-testnames.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,161 @@
/*
* This file is part of the KubeVirt project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* Copyright the KubeVirt Authors.
*
*/

package cmd

import (
"encoding/json"
"fmt"
log "github.com/sirupsen/logrus"
"github.com/spf13/cobra"
"kubevirt.io/project-infra/robots/pkg/ginkgo"
"kubevirt.io/project-infra/robots/pkg/git"
"os"
"path/filepath"
"regexp"
)

// Flag values of the `testnames` subcommand. The pointers are assigned in
// init, where the flags are registered on extractTestNamesCmd.
var (
// revisionRange is the value of --revision-range/-r; it defaults to
// "main..HEAD".
revisionRange *string
// repoPath is the value of --repo-path/-p.
repoPath *string
// testSubDirectory is the value of --test-subdirectory/-t.
testSubDirectory *string
// debug is the value of --debug/-D; it raises the log level and makes
// ExtractTestNames dump its intermediate data.
debug *bool
)

// revisionRangeRegex validates the value of --revision-range: either a single
// revision ("HEAD") or a range ("main..HEAD"), in both cases free of
// whitespace. The dots of the range separator are escaped so that they match
// literal dots only; unescaped they match any two characters, which lets
// input such as "main HEAD" pass the validation.
var revisionRangeRegex = regexp.MustCompile(`^([^\s]+)(\.\.([^\s]+))?$`)

// init attaches the `testnames` subcommand to extractCmd and registers its
// command line flags. The pointers returned by the flag set are kept in the
// package-level flag variables, which the subcommand dereferences when run.
func init() {
	extractCmd.AddCommand(extractTestNamesCmd)

	flags := extractTestNamesCmd.Flags()
	revisionRange = flags.StringP("revision-range", "r", "main..HEAD", "gives the revision range to look at when determining the changes")
	// The help text used to be a copy of the --test-subdirectory description;
	// the value is the path of the repository that the changed file names get
	// joined onto.
	repoPath = flags.StringP("repo-path", "p", "", "gives the path to the git repository to look at when determining the changed tests")
	testSubDirectory = flags.StringP("test-subdirectory", "t", "", "gives the test directory to look at when determining the changed tests")
	debug = flags.BoolP("debug", "D", false, "print and store debugging information - WARNING: might be VERY verbose!")
}

// extractTestNamesCmd is the `testnames` subcommand, which init registers
// below extractCmd. Running it switches logging to the JSON formatter, raises
// the log level to debug when --debug is set, and then calls ExtractTestNames
// with the parsed flag values.
var extractTestNamesCmd = &cobra.Command{
Use: "testnames",
Short: "Extracts the names for the changed ginkgo tests for a range of commits",
Long: `Extracts the names for the changed ginkgo tests for a range of commits.

Test names are determined by looking at the changes from the lines changed in the commits, then matching those with the ginkgo outline for the changed files.`,
RunE: func(cmd *cobra.Command, args []string) error {
log.SetFormatter(&log.JSONFormatter{})
if *debug {
log.SetLevel(log.DebugLevel)
}
// note the argument order: the test subdirectory comes before the repo path
return ExtractTestNames(*revisionRange, *testSubDirectory, *repoPath, *debug)
},
}

// ExtractTestNames gathers the data needed to determine the ginkgo tests that
// were changed within revisionRange.
//
// The revision range is validated against revisionRangeRegex, then the commits
// are fetched with git.LogCommits. For every file changed by those commits
// (resolved against repoPath) the ginkgo outline and the git blame lines are
// loaded once. If debug is set, commits, outlines and blame lines are each
// dumped as JSON into a temporary file whose name is logged at debug level.
//
// The collected data is not evaluated any further yet, so apart from the debug
// dumps there is no output. The first error encountered is returned.
//
// NOTE(review): a file that is listed in a commit but no longer present in the
// work tree presumably makes the outline lookup fail - confirm how deletions
// should be handled.
func ExtractTestNames(revisionRange string, testDirectory string, repoPath string, debug bool) error {
	if !revisionRangeRegex.MatchString(revisionRange) {
		return fmt.Errorf("revision range must be a valid git revision range")
	}
	commits, err := git.LogCommits(revisionRange, repoPath, testDirectory)
	if err != nil {
		return err
	}
	outlines := make(map[string][]*ginkgo.Node)
	blameLines := make(map[string][]*git.BlameLine)
	for _, logCommit := range commits {
		for _, fileChange := range logCommit.FileChanges {
			testFilename := filepath.Join(repoPath, fileChange.Filename)
			// a file touched by several commits is only loaded once
			if _, ok := outlines[testFilename]; ok {
				continue
			}
			outline, err := ginkgo.OutlineFromFile(testFilename)
			if err != nil {
				return err
			}
			outlines[testFilename] = outline
			blameLinesForFile, err := git.GetBlameLinesForFile(testFilename)
			if err != nil {
				return err
			}
			blameLines[testFilename] = blameLinesForFile
		}
	}
	if !debug {
		return nil
	}

	debugDumps := []struct {
		label   string
		pattern string
		data    interface{}
	}{
		{label: "commits", pattern: "commits-*.json", data: commits},
		{label: "outlines", pattern: "outlines-*.json", data: outlines},
		{label: "blameLines", pattern: "blame-lines-*.json", data: blameLines},
	}
	for _, dump := range debugDumps {
		tempFileName, err := writeDebugJSONTempFile(dump.pattern, dump.data)
		if err != nil {
			return err
		}
		log.Debugf("%s written to %q", dump.label, tempFileName)
	}
	return nil
}

// writeDebugJSONTempFile encodes data as JSON into a new temporary file created
// from pattern and returns the name of that file. The file is always closed
// before returning; encode and close errors are reported to the caller.
func writeDebugJSONTempFile(pattern string, data interface{}) (string, error) {
	tempFile, err := os.CreateTemp("", pattern)
	if err != nil {
		return "", err
	}
	encodeErr := json.NewEncoder(tempFile).Encode(data)
	closeErr := tempFile.Close()
	if encodeErr != nil {
		return "", fmt.Errorf("writing debug data to %q: %w", tempFile.Name(), encodeErr)
	}
	if closeErr != nil {
		return "", fmt.Errorf("closing debug file %q: %w", tempFile.Name(), closeErr)
	}
	return tempFile.Name(), nil
}

// extractChangedTestNames is a placeholder that is not implemented yet: it
// ignores its arguments and always returns nil.
//
// NOTE(review): judging by the name and parameters it is presumably meant to
// derive the names of the changed tests from the commits, the ginkgo outlines
// and the blame lines - confirm once implemented.
func extractChangedTestNames(commits []*git.LogCommit,
outlines map[string][]*ginkgo.Node, blameLines map[string][]*git.BlameLine) []string {
return nil
}

// blameLinesForCommits filters blameLines down to the lines that were last
// changed by one of the given commits.
//
// Commits are matched by comparing the first 11 characters of the commit hash
// with the CommitID of each blame line. The result maps each file name to its
// matching blame lines in their original order; files without any matching
// line are not present in the result.
func blameLinesForCommits(commits []*git.LogCommit, blameLines map[string][]*git.BlameLine) (filenamesToBlamelines map[string][]*git.BlameLine) {
	// abbreviatedHashLength is the number of leading hash characters that are
	// compared against the blame line commit IDs.
	const abbreviatedHashLength = 11

	commitIDs := make(map[string]struct{}, len(commits))
	for _, commit := range commits {
		commitID := commit.Hash
		// only truncate hashes that are long enough, slicing a shorter one
		// would panic
		if len(commitID) > abbreviatedHashLength {
			commitID = commitID[:abbreviatedHashLength]
		}
		commitIDs[commitID] = struct{}{}
	}

	filenamesToBlamelines = make(map[string][]*git.BlameLine)
	for filename, blameLinesForFile := range blameLines {
		for _, line := range blameLinesForFile {
			if _, ok := commitIDs[line.CommitID]; ok {
				filenamesToBlamelines[filename] = append(filenamesToBlamelines[filename], line)
			}
		}
	}
	return filenamesToBlamelines
}

// outlinesForBlameLines is not implemented yet. It walks the file names of
// blamelines and skips those without an outline, but nothing is collected, so
// the returned slice is always nil.
func outlinesForBlameLines(blamelines map[string][]*git.BlameLine, outlines map[string][]*ginkgo.Node) (result []*ginkgo.Node) {
	for blameFilename := range blamelines {
		_, hasOutline := outlines[blameFilename]
		if !hasOutline {
			continue
		}

		// TODO: match the outline to the blame lines.
		//
		// Problem: a blame line carries a line number, whereas an outline node
		// carries the start and end character offsets. Therefore build a list
		// of all lines together with their starting character offset first.
		//
		// The result should be the filtered outline, i.e. an outline holding
		// all containers affected by the changes, so that the caller can
		// construct the full test names directly.
	}
	return result
}
Loading