From 7ea4e2e5ecd0486ac9f4a6ffbf9bd9691e90dce2 Mon Sep 17 00:00:00 2001 From: Ben Selwyn-Smith Date: Mon, 8 Jan 2024 15:32:44 +1000 Subject: [PATCH] chore: disregard branch in commit finder (#586) Signed-off-by: Ben Selwyn-Smith --- scripts/dev_scripts/integration_tests.sh | 2 +- src/macaron/repo_finder/commit_finder.py | 72 ++++++------------ src/macaron/slsa_analyzer/analyzer.py | 7 +- tests/e2e/expected_results/maven/guava.json | 23 +++--- .../purl/com_google_guava/guava/guava.json | 33 +++++---- tests/repo_finder/test_commit_finder.py | 73 ++++--------------- 6 files changed, 70 insertions(+), 140 deletions(-) diff --git a/scripts/dev_scripts/integration_tests.sh b/scripts/dev_scripts/integration_tests.sh index 36928532c..8b8d6d355 100755 --- a/scripts/dev_scripts/integration_tests.sh +++ b/scripts/dev_scripts/integration_tests.sh @@ -381,7 +381,7 @@ echo "google/guava: Analyzing with PURL and repository path without dependency r echo -e "----------------------------------------------------------------------------------\n" JSON_EXPECTED=$WORKSPACE/tests/e2e/expected_results/purl/com_google_guava/guava/guava.json JSON_RESULT=$WORKSPACE/output/reports/maven/com_google_guava/guava/guava.json -$RUN_MACARON analyze -purl pkg:maven/com.google.guava/guava@32.1.2-jre?type=jar -rp https://github.com/google/guava -b master -d d8633ac8539dae52c8361f79c7a0dbd9ad6dd2c4 --skip-deps || log_fail +$RUN_MACARON analyze -purl pkg:maven/com.google.guava/guava@32.1.2-jre?type=jar --skip-deps || log_fail check_or_update_expected_output $COMPARE_JSON_OUT $JSON_RESULT $JSON_EXPECTED || log_fail diff --git a/src/macaron/repo_finder/commit_finder.py b/src/macaron/repo_finder/commit_finder.py index 202891df3..e96bdbc42 100644 --- a/src/macaron/repo_finder/commit_finder.py +++ b/src/macaron/repo_finder/commit_finder.py @@ -1,4 +1,4 @@ -# Copyright (c) 2023 - 2023, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2023 - 2024, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. """This module contains the logic for matching PackageURL versions to repository commits via the tags they contain.""" @@ -114,10 +114,10 @@ class AbstractPurlType(Enum): UNSUPPORTED = (2,) -def find_commit(git_obj: Git, purl: PackageURL) -> tuple[str, str]: +def find_commit(git_obj: Git, purl: PackageURL) -> str | None: """Try to find the commit matching the passed PURL. - The PURL may have be a repository type, e.g. GitHub, in which case the commit might be in its version part. + The PURL may be a repository type, e.g. GitHub, in which case the commit might be in its version part. Otherwise, the PURL should be a package manager type, e.g. Maven, in which case the commit must be found from the artifact version. @@ -130,13 +130,13 @@ def find_commit(git_obj: Git, purl: PackageURL) -> tuple[str, str]: Returns ------- - tuple[str, str] - The branch name and digest as a tuple. + str | None + The digest, or None if the commit cannot be correctly retrieved. """ version = purl.version if not version: logger.debug("Missing version for analysis target: %s", purl.name) - return "", "" + return None repo_type = determine_abstract_purl_type(purl) if repo_type == AbstractPurlType.REPOSITORY: @@ -144,7 +144,7 @@ def find_commit(git_obj: Git, purl: PackageURL) -> tuple[str, str]: if repo_type == AbstractPurlType.ARTIFACT: return find_commit_from_version_and_name(git_obj, purl.name, version) logger.debug("Type of PURL is not supported for commit finding: %s", purl.type) - return "", "" + return None def determine_abstract_purl_type(purl: PackageURL) -> AbstractPurlType: @@ -157,7 +157,7 @@ def determine_abstract_purl_type(purl: PackageURL) -> AbstractPurlType: Returns ------- - PurlType: + PurlType The identified type of the PURL. """ available_domains = [git_service.hostname for git_service in GIT_SERVICES if git_service.hostname] @@ -174,7 +174,7 @@ def determine_abstract_purl_type(purl: PackageURL) -> AbstractPurlType: return AbstractPurlType.UNSUPPORTED -def extract_commit_from_version(git_obj: Git, version: str) -> tuple[str, str]: +def extract_commit_from_version(git_obj: Git, version: str) -> str | None: """Try to extract the commit from the PURL's version parameter. E.g. @@ -190,8 +190,8 @@ def extract_commit_from_version(git_obj: Git, version: str) -> tuple[str, str]: Returns ------- - tuple[str, str] - The branch name and digest as a tuple. + str | None + The digest, or None if the commit cannot be correctly retrieved. """ # A commit hash is 40 characters in length, but commits are often referenced using only some of those. commit: Commit | None = None @@ -211,17 +211,12 @@ def extract_commit_from_version(git_obj: Git, version: str) -> tuple[str, str]: logger.debug("Failed to retrieve commit: %s", error) if not commit: - return "", "" - - branch_name = _get_branch_of_commit(commit) - if not branch_name: - logger.debug("No valid branch found for commit: %s", commit.hash) - return "", "" + return None - return branch_name, commit.hash + return commit.hash if commit else None -def find_commit_from_version_and_name(git_obj: Git, name: str, version: str) -> tuple[str, str]: +def find_commit_from_version_and_name(git_obj: Git, name: str, version: str) -> str | None: """Try to find the matching commit in a repository of a given version (and name) via tags. The passed version is used to match with the tags in the target repository. The passed name is used in cases where @@ -238,8 +233,8 @@ def find_commit_from_version_and_name(git_obj: Git, name: str, version: str) -> Returns ------- - tuple[str, str] - The branch name and digest as a tuple, or empty strings if the commit cannot be correctly retrieved. + str | None + The digest, or None if the commit cannot be correctly retrieved. """ logger.debug("Searching for commit of artifact version using tags: %s@%s", name, version) @@ -256,14 +251,14 @@ def find_commit_from_version_and_name(git_obj: Git, name: str, version: str) -> if not valid_tags: logger.debug("No tags with commits found for %s", name) - return "", "" + return None # Match tags. matched_tags = match_tags(list(valid_tags.keys()), name, version) if not matched_tags: logger.debug("No tags matched for %s", name) - return "", "" + return None if len(matched_tags) > 1: logger.debug("Tags found for %s: %s", name, len(matched_tags)) @@ -276,26 +271,20 @@ def find_commit_from_version_and_name(git_obj: Git, name: str, version: str) -> # Tag names are taken from valid_tags and should always exist within it. logger.debug("Missing tag name from tag dict: %s not in %s", tag_name, valid_tags.keys()) - branch_name = _get_branch_of_commit(git_obj.get_commit_from_tag(tag_name)) try: hexsha = tag.commit.hexsha except ValueError: logger.debug("Error trying to retrieve digest of commit: %s", tag.commit) - return "", "" - - if not branch_name: - logger.debug("No valid branch associated with tag (commit): %s (%s)", tag_name, hexsha) - return "", "" + return None logger.debug( - "Found tag %s with commit %s of branch %s for artifact version %s@%s", + "Found tag %s with commit %s for artifact version %s@%s", tag, hexsha, - branch_name, name, version, ) - return branch_name, hexsha + return hexsha if hexsha else None def _build_version_pattern(name: str, version: str) -> tuple[Pattern | None, list[str]]: @@ -520,25 +509,6 @@ def _compute_tag_version_similarity(tag_version: str, tag_suffix: str, version_p return count -def _get_branch_of_commit(commit: Commit) -> str: - """Get the branch of the passed commit as a string or return None.""" - branches = commit.branches - - if len(branches) == 1 and "" in branches: - # An 'empty' result for branches is a set containing a zero length string. - logger.debug("No branch associated with commit: %s", commit.hash) - return "" - - branch_name = "" - for branch in branches: - # Ensure the detached head branch is not picked up. - if "(HEAD detached at" not in branch: - branch_name = branch - break - - return branch_name - - def _get_tag_commit(tag: TagReference) -> Commit | None: """Return the commit of the passed tag. diff --git a/src/macaron/slsa_analyzer/analyzer.py b/src/macaron/slsa_analyzer/analyzer.py index af6d1c05f..9e93b1b70 100644 --- a/src/macaron/slsa_analyzer/analyzer.py +++ b/src/macaron/slsa_analyzer/analyzer.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022 - 2023, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2022 - 2024, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. """This module handles the cloning and analyzing a Git repo.""" @@ -696,12 +696,13 @@ def _prepare_repo( # Find the digest and branch if a version has been specified if not digest and purl and purl.version: - branch_name, digest = find_commit(git_obj, purl) - if not (branch_name and digest): + found_digest = find_commit(git_obj, purl) + if not found_digest: logger.error( "Could not map the input purl string to a specific commit in the corresponding repository." ) return None + digest = found_digest # Checking out the specific branch or commit. This operation varies depends on the git service that the # repository uses. diff --git a/tests/e2e/expected_results/maven/guava.json b/tests/e2e/expected_results/maven/guava.json index a608b9803..03c7463c9 100644 --- a/tests/e2e/expected_results/maven/guava.json +++ b/tests/e2e/expected_results/maven/guava.json @@ -1,6 +1,7 @@ { "metadata": { - "timestamps": "2023-09-12 17:28:04" + "timestamps": "2024-01-03 14:31:23", + "has_passing_check": true }, "target": { "info": { @@ -143,7 +144,7 @@ "Provenance content - Identifies builder - SLSA Level 1" ], "justification": [ - "Could not find any SLSA provenances." + "Could not find any SLSA or Witness provenances." ], "result_type": "FAILED" }, @@ -208,11 +209,11 @@ "unique_dep_repos": 0, "checks_summary": [ { - "check_id": "mcn_provenance_expectation_1", + "check_id": "mcn_build_script_1", "num_deps_pass": 0 }, { - "check_id": "mcn_provenance_witness_level_one_1", + "check_id": "mcn_trusted_builder_level_three_1", "num_deps_pass": 0 }, { @@ -220,31 +221,31 @@ "num_deps_pass": 0 }, { - "check_id": "mcn_infer_artifact_pipeline_1", + "check_id": "mcn_build_as_code_1", "num_deps_pass": 0 }, { - "check_id": "mcn_build_as_code_1", + "check_id": "mcn_build_service_1", "num_deps_pass": 0 }, { - "check_id": "mcn_version_control_system_1", + "check_id": "mcn_infer_artifact_pipeline_1", "num_deps_pass": 0 }, { - "check_id": "mcn_trusted_builder_level_three_1", + "check_id": "mcn_provenance_level_three_1", "num_deps_pass": 0 }, { - "check_id": "mcn_build_script_1", + "check_id": "mcn_provenance_witness_level_one_1", "num_deps_pass": 0 }, { - "check_id": "mcn_provenance_level_three_1", + "check_id": "mcn_version_control_system_1", "num_deps_pass": 0 }, { - "check_id": "mcn_build_service_1", + "check_id": "mcn_provenance_expectation_1", "num_deps_pass": 0 } ], diff --git a/tests/e2e/expected_results/purl/com_google_guava/guava/guava.json b/tests/e2e/expected_results/purl/com_google_guava/guava/guava.json index 672413f43..d8cb064de 100644 --- a/tests/e2e/expected_results/purl/com_google_guava/guava/guava.json +++ b/tests/e2e/expected_results/purl/com_google_guava/guava/guava.json @@ -1,6 +1,6 @@ { "metadata": { - "timestamps": "2023-11-03 11:54:05", + "timestamps": "2024-01-03 15:57:20", "has_passing_check": true }, "target": { @@ -8,9 +8,9 @@ "full_name": "pkg:maven/com.google.guava/guava@32.1.2-jre?type=jar", "local_cloned_path": "git_repos/github_com/google/guava", "remote_path": "https://github.com/google/guava", - "branch": "master", - "commit_hash": "d8633ac8539dae52c8361f79c7a0dbd9ad6dd2c4", - "commit_date": "2022-06-16T01:55:17-07:00" + "branch": null, + "commit_hash": "db74bd2fdac443223d45e6fc5c66548542be1081", + "commit_date": "2023-07-31T17:01:20-04:00" }, "provenances": { "is_inferred": true, @@ -22,16 +22,16 @@ "predicateType": "https://slsa.dev/provenance/v0.2", "predicate": { "builder": { - "id": "https://github.com/google/guava/blob/d8633ac8539dae52c8361f79c7a0dbd9ad6dd2c4/./util/deploy_snapshot.sh" + "id": "https://github.com/google/guava/blob/db74bd2fdac443223d45e6fc5c66548542be1081/./util/deploy_snapshot.sh" }, "buildType": "Custom github_actions", "invocation": { "configSource": { - "uri": "https://github.com/google/guava@refs/heads/master", + "uri": "https://github.com/google/guava@refs/heads/None", "digest": { - "sha1": "d8633ac8539dae52c8361f79c7a0dbd9ad6dd2c4" + "sha1": "db74bd2fdac443223d45e6fc5c66548542be1081" }, - "entryPoint": "https://github.com/google/guava/blob/d8633ac8539dae52c8361f79c7a0dbd9ad6dd2c4/.github/workflows/ci.yml" + "entryPoint": "https://github.com/google/guava/blob/db74bd2fdac443223d45e6fc5c66548542be1081/.github/workflows/ci.yml" }, "parameters": {}, "environment": {} @@ -79,9 +79,10 @@ "Build as code - SLSA Level 3" ], "justification": [ + "The target repository does not use gradle to deploy.", { - "The target repository uses build tool maven to deploy": "https://github.com/google/guava/blob/d8633ac8539dae52c8361f79c7a0dbd9ad6dd2c4/./util/deploy_snapshot.sh", - "The build is triggered by": "https://github.com/google/guava/blob/d8633ac8539dae52c8361f79c7a0dbd9ad6dd2c4/.github/workflows/ci.yml" + "The target repository uses build tool maven to deploy": "https://github.com/google/guava/blob/db74bd2fdac443223d45e6fc5c66548542be1081/./util/deploy_snapshot.sh", + "The build is triggered by": "https://github.com/google/guava/blob/db74bd2fdac443223d45e6fc5c66548542be1081/.github/workflows/ci.yml" }, "Deploy command: ['mvn', 'clean', 'source:jar', 'javadoc:jar', 'deploy', '-DskipTests=true', '\"$@\"']", "However, could not find a passing workflow run." @@ -211,15 +212,15 @@ "unique_dep_repos": 0, "checks_summary": [ { - "check_id": "mcn_version_control_system_1", + "check_id": "mcn_build_script_1", "num_deps_pass": 0 }, { - "check_id": "mcn_infer_artifact_pipeline_1", + "check_id": "mcn_trusted_builder_level_three_1", "num_deps_pass": 0 }, { - "check_id": "mcn_provenance_witness_level_one_1", + "check_id": "mcn_provenance_available_1", "num_deps_pass": 0 }, { @@ -231,7 +232,7 @@ "num_deps_pass": 0 }, { - "check_id": "mcn_build_script_1", + "check_id": "mcn_infer_artifact_pipeline_1", "num_deps_pass": 0 }, { @@ -239,11 +240,11 @@ "num_deps_pass": 0 }, { - "check_id": "mcn_trusted_builder_level_three_1", + "check_id": "mcn_provenance_witness_level_one_1", "num_deps_pass": 0 }, { - "check_id": "mcn_provenance_available_1", + "check_id": "mcn_version_control_system_1", "num_deps_pass": 0 }, { diff --git a/tests/repo_finder/test_commit_finder.py b/tests/repo_finder/test_commit_finder.py index db151de2e..cea3c92a8 100644 --- a/tests/repo_finder/test_commit_finder.py +++ b/tests/repo_finder/test_commit_finder.py @@ -1,4 +1,4 @@ -# Copyright (c) 2023 - 2023, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2023 - 2024, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. """This module tests the commit finder.""" @@ -99,34 +99,19 @@ def test_commit_finder() -> None: commit_files(git_obj, ["file_1"]) # Create a commit with no associated branch. - git = git_obj.repo.git commit_0 = git_obj.repo.index.commit(message="Commit_0") - git.checkout("HEAD", b="missing_branch") - commit_with_no_branch = git_obj.repo.index.commit(message="Commit_1") - git.checkout("master") - git.branch("-D", "missing_branch") # No version in PURL. - branch, _ = commit_finder.find_commit(git_obj, PackageURL.from_string("pkg:maven/apache/maven")) - assert not branch + assert not commit_finder.find_commit(git_obj, PackageURL.from_string("pkg:maven/apache/maven")) # Unsupported PURL type. - branch, _ = commit_finder.find_commit(git_obj, PackageURL.from_string("pkg:gem/ruby-artifact@1")) - assert not branch + assert not commit_finder.find_commit(git_obj, PackageURL.from_string("pkg:gem/ruby-artifact@1")) # Hash not present in repository, tests hash and tag. - branch, _ = commit_finder.find_commit(git_obj, PackageURL.from_string("pkg:github/apache/maven@ab4ce3e")) - assert not branch - - # Hash present but no associated branch. - branch, _ = commit_finder.find_commit( - git_obj, PackageURL.from_string(f"pkg:github/apache/maven@{commit_with_no_branch.hexsha}") - ) - assert not branch + assert not commit_finder.find_commit(git_obj, PackageURL.from_string("pkg:github/apache/maven@ab4ce3e")) # Valid PURL but repository has no tags yet. - branch, _ = commit_finder.find_commit(git_obj, PackageURL.from_string("pkg:maven/apache/maven@1.0")) - assert not branch + assert not commit_finder.find_commit(git_obj, PackageURL.from_string("pkg:maven/apache/maven@1.0")) # Additional setup is done here to avoid tainting earlier tests. @@ -150,72 +135,44 @@ def test_commit_finder() -> None: git_obj.repo.create_tag(f"{tag_version_2}_DEV_RC1_RELEASE", ref=empty_commit.hexsha) git_obj.repo.create_tag(f"rel/prefix_name-{tag_version}", ref=empty_commit.hexsha) - # Create a tag on the commit that has no branch. - tag_no_branch = "0.1.2" - git_obj.repo.create_tag(tag_no_branch, ref=commit_with_no_branch.hexsha) - - # Tag with no branch. - branch, _ = commit_finder.find_commit(git_obj, PackageURL.from_string(f"pkg:maven/apache/maven@{tag_no_branch}")) - assert not branch - # Version that fails to create a pattern. - branch, _ = commit_finder.find_commit(git_obj, PackageURL.from_string(f"pkg:maven/apache/maven@{bad_version}")) - assert not branch + assert not commit_finder.find_commit(git_obj, PackageURL.from_string(f"pkg:maven/apache/maven@{bad_version}")) # Version with a suffix and no matching tag. - branch, _ = commit_finder.find_commit(git_obj, PackageURL.from_string("pkg:maven/apache/maven@1-JRE")) - assert not branch + assert not commit_finder.find_commit(git_obj, PackageURL.from_string("pkg:maven/apache/maven@1-JRE")) # Version with only one digit and no matching tag. - branch, _ = commit_finder.find_commit(git_obj, PackageURL.from_string("pkg:maven/apache/maven@1")) - assert not branch + assert not commit_finder.find_commit(git_obj, PackageURL.from_string("pkg:maven/apache/maven@1")) # Valid repository PURL. - branch, digest = commit_finder.find_commit( - git_obj, PackageURL.from_string(f"pkg:github/apache/maven@{commit_0.hexsha}") - ) - assert branch == "master" + digest = commit_finder.find_commit(git_obj, PackageURL.from_string(f"pkg:github/apache/maven@{commit_0.hexsha}")) assert digest == commit_0.hexsha # Valid artifact PURL. - branch, digest = commit_finder.find_commit(git_obj, PackageURL.from_string(f"pkg:maven/apache/maven@{tag_version}")) - assert branch == "master" + digest = commit_finder.find_commit(git_obj, PackageURL.from_string(f"pkg:maven/apache/maven@{tag_version}")) assert digest == commit_0.hexsha # Valid artifact PURL with an alphanumeric suffix. - branch, digest = commit_finder.find_commit( - git_obj, PackageURL.from_string(f"pkg:maven/apache/maven@{tag_version}-RC1") - ) - assert branch == "master" + digest = commit_finder.find_commit(git_obj, PackageURL.from_string(f"pkg:maven/apache/maven@{tag_version}-RC1")) assert digest == commit_0.hexsha # Valid artifact PURL that should match a tag with a name prefix. - branch, digest = commit_finder.find_commit( - git_obj, PackageURL.from_string(f"pkg:maven/apache/prefix_name@{tag_version}") - ) - assert branch == "master" + digest = commit_finder.find_commit(git_obj, PackageURL.from_string(f"pkg:maven/apache/prefix_name@{tag_version}")) assert digest == empty_commit.hexsha # Valid artifact PURL that matches a version with a suffix, to a tag with the same suffix. - branch, digest = commit_finder.find_commit( - git_obj, PackageURL.from_string(f"pkg:maven/apache/maven@{tag_version_2}-DEV") - ) - assert branch == "master" + digest = commit_finder.find_commit(git_obj, PackageURL.from_string(f"pkg:maven/apache/maven@{tag_version_2}-DEV")) assert digest == empty_commit.hexsha # Valid artifact PURL that matches a version with a suffix, to a tag with the same suffix part in a multi-suffix. - branch, digest = commit_finder.find_commit( + digest = commit_finder.find_commit( git_obj, PackageURL.from_string(f"pkg:maven/apache/maven@{tag_version_2}_RELEASE") ) - assert branch == "master" assert digest == empty_commit.hexsha # Valid artifact PURL that matches a version with an alphanumeric suffix, to a tag with the same suffix part in a # multi-suffix. - branch, digest = commit_finder.find_commit( - git_obj, PackageURL.from_string(f"pkg:maven/apache/maven@{tag_version_2}_RC1") - ) - assert branch == "master" + digest = commit_finder.find_commit(git_obj, PackageURL.from_string(f"pkg:maven/apache/maven@{tag_version_2}_RC1")) assert digest == empty_commit.hexsha