Skip to content

Commit

Permalink
chore: disregard branch in commit finder (#586)
Browse files Browse the repository at this point in the history
Signed-off-by: Ben Selwyn-Smith <benselwynsmith@googlemail.com>
  • Loading branch information
benmss authored Jan 8, 2024
1 parent 6b14295 commit 7ea4e2e
Show file tree
Hide file tree
Showing 6 changed files with 70 additions and 140 deletions.
2 changes: 1 addition & 1 deletion scripts/dev_scripts/integration_tests.sh
Original file line number Diff line number Diff line change
Expand Up @@ -381,7 +381,7 @@ echo "google/guava: Analyzing with PURL and repository path without dependency r
echo -e "----------------------------------------------------------------------------------\n"
JSON_EXPECTED=$WORKSPACE/tests/e2e/expected_results/purl/com_google_guava/guava/guava.json
JSON_RESULT=$WORKSPACE/output/reports/maven/com_google_guava/guava/guava.json
$RUN_MACARON analyze -purl pkg:maven/com.google.guava/guava@32.1.2-jre?type=jar -rp https://github.com/google/guava -b master -d d8633ac8539dae52c8361f79c7a0dbd9ad6dd2c4 --skip-deps || log_fail
$RUN_MACARON analyze -purl pkg:maven/com.google.guava/guava@32.1.2-jre?type=jar --skip-deps || log_fail

check_or_update_expected_output $COMPARE_JSON_OUT $JSON_RESULT $JSON_EXPECTED || log_fail

Expand Down
72 changes: 21 additions & 51 deletions src/macaron/repo_finder/commit_finder.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2023 - 2023, Oracle and/or its affiliates. All rights reserved.
# Copyright (c) 2023 - 2024, Oracle and/or its affiliates. All rights reserved.
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/.

"""This module contains the logic for matching PackageURL versions to repository commits via the tags they contain."""
Expand Down Expand Up @@ -114,10 +114,10 @@ class AbstractPurlType(Enum):
UNSUPPORTED = (2,)


def find_commit(git_obj: Git, purl: PackageURL) -> tuple[str, str]:
def find_commit(git_obj: Git, purl: PackageURL) -> str | None:
"""Try to find the commit matching the passed PURL.
The PURL may have be a repository type, e.g. GitHub, in which case the commit might be in its version part.
The PURL may be a repository type, e.g. GitHub, in which case the commit might be in its version part.
Otherwise, the PURL should be a package manager type, e.g. Maven, in which case the commit must be found from
the artifact version.
Expand All @@ -130,21 +130,21 @@ def find_commit(git_obj: Git, purl: PackageURL) -> tuple[str, str]:
Returns
-------
tuple[str, str]
The branch name and digest as a tuple.
str | None
The digest, or None if the commit cannot be correctly retrieved.
"""
version = purl.version
if not version:
logger.debug("Missing version for analysis target: %s", purl.name)
return "", ""
return None

repo_type = determine_abstract_purl_type(purl)
if repo_type == AbstractPurlType.REPOSITORY:
return extract_commit_from_version(git_obj, version)
if repo_type == AbstractPurlType.ARTIFACT:
return find_commit_from_version_and_name(git_obj, purl.name, version)
logger.debug("Type of PURL is not supported for commit finding: %s", purl.type)
return "", ""
return None


def determine_abstract_purl_type(purl: PackageURL) -> AbstractPurlType:
Expand All @@ -157,7 +157,7 @@ def determine_abstract_purl_type(purl: PackageURL) -> AbstractPurlType:
Returns
-------
PurlType:
AbstractPurlType
The identified type of the PURL.
"""
available_domains = [git_service.hostname for git_service in GIT_SERVICES if git_service.hostname]
Expand All @@ -174,7 +174,7 @@ def determine_abstract_purl_type(purl: PackageURL) -> AbstractPurlType:
return AbstractPurlType.UNSUPPORTED


def extract_commit_from_version(git_obj: Git, version: str) -> tuple[str, str]:
def extract_commit_from_version(git_obj: Git, version: str) -> str | None:
"""Try to extract the commit from the PURL's version parameter.
E.g.
Expand All @@ -190,8 +190,8 @@ def extract_commit_from_version(git_obj: Git, version: str) -> tuple[str, str]:
Returns
-------
tuple[str, str]
The branch name and digest as a tuple.
str | None
The digest, or None if the commit cannot be correctly retrieved.
"""
# A commit hash is 40 characters in length, but commits are often referenced using only some of those.
commit: Commit | None = None
Expand All @@ -211,17 +211,12 @@ def extract_commit_from_version(git_obj: Git, version: str) -> tuple[str, str]:
logger.debug("Failed to retrieve commit: %s", error)

if not commit:
return "", ""

branch_name = _get_branch_of_commit(commit)
if not branch_name:
logger.debug("No valid branch found for commit: %s", commit.hash)
return "", ""
return None

return branch_name, commit.hash
return commit.hash if commit else None


def find_commit_from_version_and_name(git_obj: Git, name: str, version: str) -> tuple[str, str]:
def find_commit_from_version_and_name(git_obj: Git, name: str, version: str) -> str | None:
"""Try to find the matching commit in a repository of a given version (and name) via tags.
The passed version is used to match with the tags in the target repository. The passed name is used in cases where
Expand All @@ -238,8 +233,8 @@ def find_commit_from_version_and_name(git_obj: Git, name: str, version: str) ->
Returns
-------
tuple[str, str]
The branch name and digest as a tuple, or empty strings if the commit cannot be correctly retrieved.
str | None
The digest, or None if the commit cannot be correctly retrieved.
"""
logger.debug("Searching for commit of artifact version using tags: %s@%s", name, version)

Expand All @@ -256,14 +251,14 @@ def find_commit_from_version_and_name(git_obj: Git, name: str, version: str) ->

if not valid_tags:
logger.debug("No tags with commits found for %s", name)
return "", ""
return None

# Match tags.
matched_tags = match_tags(list(valid_tags.keys()), name, version)

if not matched_tags:
logger.debug("No tags matched for %s", name)
return "", ""
return None

if len(matched_tags) > 1:
logger.debug("Tags found for %s: %s", name, len(matched_tags))
Expand All @@ -276,26 +271,20 @@ def find_commit_from_version_and_name(git_obj: Git, name: str, version: str) ->
# Tag names are taken from valid_tags and should always exist within it.
logger.debug("Missing tag name from tag dict: %s not in %s", tag_name, valid_tags.keys())

branch_name = _get_branch_of_commit(git_obj.get_commit_from_tag(tag_name))
try:
hexsha = tag.commit.hexsha
except ValueError:
logger.debug("Error trying to retrieve digest of commit: %s", tag.commit)
return "", ""

if not branch_name:
logger.debug("No valid branch associated with tag (commit): %s (%s)", tag_name, hexsha)
return "", ""
return None

logger.debug(
"Found tag %s with commit %s of branch %s for artifact version %s@%s",
"Found tag %s with commit %s for artifact version %s@%s",
tag,
hexsha,
branch_name,
name,
version,
)
return branch_name, hexsha
return hexsha if hexsha else None


def _build_version_pattern(name: str, version: str) -> tuple[Pattern | None, list[str]]:
Expand Down Expand Up @@ -520,25 +509,6 @@ def _compute_tag_version_similarity(tag_version: str, tag_suffix: str, version_p
return count


def _get_branch_of_commit(commit: Commit) -> str:
"""Get the branch of the passed commit as a string or return None."""
branches = commit.branches

if len(branches) == 1 and "" in branches:
# An 'empty' result for branches is a set containing a zero length string.
logger.debug("No branch associated with commit: %s", commit.hash)
return ""

branch_name = ""
for branch in branches:
# Ensure the detached head branch is not picked up.
if "(HEAD detached at" not in branch:
branch_name = branch
break

return branch_name


def _get_tag_commit(tag: TagReference) -> Commit | None:
"""Return the commit of the passed tag.
Expand Down
7 changes: 4 additions & 3 deletions src/macaron/slsa_analyzer/analyzer.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2022 - 2023, Oracle and/or its affiliates. All rights reserved.
# Copyright (c) 2022 - 2024, Oracle and/or its affiliates. All rights reserved.
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/.

"""This module handles cloning and analyzing a Git repo."""
Expand Down Expand Up @@ -696,12 +696,13 @@ def _prepare_repo(

# Find the digest if a version has been specified
if not digest and purl and purl.version:
branch_name, digest = find_commit(git_obj, purl)
if not (branch_name and digest):
found_digest = find_commit(git_obj, purl)
if not found_digest:
logger.error(
"Could not map the input purl string to a specific commit in the corresponding repository."
)
return None
digest = found_digest

# Checking out the specific branch or commit. This operation varies depending on the git service that the
# repository uses.
Expand Down
23 changes: 12 additions & 11 deletions tests/e2e/expected_results/maven/guava.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
{
"metadata": {
"timestamps": "2023-09-12 17:28:04"
"timestamps": "2024-01-03 14:31:23",
"has_passing_check": true
},
"target": {
"info": {
Expand Down Expand Up @@ -143,7 +144,7 @@
"Provenance content - Identifies builder - SLSA Level 1"
],
"justification": [
"Could not find any SLSA provenances."
"Could not find any SLSA or Witness provenances."
],
"result_type": "FAILED"
},
Expand Down Expand Up @@ -208,43 +209,43 @@
"unique_dep_repos": 0,
"checks_summary": [
{
"check_id": "mcn_provenance_expectation_1",
"check_id": "mcn_build_script_1",
"num_deps_pass": 0
},
{
"check_id": "mcn_provenance_witness_level_one_1",
"check_id": "mcn_trusted_builder_level_three_1",
"num_deps_pass": 0
},
{
"check_id": "mcn_provenance_available_1",
"num_deps_pass": 0
},
{
"check_id": "mcn_infer_artifact_pipeline_1",
"check_id": "mcn_build_as_code_1",
"num_deps_pass": 0
},
{
"check_id": "mcn_build_as_code_1",
"check_id": "mcn_build_service_1",
"num_deps_pass": 0
},
{
"check_id": "mcn_version_control_system_1",
"check_id": "mcn_infer_artifact_pipeline_1",
"num_deps_pass": 0
},
{
"check_id": "mcn_trusted_builder_level_three_1",
"check_id": "mcn_provenance_level_three_1",
"num_deps_pass": 0
},
{
"check_id": "mcn_build_script_1",
"check_id": "mcn_provenance_witness_level_one_1",
"num_deps_pass": 0
},
{
"check_id": "mcn_provenance_level_three_1",
"check_id": "mcn_version_control_system_1",
"num_deps_pass": 0
},
{
"check_id": "mcn_build_service_1",
"check_id": "mcn_provenance_expectation_1",
"num_deps_pass": 0
}
],
Expand Down
33 changes: 17 additions & 16 deletions tests/e2e/expected_results/purl/com_google_guava/guava/guava.json
Original file line number Diff line number Diff line change
@@ -1,16 +1,16 @@
{
"metadata": {
"timestamps": "2023-11-03 11:54:05",
"timestamps": "2024-01-03 15:57:20",
"has_passing_check": true
},
"target": {
"info": {
"full_name": "pkg:maven/com.google.guava/guava@32.1.2-jre?type=jar",
"local_cloned_path": "git_repos/github_com/google/guava",
"remote_path": "https://github.com/google/guava",
"branch": "master",
"commit_hash": "d8633ac8539dae52c8361f79c7a0dbd9ad6dd2c4",
"commit_date": "2022-06-16T01:55:17-07:00"
"branch": null,
"commit_hash": "db74bd2fdac443223d45e6fc5c66548542be1081",
"commit_date": "2023-07-31T17:01:20-04:00"
},
"provenances": {
"is_inferred": true,
Expand All @@ -22,16 +22,16 @@
"predicateType": "https://slsa.dev/provenance/v0.2",
"predicate": {
"builder": {
"id": "https://github.com/google/guava/blob/d8633ac8539dae52c8361f79c7a0dbd9ad6dd2c4/./util/deploy_snapshot.sh"
"id": "https://github.com/google/guava/blob/db74bd2fdac443223d45e6fc5c66548542be1081/./util/deploy_snapshot.sh"
},
"buildType": "Custom github_actions",
"invocation": {
"configSource": {
"uri": "https://github.com/google/guava@refs/heads/master",
"uri": "https://github.com/google/guava@refs/heads/None",
"digest": {
"sha1": "d8633ac8539dae52c8361f79c7a0dbd9ad6dd2c4"
"sha1": "db74bd2fdac443223d45e6fc5c66548542be1081"
},
"entryPoint": "https://github.com/google/guava/blob/d8633ac8539dae52c8361f79c7a0dbd9ad6dd2c4/.github/workflows/ci.yml"
"entryPoint": "https://github.com/google/guava/blob/db74bd2fdac443223d45e6fc5c66548542be1081/.github/workflows/ci.yml"
},
"parameters": {},
"environment": {}
Expand Down Expand Up @@ -79,9 +79,10 @@
"Build as code - SLSA Level 3"
],
"justification": [
"The target repository does not use gradle to deploy.",
{
"The target repository uses build tool maven to deploy": "https://github.com/google/guava/blob/d8633ac8539dae52c8361f79c7a0dbd9ad6dd2c4/./util/deploy_snapshot.sh",
"The build is triggered by": "https://github.com/google/guava/blob/d8633ac8539dae52c8361f79c7a0dbd9ad6dd2c4/.github/workflows/ci.yml"
"The target repository uses build tool maven to deploy": "https://github.com/google/guava/blob/db74bd2fdac443223d45e6fc5c66548542be1081/./util/deploy_snapshot.sh",
"The build is triggered by": "https://github.com/google/guava/blob/db74bd2fdac443223d45e6fc5c66548542be1081/.github/workflows/ci.yml"
},
"Deploy command: ['mvn', 'clean', 'source:jar', 'javadoc:jar', 'deploy', '-DskipTests=true', '\"$@\"']",
"However, could not find a passing workflow run."
Expand Down Expand Up @@ -211,15 +212,15 @@
"unique_dep_repos": 0,
"checks_summary": [
{
"check_id": "mcn_version_control_system_1",
"check_id": "mcn_build_script_1",
"num_deps_pass": 0
},
{
"check_id": "mcn_infer_artifact_pipeline_1",
"check_id": "mcn_trusted_builder_level_three_1",
"num_deps_pass": 0
},
{
"check_id": "mcn_provenance_witness_level_one_1",
"check_id": "mcn_provenance_available_1",
"num_deps_pass": 0
},
{
Expand All @@ -231,19 +232,19 @@
"num_deps_pass": 0
},
{
"check_id": "mcn_build_script_1",
"check_id": "mcn_infer_artifact_pipeline_1",
"num_deps_pass": 0
},
{
"check_id": "mcn_provenance_level_three_1",
"num_deps_pass": 0
},
{
"check_id": "mcn_trusted_builder_level_three_1",
"check_id": "mcn_provenance_witness_level_one_1",
"num_deps_pass": 0
},
{
"check_id": "mcn_provenance_available_1",
"check_id": "mcn_version_control_system_1",
"num_deps_pass": 0
},
{
Expand Down
Loading

0 comments on commit 7ea4e2e

Please sign in to comment.