Skip to content

Commit

Permalink
fix: report known malware even when not labeled
Browse files Browse the repository at this point in the history
Signed-off-by: behnazh-w <behnaz.hassanshahi@oracle.com>
  • Loading branch information
behnazh-w committed Jan 5, 2025
1 parent 1ea1bd5 commit 0089979
Show file tree
Hide file tree
Showing 14 changed files with 312 additions and 72 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,14 @@ macaron.slsa\_analyzer.package\_registry package
Submodules
----------

macaron.slsa\_analyzer.package\_registry.deps\_dev module
---------------------------------------------------------

.. automodule:: macaron.slsa_analyzer.package_registry.deps_dev
:members:
:undoc-members:
:show-inheritance:

macaron.slsa\_analyzer.package\_registry.jfrog\_maven\_registry module
----------------------------------------------------------------------

Expand Down
7 changes: 6 additions & 1 deletion src/macaron/config/defaults.ini
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2022 - 2024, Oracle and/or its affiliates. All rights reserved.
# Copyright (c) 2022 - 2025, Oracle and/or its affiliates. All rights reserved.
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/.

[requests]
Expand Down Expand Up @@ -538,6 +538,11 @@ registry_url_scheme = https
fileserver_url_netloc = files.pythonhosted.org
fileserver_url_scheme = https

[deps_dev]
url_netloc = api.deps.dev
url_scheme = https
v3alpha_purl_endpoint = v3alpha/purl

# Configuration options for selecting the checks to run.
# Both the exclude and include are defined as list of strings:
# - The exclude list is used to specify the checks that will not run.
Expand Down
52 changes: 31 additions & 21 deletions src/macaron/slsa_analyzer/checks/detect_malicious_metadata_check.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2024 - 2024, Oracle and/or its affiliates. All rights reserved.
# Copyright (c) 2024 - 2025, Oracle and/or its affiliates. All rights reserved.
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/.

"""This check examines the metadata of pypi packages with seven heuristics."""
Expand All @@ -11,7 +11,7 @@

from macaron.database.db_custom_types import DBJsonDict
from macaron.database.table_definitions import CheckFacts
from macaron.errors import HeuristicAnalyzerValueError
from macaron.errors import HeuristicAnalyzerValueError, InvalidHTTPResponseError
from macaron.json_tools import JsonType, json_extract
from macaron.malware_analyzer.pypi_heuristics.base_analyzer import BaseHeuristicAnalyzer
from macaron.malware_analyzer.pypi_heuristics.heuristics import HeuristicResult, Heuristics
Expand All @@ -28,6 +28,7 @@
from macaron.slsa_analyzer.build_tool.poetry import Poetry
from macaron.slsa_analyzer.checks.base_check import BaseCheck
from macaron.slsa_analyzer.checks.check_result import CheckResultData, CheckResultType, Confidence, JustificationType
from macaron.slsa_analyzer.package_registry.deps_dev import DepsDevService
from macaron.slsa_analyzer.package_registry.pypi_registry import PyPIPackageJsonAsset, PyPIRegistry
from macaron.slsa_analyzer.registry import registry
from macaron.slsa_analyzer.specs.package_registry_spec import PackageRegistryInfo
Expand Down Expand Up @@ -177,7 +178,7 @@ def __init__(self) -> None:
"""Initialize a check instance."""
check_id = "mcn_detect_malicious_metadata_1"
description = """This check analyzes the metadata of a package based on reports malicious behavior.
Supported ecosystem: PyPI.
Supported ecosystem for unknown malware: PyPI.
"""
super().__init__(check_id=check_id, description=description, eval_reqs=[])

Expand Down Expand Up @@ -259,21 +260,29 @@ def run_check(self, ctx: AnalyzeContext) -> CheckResultData:
The result of the check.
"""
result_tables: list[CheckFacts] = []
# First check if this package is a known malware
package_registry_info_entries = ctx.dynamic_data["package_registries"]

# First check if this package is a known malware
url = "https://api.osv.dev/v1/query"
data = {"package": {"purl": ctx.component.purl}}
response = send_post_http_raw(url, json_data=data, headers=None)
res_obj = None
if response:
try:
res_obj = response.json()
except requests.exceptions.JSONDecodeError as error:
logger.debug("Unable to get a valid response from %s: %s", url, error)
if res_obj:
for vuln in res_obj.get("vulns", {}):
v_id = json_extract(vuln, ["id"], str)
if v_id and v_id.startswith("MAL-"):

try:
package_exists = bool(DepsDevService.get_package_info(ctx.component.purl))
except InvalidHTTPResponseError as error:
logger.debug(error)

# Known malicious packages must have been removed.
if not package_exists:
response = send_post_http_raw(url, json_data=data, headers=None)
res_obj = None
if response:
try:
res_obj = response.json()
except requests.exceptions.JSONDecodeError as error:
logger.debug("Unable to get a valid response from %s: %s", url, error)
if res_obj:
for vuln in res_obj.get("vulns", {}):
v_id = json_extract(vuln, ["id"], str)
result_tables.append(
MaliciousMetadataFacts(
known_malware=f"https://osv.dev/vulnerability/{v_id}",
Expand All @@ -282,15 +291,16 @@ def run_check(self, ctx: AnalyzeContext) -> CheckResultData:
confidence=Confidence.HIGH,
)
)
if result_tables:
return CheckResultData(
result_tables=result_tables,
result_type=CheckResultType.FAILED,
)
if result_tables:
return CheckResultData(
result_tables=result_tables,
result_type=CheckResultType.FAILED,
)

package_registry_info_entries = ctx.dynamic_data["package_registries"]
# If the package is not a known malware, run malware analysis heuristics.
for package_registry_info_entry in package_registry_info_entries:
match package_registry_info_entry:
# Currently, only PyPI packages are supported.
case PackageRegistryInfo(
build_tool=Pip() | Poetry(),
package_registry=PyPIRegistry() as pypi_registry,
Expand Down
82 changes: 82 additions & 0 deletions src/macaron/slsa_analyzer/package_registry/deps_dev.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
# Copyright (c) 2025 - 2025, Oracle and/or its affiliates. All rights reserved.
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/.

"""This module contains implementation of deps.dev service."""

import json
import logging
import urllib.parse
from json.decoder import JSONDecodeError
from urllib.parse import quote as encode

from macaron.config.defaults import defaults
from macaron.errors import ConfigurationError, InvalidHTTPResponseError
from macaron.util import send_get_http_raw

logger: logging.Logger = logging.getLogger(__name__)


class DepsDevService:
    """The deps.dev service class."""

    @staticmethod
    def get_package_info(purl: str) -> dict | None:
        """Check if the package identified by the PackageURL (PURL) exists and return its information.

        The request URL is built from the ``[deps_dev]`` section of the defaults
        configuration: ``<url_scheme>://<url_netloc>/<v3alpha_purl_endpoint>/<percent-encoded purl>``.

        Parameters
        ----------
        purl: str
            The PackageURL (PURL).

        Returns
        -------
        dict | None
            The package metadata as a JSON object. None is returned when the ``[deps_dev]``
            section is not configured, or when the request yields no response or an empty
            response body (presumably because the package does not exist; this depends on
            the behavior of ``send_get_http_raw``).

        Raises
        ------
        ConfigurationError
            If the "url_netloc" or "v3alpha_purl_endpoint" key is missing in the ``[deps_dev]`` section.
        InvalidHTTPResponseError
            If the API URL cannot be constructed, or the response body is not valid JSON,
            is empty after decoding, or is not a JSON object.
        """
        section_name = "deps_dev"
        # NOTE: a missing section is reported the same way as "no data" (None), not as an error.
        if not defaults.has_section(section_name):
            return None
        section = defaults[section_name]

        url_netloc = section.get("url_netloc")
        if not url_netloc:
            raise ConfigurationError(
                f'The "url_netloc" key is missing in section [{section_name}] of the .ini configuration file.'
            )
        url_scheme = section.get("url_scheme", "https")
        v3alpha_purl_endpoint = section.get("v3alpha_purl_endpoint")
        if not v3alpha_purl_endpoint:
            raise ConfigurationError(
                f'The "v3alpha_purl_endpoint" key is missing in section [{section_name}] of the .ini configuration file.'
            )

        # The PURL contains "/", ":" and "@", so it is fully percent-encoded (safe="") to keep it
        # a single path segment.
        path_params = "/".join([v3alpha_purl_endpoint, encode(purl, safe="")])
        try:
            url = urllib.parse.urlunsplit(
                urllib.parse.SplitResult(
                    scheme=url_scheme,
                    netloc=url_netloc,
                    path=path_params,
                    query="",
                    fragment="",
                )
            )
        except ValueError as error:
            raise InvalidHTTPResponseError("Failed to construct the API URL.") from error

        response = send_get_http_raw(url)
        if not response or not response.text:
            return None

        try:
            metadata = json.loads(response.text)
        except JSONDecodeError as error:
            raise InvalidHTTPResponseError(f"Failed to process response from deps.dev for {url}.") from error
        if not metadata:
            raise InvalidHTTPResponseError(f"Empty response returned by {url} .")
        # Valid JSON is not necessarily an object; callers index the result by key, so
        # reject lists, strings and numbers instead of returning them as a "dict".
        if not isinstance(metadata, dict):
            raise InvalidHTTPResponseError(f"Unexpected response returned by {url}: expected a JSON object.")
        return metadata
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2023 - 2024, Oracle and/or its affiliates. All rights reserved.
# Copyright (c) 2023 - 2025, Oracle and/or its affiliates. All rights reserved.
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/.

"""Assets on a package registry."""
Expand Down Expand Up @@ -816,7 +816,7 @@ def download_asset(self, url: str, dest: str) -> bool:

return True

def find_publish_timestamp(self, purl: str, registry_url: str | None = None) -> datetime:
def find_publish_timestamp(self, purl: str) -> datetime:
"""Make a search request to Maven Central to find the publishing timestamp of an artifact.
The reason for directly fetching timestamps from Maven Central is that deps.dev occasionally
Expand All @@ -829,8 +829,6 @@ def find_publish_timestamp(self, purl: str, registry_url: str | None = None) ->
purl: str
The Package URL (purl) of the package whose publication timestamp is to be retrieved.
This should conform to the PURL specification.
registry_url: str | None
The registry URL that can be set for testing.
Returns
-------
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2023 - 2024, Oracle and/or its affiliates. All rights reserved.
# Copyright (c) 2023 - 2025, Oracle and/or its affiliates. All rights reserved.
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/.

"""The module provides abstractions for the Maven Central package registry."""
Expand Down Expand Up @@ -182,7 +182,7 @@ def is_detected(self, build_tool: BaseBuildTool) -> bool:
compatible_build_tool_classes = [Maven, Gradle]
return any(isinstance(build_tool, build_tool_class) for build_tool_class in compatible_build_tool_classes)

def find_publish_timestamp(self, purl: str, registry_url: str | None = None) -> datetime:
def find_publish_timestamp(self, purl: str) -> datetime:
"""Make a search request to Maven Central to find the publishing timestamp of an artifact.
The reason for directly fetching timestamps from Maven Central is that deps.dev occasionally
Expand All @@ -195,8 +195,6 @@ def find_publish_timestamp(self, purl: str, registry_url: str | None = None) ->
purl: str
The Package URL (purl) of the package whose publication timestamp is to be retrieved.
This should conform to the PURL specification.
registry_url: str | None
The registry URL that can be set for testing.
Returns
-------
Expand Down
47 changes: 10 additions & 37 deletions src/macaron/slsa_analyzer/package_registry/package_registry.py
Original file line number Diff line number Diff line change
@@ -1,21 +1,16 @@
# Copyright (c) 2023 - 2024, Oracle and/or its affiliates. All rights reserved.
# Copyright (c) 2023 - 2025, Oracle and/or its affiliates. All rights reserved.
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/.

"""This module defines package registries."""

import json
import logging
import urllib.parse
from abc import ABC, abstractmethod
from datetime import datetime
from urllib.parse import quote as encode

import requests

from macaron.errors import InvalidHTTPResponseError
from macaron.json_tools import json_extract
from macaron.slsa_analyzer.build_tool.base_build_tool import BaseBuildTool
from macaron.util import send_get_http_raw
from macaron.slsa_analyzer.package_registry.deps_dev import DepsDevService

logger: logging.Logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -50,7 +45,7 @@ def is_detected(self, build_tool: BaseBuildTool) -> bool:
based on the given build tool.
"""

def find_publish_timestamp(self, purl: str, registry_url: str | None = None) -> datetime:
def find_publish_timestamp(self, purl: str) -> datetime:
"""Retrieve the publication timestamp for a package specified by its purl from the deps.dev repository by default.
This method constructs a request URL based on the provided purl, sends an HTTP GET
Expand All @@ -65,8 +60,6 @@ def find_publish_timestamp(self, purl: str, registry_url: str | None = None) ->
purl: str
The Package URL (purl) of the package whose publication timestamp is to be retrieved.
This should conform to the PURL specification.
registry_url: str | None
The registry URL that can be set for testing.
Returns
-------
Expand All @@ -86,40 +79,20 @@ def find_publish_timestamp(self, purl: str, registry_url: str | None = None) ->
# in the AnalyzeContext object retrieved by the Repo Finder. This step should be
# implemented at the beginning of the analyze command to ensure that the data
# is available for subsequent processing.

base_url_parsed = urllib.parse.urlparse(registry_url or "https://api.deps.dev")
path_params = "/".join(["v3alpha", "purl", encode(purl, safe="")])
try:
url = urllib.parse.urlunsplit(
urllib.parse.SplitResult(
scheme=base_url_parsed.scheme,
netloc=base_url_parsed.netloc,
path=path_params,
query="",
fragment="",
)
)
except ValueError as error:
raise InvalidHTTPResponseError("Failed to construct the API URL.") from error

response = send_get_http_raw(url)
if response and response.text:
try:
metadata: dict = json.loads(response.text)
except requests.exceptions.JSONDecodeError as error:
raise InvalidHTTPResponseError(f"Failed to process response from deps.dev for {url}.") from error
if not metadata:
raise InvalidHTTPResponseError(f"Empty response returned by {url} .")

metadata = DepsDevService.get_package_info(purl)
except InvalidHTTPResponseError as error:
raise InvalidHTTPResponseError(f"Invalid response from deps.dev for {purl}.") from error
if metadata:
timestamp = json_extract(metadata, ["version", "publishedAt"], str)
if not timestamp:
raise InvalidHTTPResponseError(f"The timestamp is missing in the response returned by {url}.")
raise InvalidHTTPResponseError(f"The timestamp is missing in the response returned for {purl}.")

logger.debug("Found timestamp: %s.", timestamp)

try:
return datetime.fromisoformat(timestamp)
except ValueError as error:
raise InvalidHTTPResponseError(f"The timestamp returned by {url} is invalid") from error
raise InvalidHTTPResponseError(f"The timestamp returned for {purl} is invalid") from error

raise InvalidHTTPResponseError(f"Invalid response from deps.dev for {url}.")
raise InvalidHTTPResponseError(f"Invalid response from deps.dev for {purl}.")
10 changes: 10 additions & 0 deletions tests/integration/cases/ultralytics/policy.dl
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
/* Copyright (c) 2025 - 2025, Oracle and/or its affiliates. All rights reserved. */
/* Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. */

#include "prelude.dl"

Policy("check-malicious-package", component_id, "Check the malicious package.") :-
check_passed(component_id, "mcn_detect_malicious_metadata_1").

apply_policy_to("check-malicious-package", component_id) :-
is_component(component_id, "pkg:pypi/ultralytics").
21 changes: 21 additions & 0 deletions tests/integration/cases/ultralytics/test.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# Copyright (c) 2025 - 2025, Oracle and/or its affiliates. All rights reserved.
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/.

description: |
Analyzing a popular package, some versions of which are compromised.
tags:
- macaron-python-package
- macaron-docker-image

steps:
- name: Run macaron analyze
kind: analyze
options:
command_args:
- -purl
- pkg:pypi/ultralytics
- name: Run macaron verify-policy to verify that the malicious metadata check passes.
kind: verify
options:
policy: policy.dl
10 changes: 10 additions & 0 deletions tests/integration/cases/ultralytics_8.3.46/policy.dl
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
/* Copyright (c) 2025 - 2025, Oracle and/or its affiliates. All rights reserved. */
/* Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. */

#include "prelude.dl"

Policy("check-malicious-package", component_id, "Check the malicious package.") :-
check_failed(component_id, "mcn_detect_malicious_metadata_1").

apply_policy_to("check-malicious-package", component_id) :-
is_component(component_id, "pkg:pypi/ultralytics@8.3.46").
Loading

0 comments on commit 0089979

Please sign in to comment.