diff --git a/configs/schemas/config.schema.json b/configs/schemas/config.schema.json index fd3e047..f5a34c9 100644 --- a/configs/schemas/config.schema.json +++ b/configs/schemas/config.schema.json @@ -235,6 +235,11 @@ "title": "Max Retries", "type": "integer" }, + "retry_on_client_errors": { + "default": true, + "title": "Retry On Client Errors", + "type": "boolean" + }, "model_max_batch_size": { "default": 64, "exclusiveMinimum": 0, @@ -1337,6 +1342,7 @@ "cache_dir": null, "ignore_build_vdb_errors": false, "max_retries": 10, + "retry_on_client_errors": true, "model_max_batch_size": 64, "num_threads": 12, "pipeline_batch_size": 1024, diff --git a/src/cve/data_models/config.py b/src/cve/data_models/config.py index ec3e1eb..1c9da3c 100644 --- a/src/cve/data_models/config.py +++ b/src/cve/data_models/config.py @@ -52,6 +52,7 @@ class GeneralConfig(BaseModel): cache_dir: str | None = None ignore_build_vdb_errors: bool = False max_retries: NonNegativeInt = 10 + retry_on_client_errors: bool = True model_max_batch_size: PositiveInt = 64 num_threads: PositiveInt = Field(default_factory=os.cpu_count) pipeline_batch_size: PositiveInt = 1024 diff --git a/src/cve/pipeline/input.py b/src/cve/pipeline/input.py index 8fe9841..83e16fc 100644 --- a/src/cve/pipeline/input.py +++ b/src/cve/pipeline/input.py @@ -180,7 +180,7 @@ async def _inner(): async with aiohttp.ClientSession() as session: - intel_retriever = IntelRetriever(session=session) + intel_retriever = IntelRetriever(session=session, retry_on_client_errors=run_config.general.retry_on_client_errors) intel_coros = [intel_retriever.retrieve(vuln_id=cve.vuln_id) for cve in message.input.scan.vulns] @@ -244,7 +244,7 @@ async def _inner(): async with aiohttp.ClientSession() as session: - vuln_dep_checker = VulnerableDependencyChecker(session=session, image=image, sbom_list=sbom) + vuln_dep_checker = VulnerableDependencyChecker(session=session, image=image, sbom_list=sbom, retry_on_client_errors=run_config.general.retry_on_client_errors) await vuln_dep_checker.load_dependencies() diff --git a/src/cve/utils/async_http_utils.py b/src/cve/utils/async_http_utils.py index 82ef367..bbd9a8a 100644 --- a/src/cve/utils/async_http_utils.py +++ b/src/cve/utils/async_http_utils.py @@ -31,11 +31,12 @@ async def request_with_retry(session: aiohttp.ClientSession, max_retries: int = 10, sleep_time: float = 0.1, respect_retry_after_header: bool = True, - log_on_error=True) -> typing.AsyncIterator[aiohttp.ClientResponse]: + log_on_error=True, + retry_on_client_errors = True) -> typing.AsyncIterator[aiohttp.ClientResponse]: """ Async version of `morpheus.utils.http_utils.request_with_retry` """ - assert not request_kwargs.get('raise_for_status'), "raise_for_status is cincompatible with `request_with_retry`" + assert not request_kwargs.get('raise_for_status'), "raise_for_status is incompatible with `request_with_retry`" try_count = 0 done = False while try_count <= max_retries and not done: @@ -61,6 +62,8 @@ async def request_with_retry(session: aiohttp.ClientSession, actual_sleep_time = max(int(response_headers["Retry-After"]), actual_sleep_time) elif respect_retry_after_header and 'X-RateLimit-Reset' in response_headers: actual_sleep_time = max(int(response_headers["X-RateLimit-Reset"]) - time.time(), actual_sleep_time) + elif not retry_on_client_errors and response.status < 500: + raise e logger.warning("Error requesting [%d/%d]: (Retry %.1f sec) %s: %s", try_count, diff --git a/src/cve/utils/clients/first_client.py b/src/cve/utils/clients/first_client.py index 4373f2a..86263d2 100644 --- a/src/cve/utils/clients/first_client.py +++ b/src/cve/utils/clients/first_client.py @@ -37,13 +37,15 @@ def __init__(self, session: aiohttp.ClientSession | None = None, retry_count: int = 10, sleep_time: float = 0.1, - respect_retry_after_header: bool = True): + respect_retry_after_header: bool = True, + retry_on_client_errors: bool = True): super().__init__(session=session, base_url=base_url or os.environ.get('FIRST_BASE_URL'), retry_count=retry_count, sleep_time=sleep_time, - respect_retry_after_header=respect_retry_after_header) + respect_retry_after_header=respect_retry_after_header, + retry_on_client_errors=retry_on_client_errors) self._headers = {'Accept': 'application/json'} diff --git a/src/cve/utils/clients/ghsa_client.py b/src/cve/utils/clients/ghsa_client.py index 9e8d505..5ace476 100644 --- a/src/cve/utils/clients/ghsa_client.py +++ b/src/cve/utils/clients/ghsa_client.py @@ -41,12 +41,14 @@ def __init__(self, session: aiohttp.ClientSession | None = None, retry_count: int = 10, sleep_time: float = 0.1, - respect_retry_after_header: bool = True): + respect_retry_after_header: bool = True, + retry_on_client_errors: bool = True): super().__init__(session=session, base_url=base_url or os.environ.get('GHSA_BASE_URL'), retry_count=retry_count, sleep_time=sleep_time, - respect_retry_after_header=respect_retry_after_header) + respect_retry_after_header=respect_retry_after_header, + retry_on_client_errors=retry_on_client_errors) self._api_key = api_key or os.environ.get('GHSA_API_KEY', None) diff --git a/src/cve/utils/clients/intel_client.py b/src/cve/utils/clients/intel_client.py index d44b38c..f2acba7 100644 --- a/src/cve/utils/clients/intel_client.py +++ b/src/cve/utils/clients/intel_client.py @@ -30,7 +30,8 @@ def __init__(self, base_url: str | None = None, retry_count: int = 10, sleep_time: float = 0.1, - respect_retry_after_header: bool = True): + respect_retry_after_header: bool = True, + retry_on_client_errors: bool = True): if (session is None): session = aiohttp.ClientSession() @@ -44,6 +45,7 @@ def __init__(self, self._retry_count = retry_count self._sleep_time = sleep_time self._respect_retry_after_header = respect_retry_after_header + self._retry_on_client_errors = retry_on_client_errors @classmethod @abstractmethod @@ -80,6 +82,7 @@ async def request(self, max_retries=self._retry_count, sleep_time=self._sleep_time, respect_retry_after_header=self._respect_retry_after_header, - log_on_error=log_on_error) as response: + log_on_error=log_on_error, + retry_on_client_errors=self._retry_on_client_errors) as response: return await response.json() diff --git a/src/cve/utils/clients/nvd_client.py b/src/cve/utils/clients/nvd_client.py index 1ed922c..8069156 100644 --- a/src/cve/utils/clients/nvd_client.py +++ b/src/cve/utils/clients/nvd_client.py @@ -51,13 +51,15 @@ def __init__(self, session: aiohttp.ClientSession | None = None, retry_count: int = 10, sleep_time: float = 0.1, - respect_retry_after_header: bool = True): + respect_retry_after_header: bool = True, + retry_on_client_errors: bool = True): super().__init__(session=session, base_url=base_url or os.environ.get('NVD_BASE_URL'), retry_count=retry_count, sleep_time=sleep_time, - respect_retry_after_header=respect_retry_after_header) + respect_retry_after_header=respect_retry_after_header, + retry_on_client_errors=retry_on_client_errors) self._api_key = api_key or os.environ.get('NVD_API_KEY', None) @@ -87,7 +89,8 @@ async def _get_soup(self, url: str) -> BeautifulSoup: 'url': url, "skip_auto_headers": {"User-Agent"}, }, - max_retries=self._retry_count) as response: + max_retries=self._retry_count, + retry_on_client_errors=self._retry_on_client_errors) as response: return BeautifulSoup(await response.text(), 'html.parser') def _get_cvss_vector_from_metric(self, metrics: dict, metric_version: str) -> str | None: diff --git a/src/cve/utils/clients/rhsa_client.py b/src/cve/utils/clients/rhsa_client.py index 81e8c52..840b61c 100644 --- a/src/cve/utils/clients/rhsa_client.py +++ b/src/cve/utils/clients/rhsa_client.py @@ -37,13 +37,15 @@ def __init__(self, session: aiohttp.ClientSession | None = None, retry_count: int = 10, sleep_time: float = 0.1, - respect_retry_after_header: bool = True): + respect_retry_after_header: bool = True, + retry_on_client_errors: bool = True): super().__init__(session=session, base_url=base_url or os.environ.get('RHSA_BASE_URL'), retry_count=retry_count, sleep_time=sleep_time, - respect_retry_after_header=respect_retry_after_header) + respect_retry_after_header=respect_retry_after_header, + retry_on_client_errors=retry_on_client_errors) @classmethod def default_base_url(cls) -> str: diff --git a/src/cve/utils/clients/ubuntu_client.py b/src/cve/utils/clients/ubuntu_client.py index 9f85041..b4587b2 100644 --- a/src/cve/utils/clients/ubuntu_client.py +++ b/src/cve/utils/clients/ubuntu_client.py @@ -35,13 +35,15 @@ def __init__(self, session: aiohttp.ClientSession | None = None, retry_count: int = 10, sleep_time: float = 0.1, - respect_retry_after_header: bool = True): + respect_retry_after_header: bool = True, + retry_on_client_errors: bool = True): super().__init__(session=session, base_url=base_url or os.environ.get('UBUNTU_BASE_URL'), retry_count=retry_count, sleep_time=sleep_time, - respect_retry_after_header=respect_retry_after_header) + respect_retry_after_header=respect_retry_after_header, + retry_on_client_errors=retry_on_client_errors) @classmethod def default_base_url(cls) -> str: diff --git a/src/cve/utils/intel_retriever.py b/src/cve/utils/intel_retriever.py index ab336ae..8ff0d7e 100644 --- a/src/cve/utils/intel_retriever.py +++ b/src/cve/utils/intel_retriever.py @@ -45,7 +45,8 @@ def __init__(self, nist_api_key: str | None = None, ghsa_api_key: str | None = None, lang_code: str = 'en', - max_retries: int = 10): + max_retries: int = 10, + retry_on_client_errors: bool = True): """ Initialize the NISTCVERetriever with URL templates for vulnerability and CVE details. """ @@ -56,13 +57,18 @@ def __init__(self, self._nvd_client = NVDClient(api_key=os.environ.get('NVD_API_KEY', nist_api_key), session=self._session, lang_code=lang_code, - retry_count=max_retries) - self._first_client = FirstClient(session=self._session, retry_count=max_retries) + retry_count=max_retries, + retry_on_client_errors=retry_on_client_errors) + self._first_client = FirstClient(session=self._session, retry_count=max_retries, + retry_on_client_errors=retry_on_client_errors) self._ghsa_client = GHSAClient(api_key=os.environ.get('GHSA_API_KEY', ghsa_api_key), session=self._session, - retry_count=max_retries) - self._rhsa_client = RHSAClient(session=self._session, retry_count=max_retries) - self._ubuntu_client = UbuntuClient(session=self._session, retry_count=max_retries) + retry_count=max_retries, + retry_on_client_errors=retry_on_client_errors) + self._rhsa_client = RHSAClient(session=self._session, retry_count=max_retries, + retry_on_client_errors=retry_on_client_errors) + self._ubuntu_client = UbuntuClient(session=self._session, retry_count=max_retries, + retry_on_client_errors=retry_on_client_errors) @asynccontextmanager async def _get_session(self, session: aiohttp.ClientSession | None = None): diff --git a/src/cve/utils/vulnerable_dependency_checker.py b/src/cve/utils/vulnerable_dependency_checker.py index 00ae915..0416fa5 100644 --- a/src/cve/utils/vulnerable_dependency_checker.py +++ b/src/cve/utils/vulnerable_dependency_checker.py @@ -29,7 +29,6 @@ from ..data_models.cve_intel import CveIntelNvd from ..data_models.dependencies import DependencyPackage -from .async_http_utils import request_with_retry from .clients.intel_client import IntelClient from .string_utils import package_names_match from .url_utils import url_join @@ -126,13 +125,15 @@ def __init__(self, image: str, sbom_list: list, session: aiohttp.ClientSession | None = None, - respect_retry_after_header: bool = True): + respect_retry_after_header: bool = True, + retry_on_client_errors: bool = True): super().__init__( session=session, base_url=base_url or os.environ.get('DEPSDEV_BASE_URL'), retry_count=1, # Service returns 404 if the package is not found. So dont retry - respect_retry_after_header=respect_retry_after_header) + respect_retry_after_header=respect_retry_after_header, + retry_on_client_errors=retry_on_client_errors) self._semaphore = asyncio.Semaphore(25)