From 337fa9a1eb70a28706555bc09eeb330c1a9199c3 Mon Sep 17 00:00:00 2001 From: Ruben Romero Montes Date: Wed, 18 Dec 2024 14:34:09 +0100 Subject: [PATCH] feat: make retry_on_client_errors configurable Signed-off-by: Ruben Romero Montes --- configs/schemas/config.schema.json | 6 ++++++ src/cve/data_models/config.py | 1 + src/cve/utils/async_http_utils.py | 2 +- src/cve/utils/clients/first_client.py | 6 ++++-- src/cve/utils/clients/ghsa_client.py | 6 ++++-- src/cve/utils/clients/intel_client.py | 7 +++++-- src/cve/utils/clients/nvd_client.py | 9 ++++++--- src/cve/utils/clients/rhsa_client.py | 6 ++++-- src/cve/utils/clients/ubuntu_client.py | 6 ++++-- src/cve/utils/intel_retriever.py | 18 ++++++++++++------ src/cve/utils/vulnerable_dependency_checker.py | 6 ++++-- 11 files changed, 51 insertions(+), 22 deletions(-) diff --git a/configs/schemas/config.schema.json b/configs/schemas/config.schema.json index 7a3b490..7cc3226 100644 --- a/configs/schemas/config.schema.json +++ b/configs/schemas/config.schema.json @@ -233,6 +233,11 @@ "title": "Max Retries", "type": "integer" }, + "retry_on_client_errors": { + "default": true, + "title": "Retry On Client Errors", + "type": "boolean" + }, "model_max_batch_size": { "default": 64, "exclusiveMinimum": 0, @@ -1283,6 +1288,7 @@ "cache_dir": null, "ignore_build_vdb_errors": false, "max_retries": 10, + "retry_on_client_errors": true, "model_max_batch_size": 64, "num_threads": 64, "pipeline_batch_size": 1024, diff --git a/src/cve/data_models/config.py b/src/cve/data_models/config.py index dc852d1..5a68b3f 100644 --- a/src/cve/data_models/config.py +++ b/src/cve/data_models/config.py @@ -51,6 +51,7 @@ class GeneralConfig(BaseModel): cache_dir: str | None = None ignore_build_vdb_errors: bool = False max_retries: NonNegativeInt = 10 + retry_on_client_errors: bool = True model_max_batch_size: PositiveInt = 64 num_threads: PositiveInt = Field(default_factory=os.cpu_count) pipeline_batch_size: PositiveInt = 1024 diff --git a/src/cve/utils/async_http_utils.py b/src/cve/utils/async_http_utils.py index 89dba20..9ecd79e 100644 --- a/src/cve/utils/async_http_utils.py +++ b/src/cve/utils/async_http_utils.py @@ -32,7 +32,7 @@ async def request_with_retry(session: aiohttp.ClientSession, sleep_time: float = 0.1, respect_retry_after_header: bool = True, log_on_error=True, - retry_on_client_errors = False) -> typing.AsyncIterator[aiohttp.ClientResponse]: + retry_on_client_errors = True) -> typing.AsyncIterator[aiohttp.ClientResponse]: """ Async version of `morpheus.utils.http_utils.request_with_retry` """ diff --git a/src/cve/utils/clients/first_client.py b/src/cve/utils/clients/first_client.py index 7e62560..b4fd771 100644 --- a/src/cve/utils/clients/first_client.py +++ b/src/cve/utils/clients/first_client.py @@ -37,13 +37,15 @@ def __init__(self, session: aiohttp.ClientSession | None = None, retry_count: int = 10, sleep_time: float = 0.1, - respect_retry_after_header: bool = True): + respect_retry_after_header: bool = True, + retry_on_client_errors: bool = True): super().__init__(session=session, base_url=base_url or os.environ.get('FIRST_BASE_URL'), retry_count=retry_count, sleep_time=sleep_time, - respect_retry_after_header=respect_retry_after_header) + respect_retry_after_header=respect_retry_after_header, + retry_on_client_errors=retry_on_client_errors) self._headers = {'Accept': 'application/json'} diff --git a/src/cve/utils/clients/ghsa_client.py b/src/cve/utils/clients/ghsa_client.py index 05997b0..8f02237 100644 --- a/src/cve/utils/clients/ghsa_client.py +++ b/src/cve/utils/clients/ghsa_client.py @@ -41,12 +41,14 @@ def __init__(self, session: aiohttp.ClientSession | None = None, retry_count: int = 10, sleep_time: float = 0.1, - respect_retry_after_header: bool = True): + respect_retry_after_header: bool = True, + retry_on_client_errors: bool = True): super().__init__(session=session, base_url=base_url or os.environ.get('GHSA_BASE_URL'), retry_count=retry_count, sleep_time=sleep_time, - respect_retry_after_header=respect_retry_after_header) + respect_retry_after_header=respect_retry_after_header, + retry_on_client_errors=retry_on_client_errors) self._api_key = api_key or os.environ.get('GHSA_API_KEY', None) diff --git a/src/cve/utils/clients/intel_client.py b/src/cve/utils/clients/intel_client.py index 5d52589..0ff733a 100644 --- a/src/cve/utils/clients/intel_client.py +++ b/src/cve/utils/clients/intel_client.py @@ -30,7 +30,8 @@ def __init__(self, base_url: str | None = None, retry_count: int = 10, sleep_time: float = 0.1, - respect_retry_after_header: bool = True): + respect_retry_after_header: bool = True, + retry_on_client_errors: bool = True): if (session is None): session = aiohttp.ClientSession() @@ -44,6 +45,7 @@ def __init__(self, self._retry_count = retry_count self._sleep_time = sleep_time self._respect_retry_after_header = respect_retry_after_header + self._retry_on_client_errors = retry_on_client_errors @classmethod @abstractmethod @@ -80,6 +82,7 @@ async def request(self, max_retries=self._retry_count, sleep_time=self._sleep_time, respect_retry_after_header=self._respect_retry_after_header, - log_on_error=log_on_error) as response: + log_on_error=log_on_error, + retry_on_client_errors=self._retry_on_client_errors) as response: return await response.json() diff --git a/src/cve/utils/clients/nvd_client.py b/src/cve/utils/clients/nvd_client.py index d84c0b1..4956093 100644 --- a/src/cve/utils/clients/nvd_client.py +++ b/src/cve/utils/clients/nvd_client.py @@ -51,13 +51,15 @@ def __init__(self, session: aiohttp.ClientSession | None = None, retry_count: int = 10, sleep_time: float = 0.1, - respect_retry_after_header: bool = True): + respect_retry_after_header: bool = True, + retry_on_client_errors: bool = True): super().__init__(session=session, base_url=base_url or os.environ.get('NVD_BASE_URL'), retry_count=retry_count, sleep_time=sleep_time, - respect_retry_after_header=respect_retry_after_header) + respect_retry_after_header=respect_retry_after_header, + retry_on_client_errors=retry_on_client_errors) self._api_key = api_key or os.environ.get('NVD_API_KEY', None) @@ -87,7 +89,8 @@ async def _get_soup(self, url: str) -> BeautifulSoup: 'url': url, "skip_auto_headers": {"User-Agent"}, }, - max_retries=self._retry_count) as response: + max_retries=self._retry_count, + retry_on_client_errors=self._retry_on_client_errors) as response: return BeautifulSoup(await response.text(), 'html.parser') def _get_cvss_vector_from_metric(self, metrics: dict, metric_version: str) -> str | None: diff --git a/src/cve/utils/clients/rhsa_client.py b/src/cve/utils/clients/rhsa_client.py index 0a2f888..190deb8 100644 --- a/src/cve/utils/clients/rhsa_client.py +++ b/src/cve/utils/clients/rhsa_client.py @@ -37,13 +37,15 @@ def __init__(self, session: aiohttp.ClientSession | None = None, retry_count: int = 10, sleep_time: float = 0.1, - respect_retry_after_header: bool = True): + respect_retry_after_header: bool = True, + retry_on_client_errors: bool = True): super().__init__(session=session, base_url=base_url or os.environ.get('RHSA_BASE_URL'), retry_count=retry_count, sleep_time=sleep_time, - respect_retry_after_header=respect_retry_after_header) + respect_retry_after_header=respect_retry_after_header, + retry_on_client_errors=retry_on_client_errors) @classmethod def default_base_url(cls) -> str: diff --git a/src/cve/utils/clients/ubuntu_client.py b/src/cve/utils/clients/ubuntu_client.py index 4a9f319..732ee6d 100644 --- a/src/cve/utils/clients/ubuntu_client.py +++ b/src/cve/utils/clients/ubuntu_client.py @@ -35,13 +35,15 @@ def __init__(self, session: aiohttp.ClientSession | None = None, retry_count: int = 10, sleep_time: float = 0.1, - respect_retry_after_header: bool = True): + respect_retry_after_header: bool = True, + retry_on_client_errors: bool = True): super().__init__(session=session, base_url=base_url or os.environ.get('UBUNTU_BASE_URL'), retry_count=retry_count, sleep_time=sleep_time, - respect_retry_after_header=respect_retry_after_header) + respect_retry_after_header=respect_retry_after_header, + retry_on_client_errors=retry_on_client_errors) @classmethod def default_base_url(cls) -> str: diff --git a/src/cve/utils/intel_retriever.py b/src/cve/utils/intel_retriever.py index 44e6cb5..bf7ce92 100644 --- a/src/cve/utils/intel_retriever.py +++ b/src/cve/utils/intel_retriever.py @@ -45,7 +45,8 @@ def __init__(self, nist_api_key: str | None = None, ghsa_api_key: str | None = None, lang_code: str = 'en', - max_retries: int = 10): + max_retries: int = 10, + retry_on_client_errors: bool = True): """ Initialize the NISTCVERetriever with URL templates for vulnerability and CVE details. """ @@ -56,13 +57,18 @@ def __init__(self, self._nvd_client = NVDClient(api_key=os.environ.get('NVD_API_KEY', nist_api_key), session=self._session, lang_code=lang_code, - retry_count=max_retries) - self._first_client = FirstClient(session=self._session, retry_count=max_retries) + retry_count=max_retries, + retry_on_client_errors=retry_on_client_errors) + self._first_client = FirstClient(session=self._session, retry_count=max_retries, + retry_on_client_errors=retry_on_client_errors) self._ghsa_client = GHSAClient(api_key=os.environ.get('GHSA_API_KEY', ghsa_api_key), session=self._session, - retry_count=max_retries) - self._rhsa_client = RHSAClient(session=self._session, retry_count=max_retries) - self._ubuntu_client = UbuntuClient(session=self._session, retry_count=max_retries) + retry_count=max_retries, + retry_on_client_errors=retry_on_client_errors) + self._rhsa_client = RHSAClient(session=self._session, retry_count=max_retries, + retry_on_client_errors=retry_on_client_errors) + self._ubuntu_client = UbuntuClient(session=self._session, retry_count=max_retries, + retry_on_client_errors=retry_on_client_errors) @asynccontextmanager async def _get_session(self, session: aiohttp.ClientSession | None = None): diff --git a/src/cve/utils/vulnerable_dependency_checker.py b/src/cve/utils/vulnerable_dependency_checker.py index f173c80..11040b3 100644 --- a/src/cve/utils/vulnerable_dependency_checker.py +++ b/src/cve/utils/vulnerable_dependency_checker.py @@ -125,13 +125,15 @@ def __init__(self, image: str, sbom_list: list, session: aiohttp.ClientSession | None = None, - respect_retry_after_header: bool = True): + respect_retry_after_header: bool = True, + retry_on_client_errors: bool = True): super().__init__( session=session, base_url=base_url or os.environ.get('DEPSDEV_BASE_URL'), retry_count=1, # Service returns 404 if the package is not found. So dont retry - respect_retry_after_header=respect_retry_after_header) + respect_retry_after_header=respect_retry_after_header, + retry_on_client_errors=retry_on_client_errors) self._semaphore = asyncio.Semaphore(25)