Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

replace RequestsFetcher for Urllib3Fetcher #2762

Merged
merged 13 commits into from
Feb 14, 2025
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion docs/api/tuf.ngclient.fetcher.rst
Original file line number Diff line number Diff line change
Expand Up @@ -5,5 +5,5 @@ Fetcher
:undoc-members:
:private-members: _fetch

.. autoclass:: tuf.ngclient.RequestsFetcher
.. autoclass:: tuf.ngclient.Urllib3Fetcher
:no-inherited-members:
23 changes: 12 additions & 11 deletions tests/test_fetcher_ng.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# Copyright 2021, New York University and the TUF contributors
# SPDX-License-Identifier: MIT OR Apache-2.0

"""Unit test for RequestsFetcher."""
"""Unit test for Urllib3Fetcher."""

import io
import logging
Expand All @@ -14,17 +14,17 @@
from typing import Any, ClassVar
from unittest.mock import Mock, patch

import requests
import urllib3

from tests import utils
from tuf.api import exceptions
from tuf.ngclient import RequestsFetcher
from tuf.ngclient import Urllib3Fetcher

logger = logging.getLogger(__name__)


class TestFetcher(unittest.TestCase):
"""Test RequestsFetcher class."""
"""Test Urllib3Fetcher class."""

server_process_handler: ClassVar[utils.TestServerProcess]

Expand Down Expand Up @@ -58,7 +58,7 @@ def tearDownClass(cls) -> None:

def setUp(self) -> None:
# Instantiate a concrete instance of FetcherInterface
self.fetcher = RequestsFetcher()
self.fetcher = Urllib3Fetcher()

# Simple fetch.
def test_fetch(self) -> None:
Expand Down Expand Up @@ -105,11 +105,12 @@ def test_http_error(self) -> None:
self.assertEqual(cm.exception.status_code, 404)

# Response read timeout error
@patch.object(requests.Session, "get")
@patch.object(urllib3.PoolManager, "request")
def test_response_read_timeout(self, mock_session_get: Mock) -> None:
mock_response = Mock()
mock_response.status = 200
attr = {
"iter_content.side_effect": requests.exceptions.ConnectionError(
"stream.side_effect": urllib3.exceptions.ConnectionError(
NicholasTanz marked this conversation as resolved.
Show resolved Hide resolved
"Simulated timeout"
)
}
Expand All @@ -118,13 +119,13 @@ def test_response_read_timeout(self, mock_session_get: Mock) -> None:

with self.assertRaises(exceptions.SlowRetrievalError):
next(self.fetcher.fetch(self.url))
mock_response.iter_content.assert_called_once()
mock_response.stream.assert_called_once()

# Read/connect session timeout error
@patch.object(
requests.Session,
"get",
side_effect=requests.exceptions.Timeout("Simulated timeout"),
urllib3.PoolManager,
"request",
side_effect=urllib3.exceptions.TimeoutError,
NicholasTanz marked this conversation as resolved.
Show resolved Hide resolved
)
def test_session_get_timeout(self, mock_session_get: Mock) -> None:
with self.assertRaises(exceptions.SlowRetrievalError):
Expand Down
4 changes: 2 additions & 2 deletions tests/test_updater_ng.py
Original file line number Diff line number Diff line change
Expand Up @@ -325,7 +325,7 @@ def test_non_existing_target_file(self) -> None:
def test_user_agent(self) -> None:
# test default
self.updater.refresh()
session = next(iter(self.updater._fetcher._sessions.values()))
session = next(iter(self.updater._fetcher._poolManagers.values()))
ua = session.headers["User-Agent"]
self.assertEqual(ua[:11], "python-tuf/")

Expand All @@ -338,7 +338,7 @@ def test_user_agent(self) -> None:
config=UpdaterConfig(app_user_agent="MyApp/1.2.3"),
)
updater.refresh()
session = next(iter(updater._fetcher._sessions.values()))
session = next(iter(updater._fetcher._poolManagers.values()))
ua = session.headers["User-Agent"]

self.assertEqual(ua[:23], "MyApp/1.2.3 python-tuf/")
Expand Down
4 changes: 2 additions & 2 deletions tuf/ngclient/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,14 +8,14 @@
# requests_fetcher is public but comes from _internal for now (because
# sigstore-python 1.0 still uses the module from there). requests_fetcher
# can be moved out of _internal once sigstore-python 1.0 is not relevant.
from tuf.ngclient._internal.requests_fetcher import RequestsFetcher
from tuf.ngclient._internal.urllib3_fetcher import Urllib3Fetcher
from tuf.ngclient.config import UpdaterConfig
from tuf.ngclient.fetcher import FetcherInterface
from tuf.ngclient.updater import Updater

__all__ = [ # noqa: PLE0604
FetcherInterface.__name__,
RequestsFetcher.__name__,
Urllib3Fetcher.__name__,
TargetFile.__name__,
NicholasTanz marked this conversation as resolved.
Show resolved Hide resolved
Updater.__name__,
UpdaterConfig.__name__,
Expand Down
152 changes: 152 additions & 0 deletions tuf/ngclient/_internal/urllib3_fetcher.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,152 @@
# Copyright 2021, New York University and the TUF contributors
# SPDX-License-Identifier: MIT OR Apache-2.0

"""Provides an implementation of ``FetcherInterface`` using the urllib3 HTTP
library.
"""

from __future__ import annotations

import logging
from typing import TYPE_CHECKING
from urllib import parse

# Imports
import urllib3

import tuf
from tuf.api import exceptions
from tuf.ngclient.fetcher import FetcherInterface

if TYPE_CHECKING:
from collections.abc import Iterator

# Globals
logger = logging.getLogger(__name__)


# Classes
class Urllib3Fetcher(FetcherInterface):
"""An implementation of ``FetcherInterface`` based on the urllib3 library.

Attributes:
socket_timeout: Timeout in seconds, used for both initial connection
delay and the maximum delay between bytes received.
chunk_size: Chunk size in bytes used when downloading.
"""

def __init__(
self,
socket_timeout: int = 30,
chunk_size: int = 400000,
app_user_agent: str | None = None,
) -> None:
# NOTE: We use a separate urllib3.PoolManager per scheme+hostname
# combination, in order to reuse connections to the same hostname to
# improve efficiency, but avoiding sharing state between different
# hosts-scheme combinations to minimize subtle security issues.
# Some cookies may not be HTTP-safe.
self._poolManagers: dict[tuple[str, str], urllib3.PoolManager] = {}

# Default settings
self.socket_timeout: int = socket_timeout # seconds
self.chunk_size: int = chunk_size # bytes
self.app_user_agent = app_user_agent
NicholasTanz marked this conversation as resolved.
Show resolved Hide resolved

def _fetch(self, url: str) -> Iterator[bytes]:
"""Fetch the contents of HTTP/HTTPS url from a remote server.

Args:
url: URL string that represents a file location.

Raises:
exceptions.SlowRetrievalError: Timeout occurs while receiving
data.
exceptions.DownloadHTTPError: HTTP error code is received.

Returns:
Bytes iterator
"""
# Get a customized session for each new schema+hostname combination.
poolmanager = self._get_poolmanager(url)
NicholasTanz marked this conversation as resolved.
Show resolved Hide resolved

# Get the urllib3.PoolManager object for this URL.
#
# Defer downloading the response body with preload_content=False.
# Always set the timeout. This timeout value is interpreted by
# urllib3 as:
# - connect timeout (max delay before first byte is received)
# - read (gap) timeout (max delay between bytes received)
try:
response = poolmanager.request(
"GET",
url,
preload_content=False,
timeout=urllib3.Timeout(connect=self.socket_timeout),
NicholasTanz marked this conversation as resolved.
Show resolved Hide resolved
)
except urllib3.exceptions.TimeoutError as e:
raise exceptions.SlowRetrievalError from e

# Check response status.
try:
if response.status >= 400:
raise urllib3.exceptions.HTTPError
except urllib3.exceptions.HTTPError as e:
response.close()
status = response.status
raise exceptions.DownloadHTTPError(str(e), status) from e
NicholasTanz marked this conversation as resolved.
Show resolved Hide resolved

return self._chunks(response)

def _chunks(
self, response: urllib3.response.BaseHTTPResponse
) -> Iterator[bytes]:
"""A generator function to be returned by fetch.

This way the caller of fetch can differentiate between connection
and actual data download.
"""

try:
yield from response.stream(self.chunk_size)
except (
urllib3.exceptions.ConnectionError,
urllib3.exceptions.TimeoutError,
) as e:
raise exceptions.SlowRetrievalError from e

finally:
response.close()
NicholasTanz marked this conversation as resolved.
Show resolved Hide resolved

def _get_poolmanager(self, url: str) -> urllib3.PoolManager:
"""Return a different customized urllib3.PoolManager per schema+hostname
combination.

Raises:
exceptions.DownloadError: When there is a problem parsing the url.
"""
# Use a different urllib3.PoolManager per schema+hostname
# combination, to reuse connections while minimizing subtle
# security issues.
parsed_url = parse.urlparse(url)

if not parsed_url.scheme:
raise exceptions.DownloadError(f"Failed to parse URL {url}")

poolmanager_index = (parsed_url.scheme, parsed_url.hostname or "")
poolmanager = self._poolManagers.get(poolmanager_index)

if not poolmanager:
# no default User-Agent when creating a poolManager
ua = f"python-tuf/{tuf.__version__}"
if self.app_user_agent is not None:
ua = f"{self.app_user_agent} {ua}"

poolmanager = urllib3.PoolManager(headers={"User-Agent": ua})
self._poolManagers[poolmanager_index] = poolmanager

logger.debug("Made new poolManager %s", poolmanager_index)
else:
logger.debug("Reusing poolManager %s", poolmanager_index)

return poolmanager
6 changes: 3 additions & 3 deletions tuf/ngclient/updater.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@

from tuf.api import exceptions
from tuf.api.metadata import Root, Snapshot, TargetFile, Targets, Timestamp
from tuf.ngclient._internal import requests_fetcher, trusted_metadata_set
from tuf.ngclient._internal import trusted_metadata_set, urllib3_fetcher
from tuf.ngclient.config import EnvelopeType, UpdaterConfig

if TYPE_CHECKING:
Expand All @@ -71,7 +71,7 @@ class Updater:
target_base_url: ``Optional``; Default base URL for all remote target
downloads. Can be individually set in ``download_target()``
fetcher: ``Optional``; ``FetcherInterface`` implementation used to
download both metadata and targets. Default is ``RequestsFetcher``
download both metadata and targets. Default is ``Urllib3Fetcher``
config: ``Optional``; ``UpdaterConfig`` could be used to setup common
configuration options.

Expand Down Expand Up @@ -102,7 +102,7 @@ def __init__(
if fetcher is not None:
self._fetcher = fetcher
else:
self._fetcher = requests_fetcher.RequestsFetcher(
self._fetcher = urllib3_fetcher.Urllib3Fetcher(
NicholasTanz marked this conversation as resolved.
Show resolved Hide resolved
app_user_agent=self.config.app_user_agent
)

Expand Down
Loading