diff --git a/docs/api/tuf.ngclient.fetcher.rst b/docs/api/tuf.ngclient.fetcher.rst index ad64b49341..5476512d99 100644 --- a/docs/api/tuf.ngclient.fetcher.rst +++ b/docs/api/tuf.ngclient.fetcher.rst @@ -5,5 +5,5 @@ Fetcher :undoc-members: :private-members: _fetch -.. autoclass:: tuf.ngclient.RequestsFetcher +.. autoclass:: tuf.ngclient.Urllib3Fetcher :no-inherited-members: diff --git a/pyproject.toml b/pyproject.toml index fae68878a6..4e829e7576 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -47,6 +47,7 @@ classifiers = [ dependencies = [ "requests>=2.19.1", "securesystemslib~=1.0", + "urllib3<3,>=1.21.1", ] dynamic = ["version"] @@ -156,4 +157,4 @@ exclude_also = [ ] [tool.coverage.run] branch = true -omit = [ "tests/*" ] +omit = [ "tests/*", "tuf/ngclient/_internal/requests_fetcher.py" ] diff --git a/tests/test_fetcher_ng.py b/tests/test_fetcher_ng.py index 33e2c9227f..d04b09f427 100644 --- a/tests/test_fetcher_ng.py +++ b/tests/test_fetcher_ng.py @@ -1,7 +1,7 @@ # Copyright 2021, New York University and the TUF contributors # SPDX-License-Identifier: MIT OR Apache-2.0 -"""Unit test for RequestsFetcher.""" +"""Unit test for Urllib3Fetcher.""" import io import logging @@ -13,17 +13,17 @@ from typing import ClassVar from unittest.mock import Mock, patch -import requests +import urllib3 from tests import utils from tuf.api import exceptions -from tuf.ngclient import RequestsFetcher +from tuf.ngclient import Urllib3Fetcher logger = logging.getLogger(__name__) class TestFetcher(unittest.TestCase): - """Test RequestsFetcher class.""" + """Test Urllib3Fetcher class.""" server_process_handler: ClassVar[utils.TestServerProcess] @@ -57,7 +57,7 @@ def tearDownClass(cls) -> None: def setUp(self) -> None: # Instantiate a concrete instance of FetcherInterface - self.fetcher = RequestsFetcher() + self.fetcher = Urllib3Fetcher() # Simple fetch. def test_fetch(self) -> None: @@ -94,7 +94,7 @@ def test_fetch_in_chunks(self) -> None: # Incorrect URL parsing def test_url_parsing(self) -> None: with self.assertRaises(exceptions.DownloadError): - self.fetcher.fetch("missing-scheme-and-hostname-in-url") + self.fetcher.fetch("http://invalid/") # File not found error def test_http_error(self) -> None: @@ -104,12 +104,15 @@ def test_http_error(self) -> None: self.assertEqual(cm.exception.status_code, 404) # Response read timeout error - @patch.object(requests.Session, "get") + @patch.object(urllib3.PoolManager, "request") def test_response_read_timeout(self, mock_session_get: Mock) -> None: mock_response = Mock() + mock_response.status = 200 attr = { - "iter_content.side_effect": requests.exceptions.ConnectionError( - "Simulated timeout" + "stream.side_effect": urllib3.exceptions.MaxRetryError( + urllib3.connectionpool.ConnectionPool("localhost"), + "", + urllib3.exceptions.TimeoutError(), ) } mock_response.configure_mock(**attr) @@ -117,13 +120,17 @@ def test_response_read_timeout(self, mock_session_get: Mock) -> None: with self.assertRaises(exceptions.SlowRetrievalError): next(self.fetcher.fetch(self.url)) - mock_response.iter_content.assert_called_once() + mock_response.stream.assert_called_once() # Read/connect session timeout error @patch.object( - requests.Session, - "get", - side_effect=requests.exceptions.Timeout("Simulated timeout"), + urllib3.PoolManager, + "request", + side_effect=urllib3.exceptions.MaxRetryError( + urllib3.connectionpool.ConnectionPool("localhost"), + "", + urllib3.exceptions.TimeoutError(), + ), ) def test_session_get_timeout(self, mock_session_get: Mock) -> None: with self.assertRaises(exceptions.SlowRetrievalError): diff --git a/tests/test_updater_ng.py b/tests/test_updater_ng.py index 65dc59e0ba..9a89c1deea 100644 --- a/tests/test_updater_ng.py +++ b/tests/test_updater_ng.py @@ -316,7 +316,9 @@ def test_persist_metadata_fails( def test_invalid_target_base_url(self) -> None: info = TargetFile(1, {"sha256": ""}, "targetpath") with self.assertRaises(exceptions.DownloadError): - self.updater.download_target(info, target_base_url="invalid_url") + self.updater.download_target( + info, target_base_url="http://invalid/" + ) def test_non_existing_target_file(self) -> None: info = TargetFile(1, {"sha256": ""}, "/non_existing_file.txt") @@ -328,7 +330,7 @@ def test_non_existing_target_file(self) -> None: def test_user_agent(self) -> None: # test default self.updater.refresh() - session = next(iter(self.updater._fetcher._sessions.values())) + session = self.updater._fetcher._poolManager ua = session.headers["User-Agent"] self.assertEqual(ua[:11], "python-tuf/") @@ -341,7 +343,7 @@ def test_user_agent(self) -> None: config=UpdaterConfig(app_user_agent="MyApp/1.2.3"), ) updater.refresh() - session = next(iter(updater._fetcher._sessions.values())) + session = updater._fetcher._poolManager ua = session.headers["User-Agent"] self.assertEqual(ua[:23], "MyApp/1.2.3 python-tuf/") diff --git a/tuf/ngclient/__init__.py b/tuf/ngclient/__init__.py index b2c5cbfd78..1d2084acf5 100644 --- a/tuf/ngclient/__init__.py +++ b/tuf/ngclient/__init__.py @@ -9,6 +9,7 @@ # sigstore-python 1.0 still uses the module from there). requests_fetcher # can be moved out of _internal once sigstore-python 1.0 is not relevant. from tuf.ngclient._internal.requests_fetcher import RequestsFetcher +from tuf.ngclient._internal.urllib3_fetcher import Urllib3Fetcher from tuf.ngclient.config import UpdaterConfig from tuf.ngclient.fetcher import FetcherInterface from tuf.ngclient.updater import Updater @@ -16,6 +17,7 @@ __all__ = [ # noqa: PLE0604 FetcherInterface.__name__, RequestsFetcher.__name__, + Urllib3Fetcher.__name__, TargetFile.__name__, Updater.__name__, UpdaterConfig.__name__, diff --git a/tuf/ngclient/_internal/urllib3_fetcher.py b/tuf/ngclient/_internal/urllib3_fetcher.py new file mode 100644 index 0000000000..5d4bb1cf60 --- /dev/null +++ b/tuf/ngclient/_internal/urllib3_fetcher.py @@ -0,0 +1,110 @@ +# Copyright 2021, New York University and the TUF contributors +# SPDX-License-Identifier: MIT OR Apache-2.0 + +"""Provides an implementation of ``FetcherInterface`` using the urllib3 HTTP +library. +""" + +from __future__ import annotations + +import logging +from typing import TYPE_CHECKING + +# Imports +import urllib3 + +import tuf +from tuf.api import exceptions +from tuf.ngclient.fetcher import FetcherInterface + +if TYPE_CHECKING: + from collections.abc import Iterator + +# Globals +logger = logging.getLogger(__name__) + + +# Classes +class Urllib3Fetcher(FetcherInterface): + """An implementation of ``FetcherInterface`` based on the urllib3 library. + + Attributes: + socket_timeout: Timeout in seconds, used for both initial connection + delay and the maximum delay between bytes received. + chunk_size: Chunk size in bytes used when downloading. + """ + + def __init__( + self, + socket_timeout: int = 30, + chunk_size: int = 400000, + app_user_agent: str | None = None, + ) -> None: + # Default settings + self.socket_timeout: int = socket_timeout # seconds + self.chunk_size: int = chunk_size # bytes + + # Create User-Agent. + ua = f"python-tuf/{tuf.__version__}" + if app_user_agent is not None: + ua = f"{app_user_agent} {ua}" + + self._poolManager = urllib3.PoolManager(headers={"User-Agent": ua}) + + def _fetch(self, url: str) -> Iterator[bytes]: + """Fetch the contents of HTTP/HTTPS url from a remote server. + + Args: + url: URL string that represents a file location. + + Raises: + exceptions.SlowRetrievalError: Timeout occurs while receiving + data. + exceptions.DownloadHTTPError: HTTP error code is received. + + Returns: + Bytes iterator + """ + + # Defer downloading the response body with preload_content=False. + # Always set the timeout. This timeout value is interpreted by + # urllib3 as: + # - connect timeout (max delay before first byte is received) + # - read (gap) timeout (max delay between bytes received) + try: + response = self._poolManager.request( + "GET", + url, + preload_content=False, + timeout=urllib3.Timeout(self.socket_timeout), + ) + except urllib3.exceptions.MaxRetryError as e: + if isinstance(e.reason, urllib3.exceptions.TimeoutError): + raise exceptions.SlowRetrievalError from e + + if response.status >= 400: + response.close() + raise exceptions.DownloadHTTPError( + f"HTTP error occurred with status {response.status}", + response.status, + ) + + return self._chunks(response) + + def _chunks( + self, response: urllib3.response.BaseHTTPResponse + ) -> Iterator[bytes]: + """A generator function to be returned by fetch. + + This way the caller of fetch can differentiate between connection + and actual data download. + """ + + try: + yield from response.stream(self.chunk_size) + except urllib3.exceptions.MaxRetryError as e: + if isinstance(e.reason, urllib3.exceptions.TimeoutError): + raise exceptions.SlowRetrievalError from e + + finally: + response.release_conn() diff --git a/tuf/ngclient/updater.py b/tuf/ngclient/updater.py index 51bda41f26..022d601f95 100644 --- a/tuf/ngclient/updater.py +++ b/tuf/ngclient/updater.py @@ -49,7 +49,7 @@ from tuf.api import exceptions from tuf.api.metadata import Root, Snapshot, TargetFile, Targets, Timestamp -from tuf.ngclient._internal import requests_fetcher, trusted_metadata_set +from tuf.ngclient._internal import trusted_metadata_set, urllib3_fetcher from tuf.ngclient.config import EnvelopeType, UpdaterConfig if TYPE_CHECKING: @@ -71,7 +71,7 @@ class Updater: target_base_url: ``Optional``; Default base URL for all remote target downloads. Can be individually set in ``download_target()`` fetcher: ``Optional``; ``FetcherInterface`` implementation used to - download both metadata and targets. Default is ``RequestsFetcher`` + download both metadata and targets. Default is ``Urllib3Fetcher`` config: ``Optional``; ``UpdaterConfig`` could be used to setup common configuration options. @@ -102,7 +102,7 @@ def __init__( if fetcher is not None: self._fetcher = fetcher else: - self._fetcher = requests_fetcher.RequestsFetcher( + self._fetcher = urllib3_fetcher.Urllib3Fetcher( app_user_agent=self.config.app_user_agent )