Skip to content

Commit

Permalink
Merge pull request #2762 from NicholasTanz/switchUrlLib3
Browse files Browse the repository at this point in the history
replace RequestsFetcher for Urllib3Fetcher
  • Loading branch information
jku authored Feb 14, 2025
2 parents 8541a39 + 2ac8bdc commit d42426f
Show file tree
Hide file tree
Showing 7 changed files with 143 additions and 21 deletions.
2 changes: 1 addition & 1 deletion docs/api/tuf.ngclient.fetcher.rst
Original file line number Diff line number Diff line change
Expand Up @@ -5,5 +5,5 @@ Fetcher
:undoc-members:
:private-members: _fetch

.. autoclass:: tuf.ngclient.RequestsFetcher
.. autoclass:: tuf.ngclient.Urllib3Fetcher
:no-inherited-members:
3 changes: 2 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ classifiers = [
dependencies = [
"requests>=2.19.1",
"securesystemslib~=1.0",
"urllib3<3,>=1.21.1",
]
dynamic = ["version"]

Expand Down Expand Up @@ -156,4 +157,4 @@ exclude_also = [
]
[tool.coverage.run]
branch = true
omit = [ "tests/*" ]
omit = [ "tests/*", "tuf/ngclient/_internal/requests_fetcher.py" ]
33 changes: 20 additions & 13 deletions tests/test_fetcher_ng.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# Copyright 2021, New York University and the TUF contributors
# SPDX-License-Identifier: MIT OR Apache-2.0

"""Unit test for RequestsFetcher."""
"""Unit test for Urllib3Fetcher."""

import io
import logging
Expand All @@ -13,17 +13,17 @@
from typing import ClassVar
from unittest.mock import Mock, patch

import requests
import urllib3

from tests import utils
from tuf.api import exceptions
from tuf.ngclient import RequestsFetcher
from tuf.ngclient import Urllib3Fetcher

logger = logging.getLogger(__name__)


class TestFetcher(unittest.TestCase):
"""Test RequestsFetcher class."""
"""Test Urllib3Fetcher class."""

server_process_handler: ClassVar[utils.TestServerProcess]

Expand Down Expand Up @@ -57,7 +57,7 @@ def tearDownClass(cls) -> None:

def setUp(self) -> None:
# Instantiate a concrete instance of FetcherInterface
self.fetcher = RequestsFetcher()
self.fetcher = Urllib3Fetcher()

# Simple fetch.
def test_fetch(self) -> None:
Expand Down Expand Up @@ -94,7 +94,7 @@ def test_fetch_in_chunks(self) -> None:
# Incorrect URL parsing
def test_url_parsing(self) -> None:
with self.assertRaises(exceptions.DownloadError):
self.fetcher.fetch("missing-scheme-and-hostname-in-url")
self.fetcher.fetch("http://invalid/")

# File not found error
def test_http_error(self) -> None:
Expand All @@ -104,26 +104,33 @@ def test_http_error(self) -> None:
self.assertEqual(cm.exception.status_code, 404)

# Response read timeout error
@patch.object(requests.Session, "get")
@patch.object(urllib3.PoolManager, "request")
def test_response_read_timeout(self, mock_session_get: Mock) -> None:
mock_response = Mock()
mock_response.status = 200
attr = {
"iter_content.side_effect": requests.exceptions.ConnectionError(
"Simulated timeout"
"stream.side_effect": urllib3.exceptions.MaxRetryError(
urllib3.connectionpool.ConnectionPool("localhost"),
"",
urllib3.exceptions.TimeoutError(),
)
}
mock_response.configure_mock(**attr)
mock_session_get.return_value = mock_response

with self.assertRaises(exceptions.SlowRetrievalError):
next(self.fetcher.fetch(self.url))
mock_response.iter_content.assert_called_once()
mock_response.stream.assert_called_once()

# Read/connect session timeout error
@patch.object(
requests.Session,
"get",
side_effect=requests.exceptions.Timeout("Simulated timeout"),
urllib3.PoolManager,
"request",
side_effect=urllib3.exceptions.MaxRetryError(
urllib3.connectionpool.ConnectionPool("localhost"),
"",
urllib3.exceptions.TimeoutError(),
),
)
def test_session_get_timeout(self, mock_session_get: Mock) -> None:
with self.assertRaises(exceptions.SlowRetrievalError):
Expand Down
8 changes: 5 additions & 3 deletions tests/test_updater_ng.py
Original file line number Diff line number Diff line change
Expand Up @@ -316,7 +316,9 @@ def test_persist_metadata_fails(
def test_invalid_target_base_url(self) -> None:
info = TargetFile(1, {"sha256": ""}, "targetpath")
with self.assertRaises(exceptions.DownloadError):
self.updater.download_target(info, target_base_url="invalid_url")
self.updater.download_target(
info, target_base_url="http://invalid/"
)

def test_non_existing_target_file(self) -> None:
info = TargetFile(1, {"sha256": ""}, "/non_existing_file.txt")
Expand All @@ -328,7 +330,7 @@ def test_non_existing_target_file(self) -> None:
def test_user_agent(self) -> None:
# test default
self.updater.refresh()
session = next(iter(self.updater._fetcher._sessions.values()))
session = self.updater._fetcher._poolManager
ua = session.headers["User-Agent"]
self.assertEqual(ua[:11], "python-tuf/")

Expand All @@ -341,7 +343,7 @@ def test_user_agent(self) -> None:
config=UpdaterConfig(app_user_agent="MyApp/1.2.3"),
)
updater.refresh()
session = next(iter(updater._fetcher._sessions.values()))
session = updater._fetcher._poolManager
ua = session.headers["User-Agent"]

self.assertEqual(ua[:23], "MyApp/1.2.3 python-tuf/")
Expand Down
2 changes: 2 additions & 0 deletions tuf/ngclient/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,15 @@
# sigstore-python 1.0 still uses the module from there). requests_fetcher
# can be moved out of _internal once sigstore-python 1.0 is not relevant.
from tuf.ngclient._internal.requests_fetcher import RequestsFetcher
from tuf.ngclient._internal.urllib3_fetcher import Urllib3Fetcher
from tuf.ngclient.config import UpdaterConfig
from tuf.ngclient.fetcher import FetcherInterface
from tuf.ngclient.updater import Updater

__all__ = [ # noqa: PLE0604
FetcherInterface.__name__,
RequestsFetcher.__name__,
Urllib3Fetcher.__name__,
TargetFile.__name__,
Updater.__name__,
UpdaterConfig.__name__,
Expand Down
110 changes: 110 additions & 0 deletions tuf/ngclient/_internal/urllib3_fetcher.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
# Copyright 2021, New York University and the TUF contributors
# SPDX-License-Identifier: MIT OR Apache-2.0

"""Provides an implementation of ``FetcherInterface`` using the urllib3 HTTP
library.
"""

from __future__ import annotations

import logging
from typing import TYPE_CHECKING

# Imports
import urllib3

import tuf
from tuf.api import exceptions
from tuf.ngclient.fetcher import FetcherInterface

if TYPE_CHECKING:
from collections.abc import Iterator

# Globals
logger = logging.getLogger(__name__)


# Classes
class Urllib3Fetcher(FetcherInterface):
"""An implementation of ``FetcherInterface`` based on the urllib3 library.
Attributes:
socket_timeout: Timeout in seconds, used for both initial connection
delay and the maximum delay between bytes received.
chunk_size: Chunk size in bytes used when downloading.
"""

def __init__(
self,
socket_timeout: int = 30,
chunk_size: int = 400000,
app_user_agent: str | None = None,
) -> None:
# Default settings
self.socket_timeout: int = socket_timeout # seconds
self.chunk_size: int = chunk_size # bytes

# Create User-Agent.
ua = f"python-tuf/{tuf.__version__}"
if app_user_agent is not None:
ua = f"{app_user_agent} {ua}"

self._poolManager = urllib3.PoolManager(headers={"User-Agent": ua})

def _fetch(self, url: str) -> Iterator[bytes]:
"""Fetch the contents of HTTP/HTTPS url from a remote server.
Args:
url: URL string that represents a file location.
Raises:
exceptions.SlowRetrievalError: Timeout occurs while receiving
data.
exceptions.DownloadHTTPError: HTTP error code is received.
Returns:
Bytes iterator
"""

# Defer downloading the response body with preload_content=False.
# Always set the timeout. This timeout value is interpreted by
# urllib3 as:
# - connect timeout (max delay before first byte is received)
# - read (gap) timeout (max delay between bytes received)
try:
response = self._poolManager.request(
"GET",
url,
preload_content=False,
timeout=urllib3.Timeout(self.socket_timeout),
)
except urllib3.exceptions.MaxRetryError as e:
if isinstance(e.reason, urllib3.exceptions.TimeoutError):
raise exceptions.SlowRetrievalError from e

if response.status >= 400:
response.close()
raise exceptions.DownloadHTTPError(
f"HTTP error occurred with status {response.status}",
response.status,
)

return self._chunks(response)

def _chunks(
self, response: urllib3.response.BaseHTTPResponse
) -> Iterator[bytes]:
"""A generator function to be returned by fetch.
This way the caller of fetch can differentiate between connection
and actual data download.
"""

try:
yield from response.stream(self.chunk_size)
except urllib3.exceptions.MaxRetryError as e:
if isinstance(e.reason, urllib3.exceptions.TimeoutError):
raise exceptions.SlowRetrievalError from e

finally:
response.release_conn()
6 changes: 3 additions & 3 deletions tuf/ngclient/updater.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@

from tuf.api import exceptions
from tuf.api.metadata import Root, Snapshot, TargetFile, Targets, Timestamp
from tuf.ngclient._internal import requests_fetcher, trusted_metadata_set
from tuf.ngclient._internal import trusted_metadata_set, urllib3_fetcher
from tuf.ngclient.config import EnvelopeType, UpdaterConfig

if TYPE_CHECKING:
Expand All @@ -71,7 +71,7 @@ class Updater:
target_base_url: ``Optional``; Default base URL for all remote target
downloads. Can be individually set in ``download_target()``
fetcher: ``Optional``; ``FetcherInterface`` implementation used to
download both metadata and targets. Default is ``RequestsFetcher``
download both metadata and targets. Default is ``Urllib3Fetcher``
config: ``Optional``; ``UpdaterConfig`` could be used to setup common
configuration options.
Expand Down Expand Up @@ -102,7 +102,7 @@ def __init__(
if fetcher is not None:
self._fetcher = fetcher
else:
self._fetcher = requests_fetcher.RequestsFetcher(
self._fetcher = urllib3_fetcher.Urllib3Fetcher(
app_user_agent=self.config.app_user_agent
)

Expand Down

0 comments on commit d42426f

Please sign in to comment.