From 7c0636c380c41f95c1d2358571ade48cae8b3cc5 Mon Sep 17 00:00:00 2001 From: L2501 Date: Sat, 25 Jan 2025 18:46:40 +0000 Subject: [PATCH] [script.module.urllib3] 2.2.3 (#2693) --- script.module.urllib3/addon.xml | 2 +- script.module.urllib3/lib/urllib3/__init__.py | 62 +++ .../lib/urllib3/_base_connection.py | 8 +- .../lib/urllib3/_collections.py | 6 +- .../lib/urllib3/_request_methods.py | 65 ++- script.module.urllib3/lib/urllib3/_version.py | 18 +- .../lib/urllib3/connection.py | 250 ++++++++--- .../lib/urllib3/connectionpool.py | 28 +- .../urllib3/contrib/emscripten/__init__.py | 16 + .../urllib3/contrib/emscripten/connection.py | 254 +++++++++++ .../emscripten/emscripten_fetch_worker.js | 110 +++++ .../lib/urllib3/contrib/emscripten/fetch.py | 418 ++++++++++++++++++ .../lib/urllib3/contrib/emscripten/request.py | 22 + .../urllib3/contrib/emscripten/response.py | 285 ++++++++++++ .../lib/urllib3/contrib/pyopenssl.py | 10 +- .../lib/urllib3/contrib/socks.py | 8 +- .../lib/urllib3/exceptions.py | 7 +- script.module.urllib3/lib/urllib3/fields.py | 8 +- .../lib/urllib3/http2/__init__.py | 53 +++ .../lib/urllib3/http2/connection.py | 356 +++++++++++++++ .../lib/urllib3/http2/probe.py | 87 ++++ .../lib/urllib3/poolmanager.py | 11 +- script.module.urllib3/lib/urllib3/response.py | 193 ++++++-- .../lib/urllib3/util/connection.py | 2 +- .../lib/urllib3/util/request.py | 8 +- .../lib/urllib3/util/retry.py | 10 +- .../lib/urllib3/util/ssl_.py | 35 +- .../lib/urllib3/util/ssltransport.py | 12 +- .../lib/urllib3/util/timeout.py | 4 - 29 files changed, 2171 insertions(+), 177 deletions(-) create mode 100644 script.module.urllib3/lib/urllib3/contrib/emscripten/__init__.py create mode 100644 script.module.urllib3/lib/urllib3/contrib/emscripten/connection.py create mode 100644 script.module.urllib3/lib/urllib3/contrib/emscripten/emscripten_fetch_worker.js create mode 100644 script.module.urllib3/lib/urllib3/contrib/emscripten/fetch.py create mode 100644 script.module.urllib3/lib/urllib3/contrib/emscripten/request.py create mode 100644 script.module.urllib3/lib/urllib3/contrib/emscripten/response.py create mode 100644 script.module.urllib3/lib/urllib3/http2/__init__.py create mode 100644 script.module.urllib3/lib/urllib3/http2/connection.py create mode 100644 script.module.urllib3/lib/urllib3/http2/probe.py diff --git a/script.module.urllib3/addon.xml b/script.module.urllib3/addon.xml index 8381110d26..448741daee 100644 --- a/script.module.urllib3/addon.xml +++ b/script.module.urllib3/addon.xml @@ -1,5 +1,5 @@ - + diff --git a/script.module.urllib3/lib/urllib3/__init__.py b/script.module.urllib3/lib/urllib3/__init__.py index 46c89762c2..3fe782c8a4 100644 --- a/script.module.urllib3/lib/urllib3/__init__.py +++ b/script.module.urllib3/lib/urllib3/__init__.py @@ -6,6 +6,7 @@ # Set default logging handler to avoid "No handler found" warnings. import logging +import sys import typing import warnings from logging import NullHandler @@ -132,6 +133,61 @@ def request( Therefore, its side effects could be shared across dependencies relying on it. To avoid side effects create a new ``PoolManager`` instance and use it instead. The method does not accept low-level ``**urlopen_kw`` keyword arguments. + + :param method: + HTTP request method (such as GET, POST, PUT, etc.) + + :param url: + The URL to perform the request on. + + :param body: + Data to send in the request body, either :class:`str`, :class:`bytes`, + an iterable of :class:`str`/:class:`bytes`, or a file-like object. 
+ + :param fields: + Data to encode and send in the request body. + + :param headers: + Dictionary of custom headers to send, such as User-Agent, + If-None-Match, etc. + + :param bool preload_content: + If True, the response's body will be preloaded into memory. + + :param bool decode_content: + If True, will attempt to decode the body based on the + 'content-encoding' header. + + :param redirect: + If True, automatically handle redirects (status codes 301, 302, + 303, 307, 308). Each redirect counts as a retry. Disabling retries + will disable redirect, too. + + :param retries: + Configure the number of retries to allow before raising a + :class:`~urllib3.exceptions.MaxRetryError` exception. + + If ``None`` (default) will retry 3 times, see ``Retry.DEFAULT``. Pass a + :class:`~urllib3.util.retry.Retry` object for fine-grained control + over different types of retries. + Pass an integer number to retry connection errors that many times, + but no other types of errors. Pass zero to never retry. + + If ``False``, then retries are disabled and any exception is raised + immediately. Also, instead of raising a MaxRetryError on redirects, + the redirect response will be returned. + + :type retries: :class:`~urllib3.util.retry.Retry`, False, or an int. + + :param timeout: + If specified, overrides the default timeout for this one + request. It may be a float (in seconds) or an instance of + :class:`urllib3.util.Timeout`. + + :param json: + Data to encode and send as JSON with UTF-encoded in the request body. + The ``"Content-Type"`` header will be set to ``"application/json"`` + unless specified otherwise. """ return _DEFAULT_POOL.request( @@ -147,3 +203,9 @@ def request( timeout=timeout, json=json, ) + + +if sys.platform == "emscripten": + from .contrib.emscripten import inject_into_urllib3 # noqa: 401 + + inject_into_urllib3() diff --git a/script.module.urllib3/lib/urllib3/_base_connection.py b/script.module.urllib3/lib/urllib3/_base_connection.py index bb349c744b..29ca334879 100644 --- a/script.module.urllib3/lib/urllib3/_base_connection.py +++ b/script.module.urllib3/lib/urllib3/_base_connection.py @@ -12,7 +12,7 @@ class ProxyConfig(typing.NamedTuple): ssl_context: ssl.SSLContext | None use_forwarding_for_https: bool - assert_hostname: None | str | Literal[False] + assert_hostname: None | str | typing.Literal[False] assert_fingerprint: str | None @@ -28,7 +28,7 @@ class _ResponseOptions(typing.NamedTuple): if typing.TYPE_CHECKING: import ssl - from typing import Literal, Protocol + from typing import Protocol from .response import BaseHTTPResponse @@ -124,7 +124,7 @@ class BaseHTTPSConnection(BaseHTTPConnection, Protocol): # Certificate verification methods cert_reqs: int | str | None - assert_hostname: None | str | Literal[False] + assert_hostname: None | str | typing.Literal[False] assert_fingerprint: str | None ssl_context: ssl.SSLContext | None @@ -155,7 +155,7 @@ def __init__( proxy: Url | None = None, proxy_config: ProxyConfig | None = None, cert_reqs: int | str | None = None, - assert_hostname: None | str | Literal[False] = None, + assert_hostname: None | str | typing.Literal[False] = None, assert_fingerprint: str | None = None, server_hostname: str | None = None, ssl_context: ssl.SSLContext | None = None, diff --git a/script.module.urllib3/lib/urllib3/_collections.py b/script.module.urllib3/lib/urllib3/_collections.py index 55b0324797..8a4409a122 100644 --- a/script.module.urllib3/lib/urllib3/_collections.py +++ b/script.module.urllib3/lib/urllib3/_collections.py @@ -427,7 +427,7 @@ 
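# A minimal sketch of the module-level request() API documented above, assuming
# this vendored copy is importable as `urllib3` (the URL and payload here are
# illustrative, not part of the patch):
import urllib3
from urllib3.util import Retry, Timeout

resp = urllib3.request(
    "POST",
    "https://example.com/api",
    json={"key": "value"},  # sets Content-Type: application/json unless given
    timeout=Timeout(connect=2.0, read=5.0),
    retries=Retry(total=3, redirect=2),
)
print(resp.status, resp.json())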
def _copy_from(self, other: HTTPHeaderDict) -> None: val = other.getlist(key) self._container[key.lower()] = [key, *val] - def copy(self) -> HTTPHeaderDict: + def copy(self) -> Self: clone = type(self)() clone._copy_from(self) return clone @@ -462,7 +462,7 @@ def __ior__(self, other: object) -> HTTPHeaderDict: self.extend(maybe_constructable) return self - def __or__(self, other: object) -> HTTPHeaderDict: + def __or__(self, other: object) -> Self: # Supports merging header dicts using operator | # combining items with add instead of __setitem__ maybe_constructable = ensure_can_construct_http_header_dict(other) @@ -472,7 +472,7 @@ def __or__(self, other: object) -> HTTPHeaderDict: result.extend(maybe_constructable) return result - def __ror__(self, other: object) -> HTTPHeaderDict: + def __ror__(self, other: object) -> Self: # Supports merging header dicts using operator | when other is on left side # combining items with add instead of __setitem__ maybe_constructable = ensure_can_construct_http_header_dict(other) diff --git a/script.module.urllib3/lib/urllib3/_request_methods.py b/script.module.urllib3/lib/urllib3/_request_methods.py index 1d0f3465ad..03186e5129 100644 --- a/script.module.urllib3/lib/urllib3/_request_methods.py +++ b/script.module.urllib3/lib/urllib3/_request_methods.py @@ -85,6 +85,29 @@ def request( option to drop down to more specific methods when necessary, such as :meth:`request_encode_url`, :meth:`request_encode_body`, or even the lowest level :meth:`urlopen`. + + :param method: + HTTP request method (such as GET, POST, PUT, etc.) + + :param url: + The URL to perform the request on. + + :param body: + Data to send in the request body, either :class:`str`, :class:`bytes`, + an iterable of :class:`str`/:class:`bytes`, or a file-like object. + + :param fields: + Data to encode and send in the URL or request body, depending on ``method``. + + :param headers: + Dictionary of custom headers to send, such as User-Agent, + If-None-Match, etc. If None, pool headers are used. If provided, + these headers completely replace any pool-specific headers. + + :param json: + Data to encode and send as JSON with UTF-encoded in the request body. + The ``"Content-Type"`` header will be set to ``"application/json"`` + unless specified otherwise. """ method = method.upper() @@ -95,9 +118,11 @@ def request( if json is not None: if headers is None: - headers = self.headers.copy() # type: ignore + headers = self.headers + if not ("content-type" in map(str.lower, headers.keys())): - headers["Content-Type"] = "application/json" # type: ignore + headers = HTTPHeaderDict(headers) + headers["Content-Type"] = "application/json" body = _json.dumps(json, separators=(",", ":"), ensure_ascii=False).encode( "utf-8" @@ -130,6 +155,20 @@ def request_encode_url( """ Make a request using :meth:`urlopen` with the ``fields`` encoded in the url. This is useful for request methods like GET, HEAD, DELETE, etc. + + :param method: + HTTP request method (such as GET, POST, PUT, etc.) + + :param url: + The URL to perform the request on. + + :param fields: + Data to encode and send in the URL. + + :param headers: + Dictionary of custom headers to send, such as User-Agent, + If-None-Match, etc. If None, pool headers are used. If provided, + these headers completely replace any pool-specific headers. """ if headers is None: headers = self.headers @@ -186,6 +225,28 @@ def request_encode_body( be overwritten because it depends on the dynamic random boundary string which is used to compose the body of the request. 
The random boundary string can be explicitly set with the ``multipart_boundary`` parameter. + + :param method: + HTTP request method (such as GET, POST, PUT, etc.) + + :param url: + The URL to perform the request on. + + :param fields: + Data to encode and send in the request body. + + :param headers: + Dictionary of custom headers to send, such as User-Agent, + If-None-Match, etc. If None, pool headers are used. If provided, + these headers completely replace any pool-specific headers. + + :param encode_multipart: + If True, encode the ``fields`` using the multipart/form-data MIME + format. + + :param multipart_boundary: + If not specified, then a random boundary will be generated using + :func:`urllib3.filepost.choose_boundary`. """ if headers is None: headers = self.headers diff --git a/script.module.urllib3/lib/urllib3/_version.py b/script.module.urllib3/lib/urllib3/_version.py index 409ba3f53a..eb8b5c285a 100644 --- a/script.module.urllib3/lib/urllib3/_version.py +++ b/script.module.urllib3/lib/urllib3/_version.py @@ -1,4 +1,16 @@ -# This file is protected via CODEOWNERS -from __future__ import annotations +# file generated by setuptools_scm +# don't change, don't track in version control +TYPE_CHECKING = False +if TYPE_CHECKING: + from typing import Tuple, Union + VERSION_TUPLE = Tuple[Union[int, str], ...] +else: + VERSION_TUPLE = object -__version__ = "2.1.0" +version: str +__version__: str +__version_tuple__: VERSION_TUPLE +version_tuple: VERSION_TUPLE + +__version__ = version = '2.2.3' +__version_tuple__ = version_tuple = (2, 2, 3) diff --git a/script.module.urllib3/lib/urllib3/connection.py b/script.module.urllib3/lib/urllib3/connection.py index 38a2fd6dfa..7cbef7d5a9 100644 --- a/script.module.urllib3/lib/urllib3/connection.py +++ b/script.module.urllib3/lib/urllib3/connection.py @@ -1,11 +1,13 @@ from __future__ import annotations import datetime +import http.client import logging import os import re import socket import sys +import threading import typing import warnings from http.client import HTTPConnection as _HTTPConnection @@ -14,13 +16,12 @@ from socket import timeout as SocketTimeout if typing.TYPE_CHECKING: - from typing import Literal - from .response import HTTPResponse from .util.ssl_ import _TYPE_PEER_CERT_RET_DICT from .util.ssltransport import SSLTransport from ._collections import HTTPHeaderDict +from .http2 import probe as http2_probe from .util.response import assert_header_parsing from .util.timeout import _DEFAULT_TIMEOUT, _TYPE_TIMEOUT, Timeout from .util.util import to_str @@ -73,7 +74,7 @@ class BaseSSLError(BaseException): # type: ignore[no-redef] # When it comes time to update this value as a part of regular maintenance # (ie test_recent_date is failing) update it to ~6 months before the current date. -RECENT_DATE = datetime.date(2022, 1, 1) +RECENT_DATE = datetime.date(2023, 6, 1) _CONTAINS_CONTROL_CHAR_RE = re.compile(r"[^-!#$%&'*+.^_`|~0-9a-zA-Z]") @@ -160,11 +161,6 @@ def __init__( self._tunnel_port: int | None = None self._tunnel_scheme: str | None = None - # https://github.com/python/mypy/issues/4125 - # Mypy treats this as LSP violation, which is considered a bug. - # If `host` is made a property it violates LSP, because a writeable attribute is overridden with a read-only one. - # However, there is also a `host` setter so LSP is not violated. - # Potentially, a `@host.deleter` might be needed depending on how this issue will be fixed. 
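# Illustrative sketch of the field-encoding helpers documented above; the URL
# and field values are invented, and `multipart_boundary` is forwarded through
# request() to request_encode_body():
from urllib3 import PoolManager

pm = PoolManager()
# GET-like methods urlencode the fields into the URL.
search = pm.request("GET", "https://example.com/search", fields={"q": "kodi"})
# POST-like methods encode the fields as multipart/form-data by default.
upload = pm.request(
    "POST",
    "https://example.com/upload",
    fields={"file": ("report.txt", b"data", "text/plain")},
    multipart_boundary="fixed-boundary-for-testing",
)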
@property def host(self) -> str: """ @@ -239,6 +235,46 @@ def set_tunnel( super().set_tunnel(host, port=port, headers=headers) self._tunnel_scheme = scheme + if sys.version_info < (3, 11, 4): + + def _tunnel(self) -> None: + _MAXLINE = http.client._MAXLINE # type: ignore[attr-defined] + connect = b"CONNECT %s:%d HTTP/1.0\r\n" % ( # type: ignore[str-format] + self._tunnel_host.encode("ascii"), # type: ignore[union-attr] + self._tunnel_port, + ) + headers = [connect] + for header, value in self._tunnel_headers.items(): # type: ignore[attr-defined] + headers.append(f"{header}: {value}\r\n".encode("latin-1")) + headers.append(b"\r\n") + # Making a single send() call instead of one per line encourages + # the host OS to use a more optimal packet size instead of + # potentially emitting a series of small packets. + self.send(b"".join(headers)) + del headers + + response = self.response_class(self.sock, method=self._method) # type: ignore[attr-defined] + try: + (version, code, message) = response._read_status() # type: ignore[attr-defined] + + if code != http.HTTPStatus.OK: + self.close() + raise OSError(f"Tunnel connection failed: {code} {message.strip()}") + while True: + line = response.fp.readline(_MAXLINE + 1) + if len(line) > _MAXLINE: + raise http.client.LineTooLong("header line") + if not line: + # for sites which EOF without sending a trailer + break + if line in (b"\r\n", b"\n", b""): + break + + if self.debuglevel > 0: + print("header:", line.decode()) + finally: + response.close() + def connect(self) -> None: self.sock = self._new_conn() if self._tunnel_host: @@ -246,13 +282,16 @@ def connect(self) -> None: self._has_connected_to_proxy = True # TODO: Fix tunnel so it doesn't depend on self.sock state. - self._tunnel() # type: ignore[attr-defined] + self._tunnel() # If there's a proxy to be connected to we are fully connected. # This is set twice (once above and here) due to forwarding proxies # not using tunnelling. 
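        # (Note: plain-HTTP connections never perform a TLS handshake with the
        # proxy, so once a proxy connection is established the code below pins
        # proxy_is_verified to False; HTTPSConnection records the actual
        # handshake result via _connect_tls_proxy() instead.)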
self._has_connected_to_proxy = bool(self.proxy) + if self._has_connected_to_proxy: + self.proxy_is_verified = False + @property def is_closed(self) -> bool: return self.sock is None @@ -267,6 +306,13 @@ def is_connected(self) -> bool: def has_connected_to_proxy(self) -> bool: return self._has_connected_to_proxy + @property + def proxy_is_forwarding(self) -> bool: + """ + Return True if a forwarding proxy is configured, else return False + """ + return bool(self.proxy) and self._tunnel_host is None + def close(self) -> None: try: super().close() @@ -302,7 +348,7 @@ def putrequest( method, url, skip_host=skip_host, skip_accept_encoding=skip_accept_encoding ) - def putheader(self, header: str, *values: str) -> None: + def putheader(self, header: str, *values: str) -> None: # type: ignore[override] """""" if not any(isinstance(v, str) and v == SKIP_HEADER for v in values): super().putheader(header, *values) @@ -477,6 +523,7 @@ def getresponse( # type: ignore[override] headers=headers, status=httplib_response.status, version=httplib_response.version, + version_string=getattr(self, "_http_vsn_str", "HTTP/?"), reason=httplib_response.reason, preload_content=resp_options.preload_content, decode_content=resp_options.decode_content, @@ -504,6 +551,7 @@ class HTTPSConnection(HTTPConnection): ssl_minimum_version: int | None = None ssl_maximum_version: int | None = None assert_fingerprint: str | None = None + _connect_callback: typing.Callable[..., None] | None = None def __init__( self, @@ -518,7 +566,7 @@ def __init__( proxy: Url | None = None, proxy_config: ProxyConfig | None = None, cert_reqs: int | str | None = None, - assert_hostname: None | str | Literal[False] = None, + assert_hostname: None | str | typing.Literal[False] = None, assert_fingerprint: str | None = None, server_hostname: str | None = None, ssl_context: ssl.SSLContext | None = None, @@ -564,6 +612,7 @@ def __init__( else: cert_reqs = resolve_cert_reqs(None) self.cert_reqs = cert_reqs + self._connect_callback = None def set_cert( self, @@ -572,7 +621,7 @@ def set_cert( cert_reqs: int | str | None = None, key_password: str | None = None, ca_certs: str | None = None, - assert_hostname: None | str | Literal[False] = None, + assert_hostname: None | str | typing.Literal[False] = None, assert_fingerprint: str | None = None, ca_cert_dir: str | None = None, ca_cert_data: None | str | bytes = None, @@ -607,64 +656,142 @@ def set_cert( self.ca_cert_data = ca_cert_data def connect(self) -> None: - sock: socket.socket | ssl.SSLSocket - self.sock = sock = self._new_conn() - server_hostname: str = self.host - tls_in_tls = False - - # Do we need to establish a tunnel? - if self._tunnel_host is not None: - # We're tunneling to an HTTPS origin so need to do TLS-in-TLS. - if self._tunnel_scheme == "https": - self.sock = sock = self._connect_tls_proxy(self.host, sock) - tls_in_tls = True + # Today we don't need to be doing this step before the /actual/ socket + # connection, however in the future we'll need to decide whether to + # create a new socket or re-use an existing "shared" socket as a part + # of the HTTP/2 handshake dance. + if self._tunnel_host is not None and self._tunnel_port is not None: + probe_http2_host = self._tunnel_host + probe_http2_port = self._tunnel_port + else: + probe_http2_host = self.host + probe_http2_port = self.port + + # Check if the target origin supports HTTP/2. + # If the value comes back as 'None' it means that the current thread + # is probing for HTTP/2 support. 
Otherwise, we're waiting for another + # probe to complete, or we get a value right away. + target_supports_http2: bool | None + if "h2" in ssl_.ALPN_PROTOCOLS: + target_supports_http2 = http2_probe.acquire_and_get( + host=probe_http2_host, port=probe_http2_port + ) + else: + # If HTTP/2 isn't going to be offered it doesn't matter if + # the target supports HTTP/2. Don't want to make a probe. + target_supports_http2 = False + + if self._connect_callback is not None: + self._connect_callback( + "before connect", + thread_id=threading.get_ident(), + target_supports_http2=target_supports_http2, + ) - # If we're tunneling it means we're connected to our proxy. - self._has_connected_to_proxy = True + try: + sock: socket.socket | ssl.SSLSocket + self.sock = sock = self._new_conn() + server_hostname: str = self.host + tls_in_tls = False + + # Do we need to establish a tunnel? + if self._tunnel_host is not None: + # We're tunneling to an HTTPS origin so need to do TLS-in-TLS. + if self._tunnel_scheme == "https": + # _connect_tls_proxy will verify and assign proxy_is_verified + self.sock = sock = self._connect_tls_proxy(self.host, sock) + tls_in_tls = True + elif self._tunnel_scheme == "http": + self.proxy_is_verified = False + + # If we're tunneling it means we're connected to our proxy. + self._has_connected_to_proxy = True + + self._tunnel() + # Override the host with the one we're requesting data from. + server_hostname = self._tunnel_host + + if self.server_hostname is not None: + server_hostname = self.server_hostname + + is_time_off = datetime.date.today() < RECENT_DATE + if is_time_off: + warnings.warn( + ( + f"System time is way off (before {RECENT_DATE}). This will probably " + "lead to SSL verification errors" + ), + SystemTimeWarning, + ) - self._tunnel() # type: ignore[attr-defined] - # Override the host with the one we're requesting data from. - server_hostname = self._tunnel_host - - if self.server_hostname is not None: - server_hostname = self.server_hostname - - is_time_off = datetime.date.today() < RECENT_DATE - if is_time_off: - warnings.warn( - ( - f"System time is way off (before {RECENT_DATE}). This will probably " - "lead to SSL verification errors" - ), - SystemTimeWarning, + # Remove trailing '.' from fqdn hostnames to allow certificate validation + server_hostname_rm_dot = server_hostname.rstrip(".") + + sock_and_verified = _ssl_wrap_socket_and_match_hostname( + sock=sock, + cert_reqs=self.cert_reqs, + ssl_version=self.ssl_version, + ssl_minimum_version=self.ssl_minimum_version, + ssl_maximum_version=self.ssl_maximum_version, + ca_certs=self.ca_certs, + ca_cert_dir=self.ca_cert_dir, + ca_cert_data=self.ca_cert_data, + cert_file=self.cert_file, + key_file=self.key_file, + key_password=self.key_password, + server_hostname=server_hostname_rm_dot, + ssl_context=self.ssl_context, + tls_in_tls=tls_in_tls, + assert_hostname=self.assert_hostname, + assert_fingerprint=self.assert_fingerprint, ) + self.sock = sock_and_verified.socket + + # If an error occurs during connection/handshake we may need to release + # our lock so another connection can probe the origin. 
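        # (Note: http2_probe.acquire_and_get() hands the per-origin probe lock
        # to exactly one caller by returning None; that caller is expected to
        # publish a result with set_and_release(), which the failure path below
        # does with supports_http2=None so other threads waiting on the same
        # origin are unblocked. "h2" only appears in ssl_.ALPN_PROTOCOLS once
        # HTTP/2 support has been enabled, e.g. via the experimental
        # urllib3.http2.inject_into_urllib3().)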
+ except BaseException: + if self._connect_callback is not None: + self._connect_callback( + "after connect failure", + thread_id=threading.get_ident(), + target_supports_http2=target_supports_http2, + ) - sock_and_verified = _ssl_wrap_socket_and_match_hostname( - sock=sock, - cert_reqs=self.cert_reqs, - ssl_version=self.ssl_version, - ssl_minimum_version=self.ssl_minimum_version, - ssl_maximum_version=self.ssl_maximum_version, - ca_certs=self.ca_certs, - ca_cert_dir=self.ca_cert_dir, - ca_cert_data=self.ca_cert_data, - cert_file=self.cert_file, - key_file=self.key_file, - key_password=self.key_password, - server_hostname=server_hostname, - ssl_context=self.ssl_context, - tls_in_tls=tls_in_tls, - assert_hostname=self.assert_hostname, - assert_fingerprint=self.assert_fingerprint, - ) - self.sock = sock_and_verified.socket - self.is_verified = sock_and_verified.is_verified + if target_supports_http2 is None: + http2_probe.set_and_release( + host=probe_http2_host, port=probe_http2_port, supports_http2=None + ) + raise + + # If this connection doesn't know if the origin supports HTTP/2 + # we report back to the HTTP/2 probe our result. + if target_supports_http2 is None: + supports_http2 = sock_and_verified.socket.selected_alpn_protocol() == "h2" + http2_probe.set_and_release( + host=probe_http2_host, + port=probe_http2_port, + supports_http2=supports_http2, + ) + + # Forwarding proxies can never have a verified target since + # the proxy is the one doing the verification. Should instead + # use a CONNECT tunnel in order to verify the target. + # See: https://github.com/urllib3/urllib3/issues/3267. + if self.proxy_is_forwarding: + self.is_verified = False + else: + self.is_verified = sock_and_verified.is_verified # If there's a proxy to be connected to we are fully connected. # This is set twice (once above and here) due to forwarding proxies # not using tunnelling. self._has_connected_to_proxy = bool(self.proxy) + # Set `self.proxy_is_verified` unless it's already set while + # establishing a tunnel. + if self._has_connected_to_proxy and self.proxy_is_verified is None: + self.proxy_is_verified = sock_and_verified.is_verified + def _connect_tls_proxy(self, hostname: str, sock: socket.socket) -> ssl.SSLSocket: """ Establish a TLS connection to the proxy using the provided SSL context. @@ -718,7 +845,7 @@ def _ssl_wrap_socket_and_match_hostname( ca_certs: str | None, ca_cert_dir: str | None, ca_cert_data: None | str | bytes, - assert_hostname: None | str | Literal[False], + assert_hostname: None | str | typing.Literal[False], assert_fingerprint: str | None, server_hostname: str | None, ssl_context: ssl.SSLContext | None, @@ -864,6 +991,7 @@ def _wrap_proxy_error(err: Exception, proxy_scheme: str | None) -> ProxyError: is_likely_http_proxy = ( "wrong version number" in error_normalized or "unknown protocol" in error_normalized + or "record layer failure" in error_normalized ) http_proxy_warning = ( ". 
Your proxy appears to only use HTTP and not HTTPS, " diff --git a/script.module.urllib3/lib/urllib3/connectionpool.py b/script.module.urllib3/lib/urllib3/connectionpool.py index 70048b7aed..a2c3cf6098 100644 --- a/script.module.urllib3/lib/urllib3/connectionpool.py +++ b/script.module.urllib3/lib/urllib3/connectionpool.py @@ -53,7 +53,8 @@ if typing.TYPE_CHECKING: import ssl - from typing import Literal + + from typing_extensions import Self from ._base_connection import BaseHTTPConnection, BaseHTTPSConnection @@ -61,8 +62,6 @@ _TYPE_TIMEOUT = typing.Union[Timeout, float, _TYPE_DEFAULT, None] -_SelfT = typing.TypeVar("_SelfT") - # Pool objects class ConnectionPool: @@ -95,7 +94,7 @@ def __init__(self, host: str, port: int | None = None) -> None: def __str__(self) -> str: return f"{type(self).__name__}(host={self.host!r}, port={self.port!r})" - def __enter__(self: _SelfT) -> _SelfT: + def __enter__(self) -> Self: return self def __exit__( @@ -103,7 +102,7 @@ def __exit__( exc_type: type[BaseException] | None, exc_val: BaseException | None, exc_tb: TracebackType | None, - ) -> Literal[False]: + ) -> typing.Literal[False]: self.close() # Return False to re-raise any potential exceptions return False @@ -511,9 +510,10 @@ def _make_request( pass except OSError as e: # MacOS/Linux - # EPROTOTYPE is needed on macOS + # EPROTOTYPE and ECONNRESET are needed on macOS # https://erickt.github.io/blog/2014/11/19/adventures-in-debugging-a-potential-osx-kernel-bug/ - if e.errno != errno.EPROTOTYPE: + # Condition changed later to emit ECONNRESET instead of only EPROTOTYPE. + if e.errno != errno.EPROTOTYPE and e.errno != errno.ECONNRESET: raise # Reset the timeout for the recv() on the socket @@ -544,16 +544,15 @@ def _make_request( response._pool = self # type: ignore[attr-defined] log.debug( - '%s://%s:%s "%s %s %s" %s %s', + '%s://%s:%s "%s %s HTTP/%s" %s %s', self.scheme, self.host, self.port, method, url, - # HTTP version - conn._http_vsn_str, # type: ignore[attr-defined] + response.version, response.status, - response.length_remaining, # type: ignore[attr-defined] + response.length_remaining, ) return response @@ -646,7 +645,7 @@ def urlopen( # type: ignore[override] Configure the number of retries to allow before raising a :class:`~urllib3.exceptions.MaxRetryError` exception. - Pass ``None`` to retry until you receive a response. Pass a + If ``None`` (default) will retry 3 times, see ``Retry.DEFAULT``. Pass a :class:`~urllib3.util.retry.Retry` object for fine-grained control over different types of retries. Pass an integer number to retry connection errors that many times, @@ -999,7 +998,7 @@ def __init__( ssl_version: int | str | None = None, ssl_minimum_version: ssl.TLSVersion | None = None, ssl_maximum_version: ssl.TLSVersion | None = None, - assert_hostname: str | Literal[False] | None = None, + assert_hostname: str | typing.Literal[False] | None = None, assert_fingerprint: str | None = None, ca_cert_dir: str | None = None, **conn_kw: typing.Any, @@ -1095,7 +1094,8 @@ def _validate_conn(self, conn: BaseHTTPConnection) -> None: if conn.is_closed: conn.connect() - if not conn.is_verified: + # TODO revise this, see https://github.com/urllib3/urllib3/issues/2791 + if not conn.is_verified and not conn.proxy_is_verified: warnings.warn( ( f"Unverified HTTPS request is being made to host '{conn.host}'. 
" diff --git a/script.module.urllib3/lib/urllib3/contrib/emscripten/__init__.py b/script.module.urllib3/lib/urllib3/contrib/emscripten/__init__.py new file mode 100644 index 0000000000..8a3c5bebdc --- /dev/null +++ b/script.module.urllib3/lib/urllib3/contrib/emscripten/__init__.py @@ -0,0 +1,16 @@ +from __future__ import annotations + +import urllib3.connection + +from ...connectionpool import HTTPConnectionPool, HTTPSConnectionPool +from .connection import EmscriptenHTTPConnection, EmscriptenHTTPSConnection + + +def inject_into_urllib3() -> None: + # override connection classes to use emscripten specific classes + # n.b. mypy complains about the overriding of classes below + # if it isn't ignored + HTTPConnectionPool.ConnectionCls = EmscriptenHTTPConnection + HTTPSConnectionPool.ConnectionCls = EmscriptenHTTPSConnection + urllib3.connection.HTTPConnection = EmscriptenHTTPConnection # type: ignore[misc,assignment] + urllib3.connection.HTTPSConnection = EmscriptenHTTPSConnection # type: ignore[misc,assignment] diff --git a/script.module.urllib3/lib/urllib3/contrib/emscripten/connection.py b/script.module.urllib3/lib/urllib3/contrib/emscripten/connection.py new file mode 100644 index 0000000000..2ceb4579eb --- /dev/null +++ b/script.module.urllib3/lib/urllib3/contrib/emscripten/connection.py @@ -0,0 +1,254 @@ +from __future__ import annotations + +import os +import typing + +# use http.client.HTTPException for consistency with non-emscripten +from http.client import HTTPException as HTTPException # noqa: F401 +from http.client import ResponseNotReady + +from ..._base_connection import _TYPE_BODY +from ...connection import HTTPConnection, ProxyConfig, port_by_scheme +from ...exceptions import TimeoutError +from ...response import BaseHTTPResponse +from ...util.connection import _TYPE_SOCKET_OPTIONS +from ...util.timeout import _DEFAULT_TIMEOUT, _TYPE_TIMEOUT +from ...util.url import Url +from .fetch import _RequestError, _TimeoutError, send_request, send_streaming_request +from .request import EmscriptenRequest +from .response import EmscriptenHttpResponseWrapper, EmscriptenResponse + +if typing.TYPE_CHECKING: + from ..._base_connection import BaseHTTPConnection, BaseHTTPSConnection + + +class EmscriptenHTTPConnection: + default_port: typing.ClassVar[int] = port_by_scheme["http"] + default_socket_options: typing.ClassVar[_TYPE_SOCKET_OPTIONS] + + timeout: None | (float) + + host: str + port: int + blocksize: int + source_address: tuple[str, int] | None + socket_options: _TYPE_SOCKET_OPTIONS | None + + proxy: Url | None + proxy_config: ProxyConfig | None + + is_verified: bool = False + proxy_is_verified: bool | None = None + + _response: EmscriptenResponse | None + + def __init__( + self, + host: str, + port: int = 0, + *, + timeout: _TYPE_TIMEOUT = _DEFAULT_TIMEOUT, + source_address: tuple[str, int] | None = None, + blocksize: int = 8192, + socket_options: _TYPE_SOCKET_OPTIONS | None = None, + proxy: Url | None = None, + proxy_config: ProxyConfig | None = None, + ) -> None: + self.host = host + self.port = port + self.timeout = timeout if isinstance(timeout, float) else 0.0 + self.scheme = "http" + self._closed = True + self._response = None + # ignore these things because we don't + # have control over that stuff + self.proxy = None + self.proxy_config = None + self.blocksize = blocksize + self.source_address = None + self.socket_options = None + self.is_verified = False + + def set_tunnel( + self, + host: str, + port: int | None = 0, + headers: typing.Mapping[str, str] | None = None, + 
scheme: str = "http", + ) -> None: + pass + + def connect(self) -> None: + pass + + def request( + self, + method: str, + url: str, + body: _TYPE_BODY | None = None, + headers: typing.Mapping[str, str] | None = None, + # We know *at least* botocore is depending on the order of the + # first 3 parameters so to be safe we only mark the later ones + # as keyword-only to ensure we have space to extend. + *, + chunked: bool = False, + preload_content: bool = True, + decode_content: bool = True, + enforce_content_length: bool = True, + ) -> None: + self._closed = False + if url.startswith("/"): + # no scheme / host / port included, make a full url + url = f"{self.scheme}://{self.host}:{self.port}" + url + request = EmscriptenRequest( + url=url, + method=method, + timeout=self.timeout if self.timeout else 0, + decode_content=decode_content, + ) + request.set_body(body) + if headers: + for k, v in headers.items(): + request.set_header(k, v) + self._response = None + try: + if not preload_content: + self._response = send_streaming_request(request) + if self._response is None: + self._response = send_request(request) + except _TimeoutError as e: + raise TimeoutError(e.message) from e + except _RequestError as e: + raise HTTPException(e.message) from e + + def getresponse(self) -> BaseHTTPResponse: + if self._response is not None: + return EmscriptenHttpResponseWrapper( + internal_response=self._response, + url=self._response.request.url, + connection=self, + ) + else: + raise ResponseNotReady() + + def close(self) -> None: + self._closed = True + self._response = None + + @property + def is_closed(self) -> bool: + """Whether the connection either is brand new or has been previously closed. + If this property is True then both ``is_connected`` and ``has_connected_to_proxy`` + properties must be False. + """ + return self._closed + + @property + def is_connected(self) -> bool: + """Whether the connection is actively connected to any origin (proxy or target)""" + return True + + @property + def has_connected_to_proxy(self) -> bool: + """Whether the connection has successfully connected to its proxy. + This returns False if no proxy is in use. Used to determine whether + errors are coming from the proxy layer or from tunnelling to the target origin. 
+ """ + return False + + +class EmscriptenHTTPSConnection(EmscriptenHTTPConnection): + default_port = port_by_scheme["https"] + # all this is basically ignored, as browser handles https + cert_reqs: int | str | None = None + ca_certs: str | None = None + ca_cert_dir: str | None = None + ca_cert_data: None | str | bytes = None + cert_file: str | None + key_file: str | None + key_password: str | None + ssl_context: typing.Any | None + ssl_version: int | str | None = None + ssl_minimum_version: int | None = None + ssl_maximum_version: int | None = None + assert_hostname: None | str | typing.Literal[False] + assert_fingerprint: str | None = None + + def __init__( + self, + host: str, + port: int = 0, + *, + timeout: _TYPE_TIMEOUT = _DEFAULT_TIMEOUT, + source_address: tuple[str, int] | None = None, + blocksize: int = 16384, + socket_options: None + | _TYPE_SOCKET_OPTIONS = HTTPConnection.default_socket_options, + proxy: Url | None = None, + proxy_config: ProxyConfig | None = None, + cert_reqs: int | str | None = None, + assert_hostname: None | str | typing.Literal[False] = None, + assert_fingerprint: str | None = None, + server_hostname: str | None = None, + ssl_context: typing.Any | None = None, + ca_certs: str | None = None, + ca_cert_dir: str | None = None, + ca_cert_data: None | str | bytes = None, + ssl_minimum_version: int | None = None, + ssl_maximum_version: int | None = None, + ssl_version: int | str | None = None, # Deprecated + cert_file: str | None = None, + key_file: str | None = None, + key_password: str | None = None, + ) -> None: + super().__init__( + host, + port=port, + timeout=timeout, + source_address=source_address, + blocksize=blocksize, + socket_options=socket_options, + proxy=proxy, + proxy_config=proxy_config, + ) + self.scheme = "https" + + self.key_file = key_file + self.cert_file = cert_file + self.key_password = key_password + self.ssl_context = ssl_context + self.server_hostname = server_hostname + self.assert_hostname = assert_hostname + self.assert_fingerprint = assert_fingerprint + self.ssl_version = ssl_version + self.ssl_minimum_version = ssl_minimum_version + self.ssl_maximum_version = ssl_maximum_version + self.ca_certs = ca_certs and os.path.expanduser(ca_certs) + self.ca_cert_dir = ca_cert_dir and os.path.expanduser(ca_cert_dir) + self.ca_cert_data = ca_cert_data + + self.cert_reqs = None + + # The browser will automatically verify all requests. + # We have no control over that setting. 
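        # (Note: certificate validation happens inside the browser's fetch/XHR
        # stack and urllib3's ssl_wrap_socket machinery never runs here, so
        # is_verified is reported as True below to keep the pool's
        # _validate_conn() from emitting InsecureRequestWarning.)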
+ self.is_verified = True + + def set_cert( + self, + key_file: str | None = None, + cert_file: str | None = None, + cert_reqs: int | str | None = None, + key_password: str | None = None, + ca_certs: str | None = None, + assert_hostname: None | str | typing.Literal[False] = None, + assert_fingerprint: str | None = None, + ca_cert_dir: str | None = None, + ca_cert_data: None | str | bytes = None, + ) -> None: + pass + + +# verify that this class implements BaseHTTP(s) connection correctly +if typing.TYPE_CHECKING: + _supports_http_protocol: BaseHTTPConnection = EmscriptenHTTPConnection("", 0) + _supports_https_protocol: BaseHTTPSConnection = EmscriptenHTTPSConnection("", 0) diff --git a/script.module.urllib3/lib/urllib3/contrib/emscripten/emscripten_fetch_worker.js b/script.module.urllib3/lib/urllib3/contrib/emscripten/emscripten_fetch_worker.js new file mode 100644 index 0000000000..243b86222f --- /dev/null +++ b/script.module.urllib3/lib/urllib3/contrib/emscripten/emscripten_fetch_worker.js @@ -0,0 +1,110 @@ +let Status = { + SUCCESS_HEADER: -1, + SUCCESS_EOF: -2, + ERROR_TIMEOUT: -3, + ERROR_EXCEPTION: -4, +}; + +let connections = {}; +let nextConnectionID = 1; +const encoder = new TextEncoder(); + +self.addEventListener("message", async function (event) { + if (event.data.close) { + let connectionID = event.data.close; + delete connections[connectionID]; + return; + } else if (event.data.getMore) { + let connectionID = event.data.getMore; + let { curOffset, value, reader, intBuffer, byteBuffer } = + connections[connectionID]; + // if we still have some in buffer, then just send it back straight away + if (!value || curOffset >= value.length) { + // read another buffer if required + try { + let readResponse = await reader.read(); + + if (readResponse.done) { + // read everything - clear connection and return + delete connections[connectionID]; + Atomics.store(intBuffer, 0, Status.SUCCESS_EOF); + Atomics.notify(intBuffer, 0); + // finished reading successfully + // return from event handler + return; + } + curOffset = 0; + connections[connectionID].value = readResponse.value; + value = readResponse.value; + } catch (error) { + console.log("Request exception:", error); + let errorBytes = encoder.encode(error.message); + let written = errorBytes.length; + byteBuffer.set(errorBytes); + intBuffer[1] = written; + Atomics.store(intBuffer, 0, Status.ERROR_EXCEPTION); + Atomics.notify(intBuffer, 0); + } + } + + // send as much buffer as we can + let curLen = value.length - curOffset; + if (curLen > byteBuffer.length) { + curLen = byteBuffer.length; + } + byteBuffer.set(value.subarray(curOffset, curOffset + curLen), 0); + + Atomics.store(intBuffer, 0, curLen); // store current length in bytes + Atomics.notify(intBuffer, 0); + curOffset += curLen; + connections[connectionID].curOffset = curOffset; + + return; + } else { + // start fetch + let connectionID = nextConnectionID; + nextConnectionID += 1; + const intBuffer = new Int32Array(event.data.buffer); + const byteBuffer = new Uint8Array(event.data.buffer, 8); + try { + const response = await fetch(event.data.url, event.data.fetchParams); + // return the headers first via textencoder + var headers = []; + for (const pair of response.headers.entries()) { + headers.push([pair[0], pair[1]]); + } + let headerObj = { + headers: headers, + status: response.status, + connectionID, + }; + const headerText = JSON.stringify(headerObj); + let headerBytes = encoder.encode(headerText); + let written = headerBytes.length; + byteBuffer.set(headerBytes); + 
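      // (Note: the SharedArrayBuffer passed in from fetch.py is viewed two
      // ways: intBuffer, an Int32Array over the whole buffer, and byteBuffer,
      // a Uint8Array starting at byte offset 8. intBuffer[0] carries a status
      // code or a chunk byte-count and is the word the Python side blocks on
      // via Atomics.wait; intBuffer[1], written just below, holds the length
      // of the JSON or error text placed in byteBuffer.)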
intBuffer[1] = written; + // make a connection + connections[connectionID] = { + reader: response.body.getReader(), + intBuffer: intBuffer, + byteBuffer: byteBuffer, + value: undefined, + curOffset: 0, + }; + // set header ready + Atomics.store(intBuffer, 0, Status.SUCCESS_HEADER); + Atomics.notify(intBuffer, 0); + // all fetching after this goes through a new postmessage call with getMore + // this allows for parallel requests + } catch (error) { + console.log("Request exception:", error); + let errorBytes = encoder.encode(error.message); + let written = errorBytes.length; + byteBuffer.set(errorBytes); + intBuffer[1] = written; + Atomics.store(intBuffer, 0, Status.ERROR_EXCEPTION); + Atomics.notify(intBuffer, 0); + } + } +}); +self.postMessage({ inited: true }); diff --git a/script.module.urllib3/lib/urllib3/contrib/emscripten/fetch.py b/script.module.urllib3/lib/urllib3/contrib/emscripten/fetch.py new file mode 100644 index 0000000000..8d197ea1ee --- /dev/null +++ b/script.module.urllib3/lib/urllib3/contrib/emscripten/fetch.py @@ -0,0 +1,418 @@ +""" +Support for streaming http requests in emscripten. + +A few caveats - + +Firstly, you can't do streaming http in the main UI thread, because atomics.wait isn't allowed. +Streaming only works if you're running pyodide in a web worker. + +Secondly, this uses an extra web worker and SharedArrayBuffer to do the asynchronous fetch +operation, so it requires that you have crossOriginIsolation enabled, by serving over https +(or from localhost) with the two headers below set: + + Cross-Origin-Opener-Policy: same-origin + Cross-Origin-Embedder-Policy: require-corp + +You can tell if cross origin isolation is successfully enabled by looking at the global crossOriginIsolated variable in +javascript console. If it isn't, streaming requests will fallback to XMLHttpRequest, i.e. getting the whole +request into a buffer and then returning it. it shows a warning in the javascript console in this case. + +Finally, the webworker which does the streaming fetch is created on initial import, but will only be started once +control is returned to javascript. Call `await wait_for_streaming_ready()` to wait for streaming fetch. + +NB: in this code, there are a lot of javascript objects. They are named js_* +to make it clear what type of object they are. +""" +from __future__ import annotations + +import io +import json +from email.parser import Parser +from importlib.resources import files +from typing import TYPE_CHECKING, Any + +import js # type: ignore[import-not-found] +from pyodide.ffi import ( # type: ignore[import-not-found] + JsArray, + JsException, + JsProxy, + to_js, +) + +if TYPE_CHECKING: + from typing_extensions import Buffer + +from .request import EmscriptenRequest +from .response import EmscriptenResponse + +""" +There are some headers that trigger unintended CORS preflight requests. 
+See also https://github.com/koenvo/pyodide-http/issues/22 +""" +HEADERS_TO_IGNORE = ("user-agent",) + +SUCCESS_HEADER = -1 +SUCCESS_EOF = -2 +ERROR_TIMEOUT = -3 +ERROR_EXCEPTION = -4 + +_STREAMING_WORKER_CODE = ( + files(__package__) + .joinpath("emscripten_fetch_worker.js") + .read_text(encoding="utf-8") +) + + +class _RequestError(Exception): + def __init__( + self, + message: str | None = None, + *, + request: EmscriptenRequest | None = None, + response: EmscriptenResponse | None = None, + ): + self.request = request + self.response = response + self.message = message + super().__init__(self.message) + + +class _StreamingError(_RequestError): + pass + + +class _TimeoutError(_RequestError): + pass + + +def _obj_from_dict(dict_val: dict[str, Any]) -> JsProxy: + return to_js(dict_val, dict_converter=js.Object.fromEntries) + + +class _ReadStream(io.RawIOBase): + def __init__( + self, + int_buffer: JsArray, + byte_buffer: JsArray, + timeout: float, + worker: JsProxy, + connection_id: int, + request: EmscriptenRequest, + ): + self.int_buffer = int_buffer + self.byte_buffer = byte_buffer + self.read_pos = 0 + self.read_len = 0 + self.connection_id = connection_id + self.worker = worker + self.timeout = int(1000 * timeout) if timeout > 0 else None + self.is_live = True + self._is_closed = False + self.request: EmscriptenRequest | None = request + + def __del__(self) -> None: + self.close() + + # this is compatible with _base_connection + def is_closed(self) -> bool: + return self._is_closed + + # for compatibility with RawIOBase + @property + def closed(self) -> bool: + return self.is_closed() + + def close(self) -> None: + if not self.is_closed(): + self.read_len = 0 + self.read_pos = 0 + self.int_buffer = None + self.byte_buffer = None + self._is_closed = True + self.request = None + if self.is_live: + self.worker.postMessage(_obj_from_dict({"close": self.connection_id})) + self.is_live = False + super().close() + + def readable(self) -> bool: + return True + + def writable(self) -> bool: + return False + + def seekable(self) -> bool: + return False + + def readinto(self, byte_obj: Buffer) -> int: + if not self.int_buffer: + raise _StreamingError( + "No buffer for stream in _ReadStream.readinto", + request=self.request, + response=None, + ) + if self.read_len == 0: + # wait for the worker to send something + js.Atomics.store(self.int_buffer, 0, ERROR_TIMEOUT) + self.worker.postMessage(_obj_from_dict({"getMore": self.connection_id})) + if ( + js.Atomics.wait(self.int_buffer, 0, ERROR_TIMEOUT, self.timeout) + == "timed-out" + ): + raise _TimeoutError + data_len = self.int_buffer[0] + if data_len > 0: + self.read_len = data_len + self.read_pos = 0 + elif data_len == ERROR_EXCEPTION: + string_len = self.int_buffer[1] + # decode the error string + js_decoder = js.TextDecoder.new() + json_str = js_decoder.decode(self.byte_buffer.slice(0, string_len)) + raise _StreamingError( + f"Exception thrown in fetch: {json_str}", + request=self.request, + response=None, + ) + else: + # EOF, free the buffers and return zero + # and free the request + self.is_live = False + self.close() + return 0 + # copy from int32array to python bytes + ret_length = min(self.read_len, len(memoryview(byte_obj))) + subarray = self.byte_buffer.subarray( + self.read_pos, self.read_pos + ret_length + ).to_py() + memoryview(byte_obj)[0:ret_length] = subarray + self.read_len -= ret_length + self.read_pos += ret_length + return ret_length + + +class _StreamingFetcher: + def __init__(self) -> None: + # make web-worker and data buffer 
on startup + self.streaming_ready = False + + js_data_blob = js.Blob.new( + [_STREAMING_WORKER_CODE], _obj_from_dict({"type": "application/javascript"}) + ) + + def promise_resolver(js_resolve_fn: JsProxy, js_reject_fn: JsProxy) -> None: + def onMsg(e: JsProxy) -> None: + self.streaming_ready = True + js_resolve_fn(e) + + def onErr(e: JsProxy) -> None: + js_reject_fn(e) # Defensive: never happens in ci + + self.js_worker.onmessage = onMsg + self.js_worker.onerror = onErr + + js_data_url = js.URL.createObjectURL(js_data_blob) + self.js_worker = js.globalThis.Worker.new(js_data_url) + self.js_worker_ready_promise = js.globalThis.Promise.new(promise_resolver) + + def send(self, request: EmscriptenRequest) -> EmscriptenResponse: + headers = { + k: v for k, v in request.headers.items() if k not in HEADERS_TO_IGNORE + } + + body = request.body + fetch_data = {"headers": headers, "body": to_js(body), "method": request.method} + # start the request off in the worker + timeout = int(1000 * request.timeout) if request.timeout > 0 else None + js_shared_buffer = js.SharedArrayBuffer.new(1048576) + js_int_buffer = js.Int32Array.new(js_shared_buffer) + js_byte_buffer = js.Uint8Array.new(js_shared_buffer, 8) + + js.Atomics.store(js_int_buffer, 0, ERROR_TIMEOUT) + js.Atomics.notify(js_int_buffer, 0) + js_absolute_url = js.URL.new(request.url, js.location).href + self.js_worker.postMessage( + _obj_from_dict( + { + "buffer": js_shared_buffer, + "url": js_absolute_url, + "fetchParams": fetch_data, + } + ) + ) + # wait for the worker to send something + js.Atomics.wait(js_int_buffer, 0, ERROR_TIMEOUT, timeout) + if js_int_buffer[0] == ERROR_TIMEOUT: + raise _TimeoutError( + "Timeout connecting to streaming request", + request=request, + response=None, + ) + elif js_int_buffer[0] == SUCCESS_HEADER: + # got response + # header length is in second int of intBuffer + string_len = js_int_buffer[1] + # decode the rest to a JSON string + js_decoder = js.TextDecoder.new() + # this does a copy (the slice) because decode can't work on shared array + # for some silly reason + json_str = js_decoder.decode(js_byte_buffer.slice(0, string_len)) + # get it as an object + response_obj = json.loads(json_str) + return EmscriptenResponse( + request=request, + status_code=response_obj["status"], + headers=response_obj["headers"], + body=_ReadStream( + js_int_buffer, + js_byte_buffer, + request.timeout, + self.js_worker, + response_obj["connectionID"], + request, + ), + ) + elif js_int_buffer[0] == ERROR_EXCEPTION: + string_len = js_int_buffer[1] + # decode the error string + js_decoder = js.TextDecoder.new() + json_str = js_decoder.decode(js_byte_buffer.slice(0, string_len)) + raise _StreamingError( + f"Exception thrown in fetch: {json_str}", request=request, response=None + ) + else: + raise _StreamingError( + f"Unknown status from worker in fetch: {js_int_buffer[0]}", + request=request, + response=None, + ) + + +# check if we are in a worker or not +def is_in_browser_main_thread() -> bool: + return hasattr(js, "window") and hasattr(js, "self") and js.self == js.window + + +def is_cross_origin_isolated() -> bool: + return hasattr(js, "crossOriginIsolated") and js.crossOriginIsolated + + +def is_in_node() -> bool: + return ( + hasattr(js, "process") + and hasattr(js.process, "release") + and hasattr(js.process.release, "name") + and js.process.release.name == "node" + ) + + +def is_worker_available() -> bool: + return hasattr(js, "Worker") and hasattr(js, "Blob") + + +_fetcher: _StreamingFetcher | None = None + +if 
is_worker_available() and ( + (is_cross_origin_isolated() and not is_in_browser_main_thread()) + and (not is_in_node()) +): + _fetcher = _StreamingFetcher() +else: + _fetcher = None + + +def send_streaming_request(request: EmscriptenRequest) -> EmscriptenResponse | None: + if _fetcher and streaming_ready(): + return _fetcher.send(request) + else: + _show_streaming_warning() + return None + + +_SHOWN_TIMEOUT_WARNING = False + + +def _show_timeout_warning() -> None: + global _SHOWN_TIMEOUT_WARNING + if not _SHOWN_TIMEOUT_WARNING: + _SHOWN_TIMEOUT_WARNING = True + message = "Warning: Timeout is not available on main browser thread" + js.console.warn(message) + + +_SHOWN_STREAMING_WARNING = False + + +def _show_streaming_warning() -> None: + global _SHOWN_STREAMING_WARNING + if not _SHOWN_STREAMING_WARNING: + _SHOWN_STREAMING_WARNING = True + message = "Can't stream HTTP requests because: \n" + if not is_cross_origin_isolated(): + message += " Page is not cross-origin isolated\n" + if is_in_browser_main_thread(): + message += " Python is running in main browser thread\n" + if not is_worker_available(): + message += " Worker or Blob classes are not available in this environment." # Defensive: this is always False in browsers that we test in + if streaming_ready() is False: + message += """ Streaming fetch worker isn't ready. If you want to be sure that streaming fetch +is working, you need to call: 'await urllib3.contrib.emscripten.fetch.wait_for_streaming_ready()`""" + from js import console + + console.warn(message) + + +def send_request(request: EmscriptenRequest) -> EmscriptenResponse: + try: + js_xhr = js.XMLHttpRequest.new() + + if not is_in_browser_main_thread(): + js_xhr.responseType = "arraybuffer" + if request.timeout: + js_xhr.timeout = int(request.timeout * 1000) + else: + js_xhr.overrideMimeType("text/plain; charset=ISO-8859-15") + if request.timeout: + # timeout isn't available on the main thread - show a warning in console + # if it is set + _show_timeout_warning() + + js_xhr.open(request.method, request.url, False) + for name, value in request.headers.items(): + if name.lower() not in HEADERS_TO_IGNORE: + js_xhr.setRequestHeader(name, value) + + js_xhr.send(to_js(request.body)) + + headers = dict(Parser().parsestr(js_xhr.getAllResponseHeaders())) + + if not is_in_browser_main_thread(): + body = js_xhr.response.to_py().tobytes() + else: + body = js_xhr.response.encode("ISO-8859-15") + return EmscriptenResponse( + status_code=js_xhr.status, headers=headers, body=body, request=request + ) + except JsException as err: + if err.name == "TimeoutError": + raise _TimeoutError(err.message, request=request) + elif err.name == "NetworkError": + raise _RequestError(err.message, request=request) + else: + # general http error + raise _RequestError(err.message, request=request) + + +def streaming_ready() -> bool | None: + if _fetcher: + return _fetcher.streaming_ready + else: + return None # no fetcher, return None to signify that + + +async def wait_for_streaming_ready() -> bool: + if _fetcher: + await _fetcher.js_worker_ready_promise + return True + else: + return False diff --git a/script.module.urllib3/lib/urllib3/contrib/emscripten/request.py b/script.module.urllib3/lib/urllib3/contrib/emscripten/request.py new file mode 100644 index 0000000000..e692e692bd --- /dev/null +++ b/script.module.urllib3/lib/urllib3/contrib/emscripten/request.py @@ -0,0 +1,22 @@ +from __future__ import annotations + +from dataclasses import dataclass, field + +from ..._base_connection import _TYPE_BODY + + 
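# Editorial sketch of how the fetch helpers above and the dataclass defined
# just below fit together when driven from connection.py; the URL and header
# values are illustrative:
#
#     req = EmscriptenRequest(method="GET", url="https://example.com/data",
#                             timeout=10.0)
#     req.set_header("Accept", "application/json")
#     resp = send_streaming_request(req)  # None when streaming is unavailable
#     if resp is None:
#         resp = send_request(req)  # blocking XMLHttpRequest fallback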
+@dataclass +class EmscriptenRequest: + method: str + url: str + params: dict[str, str] | None = None + body: _TYPE_BODY | None = None + headers: dict[str, str] = field(default_factory=dict) + timeout: float = 0 + decode_content: bool = True + + def set_header(self, name: str, value: str) -> None: + self.headers[name.capitalize()] = value + + def set_body(self, body: _TYPE_BODY | None) -> None: + self.body = body diff --git a/script.module.urllib3/lib/urllib3/contrib/emscripten/response.py b/script.module.urllib3/lib/urllib3/contrib/emscripten/response.py new file mode 100644 index 0000000000..cd3d80e430 --- /dev/null +++ b/script.module.urllib3/lib/urllib3/contrib/emscripten/response.py @@ -0,0 +1,285 @@ +from __future__ import annotations + +import json as _json +import logging +import typing +from contextlib import contextmanager +from dataclasses import dataclass +from http.client import HTTPException as HTTPException +from io import BytesIO, IOBase + +from ...exceptions import InvalidHeader, TimeoutError +from ...response import BaseHTTPResponse +from ...util.retry import Retry +from .request import EmscriptenRequest + +if typing.TYPE_CHECKING: + from ..._base_connection import BaseHTTPConnection, BaseHTTPSConnection + +log = logging.getLogger(__name__) + + +@dataclass +class EmscriptenResponse: + status_code: int + headers: dict[str, str] + body: IOBase | bytes + request: EmscriptenRequest + + +class EmscriptenHttpResponseWrapper(BaseHTTPResponse): + def __init__( + self, + internal_response: EmscriptenResponse, + url: str | None = None, + connection: BaseHTTPConnection | BaseHTTPSConnection | None = None, + ): + self._pool = None # set by pool class + self._body = None + self._response = internal_response + self._url = url + self._connection = connection + self._closed = False + super().__init__( + headers=internal_response.headers, + status=internal_response.status_code, + request_url=url, + version=0, + version_string="HTTP/?", + reason="", + decode_content=True, + ) + self.length_remaining = self._init_length(self._response.request.method) + self.length_is_certain = False + + @property + def url(self) -> str | None: + return self._url + + @url.setter + def url(self, url: str | None) -> None: + self._url = url + + @property + def connection(self) -> BaseHTTPConnection | BaseHTTPSConnection | None: + return self._connection + + @property + def retries(self) -> Retry | None: + return self._retries + + @retries.setter + def retries(self, retries: Retry | None) -> None: + # Override the request_url if retries has a redirect location. + self._retries = retries + + def stream( + self, amt: int | None = 2**16, decode_content: bool | None = None + ) -> typing.Generator[bytes, None, None]: + """ + A generator wrapper for the read() method. A call will block until + ``amt`` bytes have been read from the connection or until the + connection is closed. + + :param amt: + How much of the content to read. The generator will return up to + much data per iteration, but may return less. This is particularly + likely when using compressed data. However, the empty string will + never be returned. + + :param decode_content: + If True, will attempt to decode the body based on the + 'content-encoding' header. 
+ """ + while True: + data = self.read(amt=amt, decode_content=decode_content) + + if data: + yield data + else: + break + + def _init_length(self, request_method: str | None) -> int | None: + length: int | None + content_length: str | None = self.headers.get("content-length") + + if content_length is not None: + try: + # RFC 7230 section 3.3.2 specifies multiple content lengths can + # be sent in a single Content-Length header + # (e.g. Content-Length: 42, 42). This line ensures the values + # are all valid ints and that as long as the `set` length is 1, + # all values are the same. Otherwise, the header is invalid. + lengths = {int(val) for val in content_length.split(",")} + if len(lengths) > 1: + raise InvalidHeader( + "Content-Length contained multiple " + "unmatching values (%s)" % content_length + ) + length = lengths.pop() + except ValueError: + length = None + else: + if length < 0: + length = None + + else: # if content_length is None + length = None + + # Check for responses that shouldn't include a body + if ( + self.status in (204, 304) + or 100 <= self.status < 200 + or request_method == "HEAD" + ): + length = 0 + + return length + + def read( + self, + amt: int | None = None, + decode_content: bool | None = None, # ignored because browser decodes always + cache_content: bool = False, + ) -> bytes: + if ( + self._closed + or self._response is None + or (isinstance(self._response.body, IOBase) and self._response.body.closed) + ): + return b"" + + with self._error_catcher(): + # body has been preloaded as a string by XmlHttpRequest + if not isinstance(self._response.body, IOBase): + self.length_remaining = len(self._response.body) + self.length_is_certain = True + # wrap body in IOStream + self._response.body = BytesIO(self._response.body) + if amt is not None and amt >= 0: + # don't cache partial content + cache_content = False + data = self._response.body.read(amt) + if self.length_remaining is not None: + self.length_remaining = max(self.length_remaining - len(data), 0) + if (self.length_is_certain and self.length_remaining == 0) or len( + data + ) < amt: + # definitely finished reading, close response stream + self._response.body.close() + return typing.cast(bytes, data) + else: # read all we can (and cache it) + data = self._response.body.read() + if cache_content: + self._body = data + if self.length_remaining is not None: + self.length_remaining = max(self.length_remaining - len(data), 0) + if len(data) == 0 or ( + self.length_is_certain and self.length_remaining == 0 + ): + # definitely finished reading, close response stream + self._response.body.close() + return typing.cast(bytes, data) + + def read_chunked( + self, + amt: int | None = None, + decode_content: bool | None = None, + ) -> typing.Generator[bytes, None, None]: + # chunked is handled by browser + while True: + bytes = self.read(amt, decode_content) + if not bytes: + break + yield bytes + + def release_conn(self) -> None: + if not self._pool or not self._connection: + return None + + self._pool._put_conn(self._connection) + self._connection = None + + def drain_conn(self) -> None: + self.close() + + @property + def data(self) -> bytes: + if self._body: + return self._body + else: + return self.read(cache_content=True) + + def json(self) -> typing.Any: + """ + Deserializes the body of the HTTP response as a Python object. + + The body of the HTTP response must be encoded using UTF-8, as per + `RFC 8529 Section 8.1 `_. 
+ + To use a custom JSON decoder pass the result of :attr:`HTTPResponse.data` to + your custom decoder instead. + + If the body of the HTTP response is not decodable to UTF-8, a + `UnicodeDecodeError` will be raised. If the body of the HTTP response is not a + valid JSON document, a `json.JSONDecodeError` will be raised. + + Read more :ref:`here `. + + :returns: The body of the HTTP response as a Python object. + """ + data = self.data.decode("utf-8") + return _json.loads(data) + + def close(self) -> None: + if not self._closed: + if isinstance(self._response.body, IOBase): + self._response.body.close() + if self._connection: + self._connection.close() + self._connection = None + self._closed = True + + @contextmanager + def _error_catcher(self) -> typing.Generator[None, None, None]: + """ + Catch Emscripten specific exceptions thrown by fetch.py, + instead re-raising urllib3 variants, so that low-level exceptions + are not leaked in the high-level api. + + On exit, release the connection back to the pool. + """ + from .fetch import _RequestError, _TimeoutError # avoid circular import + + clean_exit = False + + try: + yield + # If no exception is thrown, we should avoid cleaning up + # unnecessarily. + clean_exit = True + except _TimeoutError as e: + raise TimeoutError(str(e)) + except _RequestError as e: + raise HTTPException(str(e)) + finally: + # If we didn't terminate cleanly, we need to throw away our + # connection. + if not clean_exit: + # The response may not be closed but we're not going to use it + # anymore so close it now + if ( + isinstance(self._response.body, IOBase) + and not self._response.body.closed + ): + self._response.body.close() + # release the connection back to the pool + self.release_conn() + else: + # If we have read everything from the response stream, + # return the connection back to the pool. + if ( + isinstance(self._response.body, IOBase) + and self._response.body.closed + ): + self.release_conn() diff --git a/script.module.urllib3/lib/urllib3/contrib/pyopenssl.py b/script.module.urllib3/lib/urllib3/contrib/pyopenssl.py index 3987d6320d..c12cb3174d 100644 --- a/script.module.urllib3/lib/urllib3/contrib/pyopenssl.py +++ b/script.module.urllib3/lib/urllib3/contrib/pyopenssl.py @@ -40,7 +40,7 @@ from __future__ import annotations -import OpenSSL.SSL # type: ignore[import] +import OpenSSL.SSL # type: ignore[import-untyped] from cryptography import x509 try: @@ -61,13 +61,13 @@ class UnsupportedExtension(Exception): # type: ignore[no-redef] from .. import util if typing.TYPE_CHECKING: - from OpenSSL.crypto import X509 # type: ignore[import] + from OpenSSL.crypto import X509 # type: ignore[import-untyped] __all__ = ["inject_into_urllib3", "extract_from_urllib3"] # Map from urllib3 to PyOpenSSL compatible parameter-values. 
-_openssl_versions = { +_openssl_versions: dict[int, int] = { util.ssl_.PROTOCOL_TLS: OpenSSL.SSL.SSLv23_METHOD, # type: ignore[attr-defined] util.ssl_.PROTOCOL_TLS_CLIENT: OpenSSL.SSL.SSLv23_METHOD, # type: ignore[attr-defined] ssl.PROTOCOL_TLSv1: OpenSSL.SSL.TLSv1_METHOD, @@ -400,6 +400,10 @@ def getpeercert( def version(self) -> str: return self.connection.get_protocol_version_name() # type: ignore[no-any-return] + def selected_alpn_protocol(self) -> str | None: + alpn_proto = self.connection.get_alpn_proto_negotiated() + return alpn_proto.decode() if alpn_proto else None + WrappedSocket.makefile = socket_cls.makefile # type: ignore[attr-defined] diff --git a/script.module.urllib3/lib/urllib3/contrib/socks.py b/script.module.urllib3/lib/urllib3/contrib/socks.py index 6c3bb764b2..c62b5e0332 100644 --- a/script.module.urllib3/lib/urllib3/contrib/socks.py +++ b/script.module.urllib3/lib/urllib3/contrib/socks.py @@ -41,7 +41,7 @@ from __future__ import annotations try: - import socks # type: ignore[import] + import socks # type: ignore[import-not-found] except ImportError: import warnings @@ -51,7 +51,7 @@ ( "SOCKS support in urllib3 requires the installation of optional " "dependencies: specifically, PySocks. For more information, see " - "https://urllib3.readthedocs.io/en/latest/contrib.html#socks-proxies" + "https://urllib3.readthedocs.io/en/latest/advanced-usage.html#socks-proxies" ), DependencyWarning, ) @@ -71,10 +71,8 @@ except ImportError: ssl = None # type: ignore[assignment] -from typing import TypedDict - -class _TYPE_SOCKS_OPTIONS(TypedDict): +class _TYPE_SOCKS_OPTIONS(typing.TypedDict): socks_version: int proxy_host: str | None proxy_port: str | None diff --git a/script.module.urllib3/lib/urllib3/exceptions.py b/script.module.urllib3/lib/urllib3/exceptions.py index 5bb9236961..b0792f00fd 100644 --- a/script.module.urllib3/lib/urllib3/exceptions.py +++ b/script.module.urllib3/lib/urllib3/exceptions.py @@ -252,13 +252,16 @@ class IncompleteRead(HTTPError, httplib_IncompleteRead): for ``partial`` to avoid creating large objects on streamed reads. 
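
In the high-level API this usually surfaces wrapped in a `ProtocolError`; a hedged sketch, with `response` assumed to be a streamed urllib3 response:

    from urllib3.exceptions import ProtocolError

    try:
        payload = response.read()
    except ProtocolError as err:
        # When the cause was an IncompleteRead, it is err.args[1] and
        # carries the partial/expected byte counts described here.
        raise
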
""" + partial: int # type: ignore[assignment] + expected: int + def __init__(self, partial: int, expected: int) -> None: - self.partial = partial # type: ignore[assignment] + self.partial = partial self.expected = expected def __repr__(self) -> str: return "IncompleteRead(%i bytes read, %i more expected)" % ( - self.partial, # type: ignore[str-format] + self.partial, self.expected, ) diff --git a/script.module.urllib3/lib/urllib3/fields.py b/script.module.urllib3/lib/urllib3/fields.py index 51d898e24f..3e258a5d8b 100644 --- a/script.module.urllib3/lib/urllib3/fields.py +++ b/script.module.urllib3/lib/urllib3/fields.py @@ -225,13 +225,9 @@ def from_tuples( if isinstance(value, tuple): if len(value) == 3: - filename, data, content_type = typing.cast( - typing.Tuple[str, _TYPE_FIELD_VALUE, str], value - ) + filename, data, content_type = value else: - filename, data = typing.cast( - typing.Tuple[str, _TYPE_FIELD_VALUE], value - ) + filename, data = value content_type = guess_content_type(filename) else: filename = None diff --git a/script.module.urllib3/lib/urllib3/http2/__init__.py b/script.module.urllib3/lib/urllib3/http2/__init__.py new file mode 100644 index 0000000000..133e1d8f23 --- /dev/null +++ b/script.module.urllib3/lib/urllib3/http2/__init__.py @@ -0,0 +1,53 @@ +from __future__ import annotations + +from importlib.metadata import version + +__all__ = [ + "inject_into_urllib3", + "extract_from_urllib3", +] + +import typing + +orig_HTTPSConnection: typing.Any = None + + +def inject_into_urllib3() -> None: + # First check if h2 version is valid + h2_version = version("h2") + if not h2_version.startswith("4."): + raise ImportError( + "urllib3 v2 supports h2 version 4.x.x, currently " + f"the 'h2' module is compiled with {h2_version!r}. " + "See: https://github.com/urllib3/urllib3/issues/3290" + ) + + # Import here to avoid circular dependencies. + from .. import connection as urllib3_connection + from .. import util as urllib3_util + from ..connectionpool import HTTPSConnectionPool + from ..util import ssl_ as urllib3_util_ssl + from .connection import HTTP2Connection + + global orig_HTTPSConnection + orig_HTTPSConnection = urllib3_connection.HTTPSConnection + + HTTPSConnectionPool.ConnectionCls = HTTP2Connection + urllib3_connection.HTTPSConnection = HTTP2Connection # type: ignore[misc] + + # TODO: Offer 'http/1.1' as well, but for testing purposes this is handy. + urllib3_util.ALPN_PROTOCOLS = ["h2"] + urllib3_util_ssl.ALPN_PROTOCOLS = ["h2"] + + +def extract_from_urllib3() -> None: + from .. import connection as urllib3_connection + from .. 
import util as urllib3_util + from ..connectionpool import HTTPSConnectionPool + from ..util import ssl_ as urllib3_util_ssl + + HTTPSConnectionPool.ConnectionCls = orig_HTTPSConnection + urllib3_connection.HTTPSConnection = orig_HTTPSConnection # type: ignore[misc] + + urllib3_util.ALPN_PROTOCOLS = ["http/1.1"] + urllib3_util_ssl.ALPN_PROTOCOLS = ["http/1.1"] diff --git a/script.module.urllib3/lib/urllib3/http2/connection.py b/script.module.urllib3/lib/urllib3/http2/connection.py new file mode 100644 index 0000000000..f486145280 --- /dev/null +++ b/script.module.urllib3/lib/urllib3/http2/connection.py @@ -0,0 +1,356 @@ +from __future__ import annotations + +import logging +import re +import threading +import types +import typing + +import h2.config # type: ignore[import-untyped] +import h2.connection # type: ignore[import-untyped] +import h2.events # type: ignore[import-untyped] + +from .._base_connection import _TYPE_BODY +from .._collections import HTTPHeaderDict +from ..connection import HTTPSConnection, _get_default_user_agent +from ..exceptions import ConnectionError +from ..response import BaseHTTPResponse + +orig_HTTPSConnection = HTTPSConnection + +T = typing.TypeVar("T") + +log = logging.getLogger(__name__) + +RE_IS_LEGAL_HEADER_NAME = re.compile(rb"^[!#$%&'*+\-.^_`|~0-9a-z]+$") +RE_IS_ILLEGAL_HEADER_VALUE = re.compile(rb"[\0\x00\x0a\x0d\r\n]|^[ \r\n\t]|[ \r\n\t]$") + + +def _is_legal_header_name(name: bytes) -> bool: + """ + "An implementation that validates fields according to the definitions in Sections + 5.1 and 5.5 of [HTTP] only needs an additional check that field names do not + include uppercase characters." (https://httpwg.org/specs/rfc9113.html#n-field-validity) + + `http.client._is_legal_header_name` does not validate the field name according to the + HTTP 1.1 spec, so we do that here, in addition to checking for uppercase characters. + + This does not allow for the `:` character in the header name, so should not + be used to validate pseudo-headers. + """ + return bool(RE_IS_LEGAL_HEADER_NAME.match(name)) + + +def _is_illegal_header_value(value: bytes) -> bool: + """ + "A field value MUST NOT contain the zero value (ASCII NUL, 0x00), line feed + (ASCII LF, 0x0a), or carriage return (ASCII CR, 0x0d) at any position. A field + value MUST NOT start or end with an ASCII whitespace character (ASCII SP or HTAB, + 0x20 or 0x09)." (https://httpwg.org/specs/rfc9113.html#n-field-validity) + """ + return bool(RE_IS_ILLEGAL_HEADER_VALUE.search(value)) + + +class _LockedObject(typing.Generic[T]): + """ + A wrapper class that hides a specific object behind a lock. + The goal here is to provide a simple way to protect access to an object + that cannot safely be simultaneously accessed from multiple threads. The + intended use of this class is simple: take hold of it with a context + manager, which returns the protected object. 
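
Zooming out from this helper to the module entry points above, a sketch of driving the HTTP/2 opt-in end to end; the endpoint is hypothetical and an installed h2 4.x is assumed:

    import urllib3
    import urllib3.http2

    urllib3.http2.inject_into_urllib3()  # pools now use HTTP2Connection
    try:
        response = urllib3.request("GET", "https://example.com/")
        print(response.version_string)  # "HTTP/2"
    finally:
        urllib3.http2.extract_from_urllib3()  # back to HTTP/1.1 defaults
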
+ """ + + __slots__ = ( + "lock", + "_obj", + ) + + def __init__(self, obj: T): + self.lock = threading.RLock() + self._obj = obj + + def __enter__(self) -> T: + self.lock.acquire() + return self._obj + + def __exit__( + self, + exc_type: type[BaseException] | None, + exc_val: BaseException | None, + exc_tb: types.TracebackType | None, + ) -> None: + self.lock.release() + + +class HTTP2Connection(HTTPSConnection): + def __init__( + self, host: str, port: int | None = None, **kwargs: typing.Any + ) -> None: + self._h2_conn = self._new_h2_conn() + self._h2_stream: int | None = None + self._headers: list[tuple[bytes, bytes]] = [] + + if "proxy" in kwargs or "proxy_config" in kwargs: # Defensive: + raise NotImplementedError("Proxies aren't supported with HTTP/2") + + super().__init__(host, port, **kwargs) + + if self._tunnel_host is not None: + raise NotImplementedError("Tunneling isn't supported with HTTP/2") + + def _new_h2_conn(self) -> _LockedObject[h2.connection.H2Connection]: + config = h2.config.H2Configuration(client_side=True) + return _LockedObject(h2.connection.H2Connection(config=config)) + + def connect(self) -> None: + super().connect() + with self._h2_conn as conn: + conn.initiate_connection() + if data_to_send := conn.data_to_send(): + self.sock.sendall(data_to_send) + + def putrequest( # type: ignore[override] + self, + method: str, + url: str, + **kwargs: typing.Any, + ) -> None: + """putrequest + This deviates from the HTTPConnection method signature since we never need to override + sending accept-encoding headers or the host header. + """ + if "skip_host" in kwargs: + raise NotImplementedError("`skip_host` isn't supported") + if "skip_accept_encoding" in kwargs: + raise NotImplementedError("`skip_accept_encoding` isn't supported") + + self._request_url = url or "/" + self._validate_path(url) # type: ignore[attr-defined] + + if ":" in self.host: + authority = f"[{self.host}]:{self.port or 443}" + else: + authority = f"{self.host}:{self.port or 443}" + + self._headers.append((b":scheme", b"https")) + self._headers.append((b":method", method.encode())) + self._headers.append((b":authority", authority.encode())) + self._headers.append((b":path", url.encode())) + + with self._h2_conn as conn: + self._h2_stream = conn.get_next_available_stream_id() + + def putheader(self, header: str | bytes, *values: str | bytes) -> None: + # TODO SKIPPABLE_HEADERS from urllib3 are ignored. + header = header.encode() if isinstance(header, str) else header + header = header.lower() # A lot of upstream code uses capitalized headers. + if not _is_legal_header_name(header): + raise ValueError(f"Illegal header name {str(header)}") + + for value in values: + value = value.encode() if isinstance(value, str) else value + if _is_illegal_header_value(value): + raise ValueError(f"Illegal header value {str(value)}") + self._headers.append((header, value)) + + def endheaders(self, message_body: typing.Any = None) -> None: # type: ignore[override] + if self._h2_stream is None: + raise ConnectionError("Must call `putrequest` first.") + + with self._h2_conn as conn: + conn.send_headers( + stream_id=self._h2_stream, + headers=self._headers, + end_stream=(message_body is None), + ) + if data_to_send := conn.data_to_send(): + self.sock.sendall(data_to_send) + self._headers = [] # Reset headers for the next request. + + def send(self, data: typing.Any) -> None: + """Send data to the server. + `data` can be: `str`, `bytes`, an iterable, or file-like objects + that support a .read() method. 
+ """ + if self._h2_stream is None: + raise ConnectionError("Must call `putrequest` first.") + + with self._h2_conn as conn: + if data_to_send := conn.data_to_send(): + self.sock.sendall(data_to_send) + + if hasattr(data, "read"): # file-like objects + while True: + chunk = data.read(self.blocksize) + if not chunk: + break + if isinstance(chunk, str): + chunk = chunk.encode() # pragma: no cover + conn.send_data(self._h2_stream, chunk, end_stream=False) + if data_to_send := conn.data_to_send(): + self.sock.sendall(data_to_send) + conn.end_stream(self._h2_stream) + return + + if isinstance(data, str): # str -> bytes + data = data.encode() + + try: + if isinstance(data, bytes): + conn.send_data(self._h2_stream, data, end_stream=True) + if data_to_send := conn.data_to_send(): + self.sock.sendall(data_to_send) + else: + for chunk in data: + conn.send_data(self._h2_stream, chunk, end_stream=False) + if data_to_send := conn.data_to_send(): + self.sock.sendall(data_to_send) + conn.end_stream(self._h2_stream) + except TypeError: + raise TypeError( + "`data` should be str, bytes, iterable, or file. got %r" + % type(data) + ) + + def set_tunnel( + self, + host: str, + port: int | None = None, + headers: typing.Mapping[str, str] | None = None, + scheme: str = "http", + ) -> None: + raise NotImplementedError( + "HTTP/2 does not support setting up a tunnel through a proxy" + ) + + def getresponse( # type: ignore[override] + self, + ) -> HTTP2Response: + status = None + data = bytearray() + with self._h2_conn as conn: + end_stream = False + while not end_stream: + # TODO: Arbitrary read value. + if received_data := self.sock.recv(65535): + events = conn.receive_data(received_data) + for event in events: + if isinstance(event, h2.events.ResponseReceived): + headers = HTTPHeaderDict() + for header, value in event.headers: + if header == b":status": + status = int(value.decode()) + else: + headers.add( + header.decode("ascii"), value.decode("ascii") + ) + + elif isinstance(event, h2.events.DataReceived): + data += event.data + conn.acknowledge_received_data( + event.flow_controlled_length, event.stream_id + ) + + elif isinstance(event, h2.events.StreamEnded): + end_stream = True + + if data_to_send := conn.data_to_send(): + self.sock.sendall(data_to_send) + + assert status is not None + return HTTP2Response( + status=status, + headers=headers, + request_url=self._request_url, + data=bytes(data), + ) + + def request( # type: ignore[override] + self, + method: str, + url: str, + body: _TYPE_BODY | None = None, + headers: typing.Mapping[str, str] | None = None, + *, + preload_content: bool = True, + decode_content: bool = True, + enforce_content_length: bool = True, + **kwargs: typing.Any, + ) -> None: + """Send an HTTP/2 request""" + if "chunked" in kwargs: + # TODO this is often present from upstream. 
+ # raise NotImplementedError("`chunked` isn't supported with HTTP/2") + pass + + if self.sock is not None: + self.sock.settimeout(self.timeout) + + self.putrequest(method, url) + + headers = headers or {} + for k, v in headers.items(): + if k.lower() == "transfer-encoding" and v == "chunked": + continue + else: + self.putheader(k, v) + + if b"user-agent" not in dict(self._headers): + self.putheader(b"user-agent", _get_default_user_agent()) + + if body: + self.endheaders(message_body=body) + self.send(body) + else: + self.endheaders() + + def close(self) -> None: + with self._h2_conn as conn: + try: + conn.close_connection() + if data := conn.data_to_send(): + self.sock.sendall(data) + except Exception: + pass + + # Reset all our HTTP/2 connection state. + self._h2_conn = self._new_h2_conn() + self._h2_stream = None + self._headers = [] + + super().close() + + +class HTTP2Response(BaseHTTPResponse): + # TODO: This is a woefully incomplete response object, but works for non-streaming. + def __init__( + self, + status: int, + headers: HTTPHeaderDict, + request_url: str, + data: bytes, + decode_content: bool = False, # TODO: support decoding + ) -> None: + super().__init__( + status=status, + headers=headers, + # Following CPython, we map HTTP versions to major * 10 + minor integers + version=20, + version_string="HTTP/2", + # No reason phrase in HTTP/2 + reason=None, + decode_content=decode_content, + request_url=request_url, + ) + self._data = data + self.length_remaining = 0 + + @property + def data(self) -> bytes: + return self._data + + def get_redirect_location(self) -> None: + return None + + def close(self) -> None: + pass diff --git a/script.module.urllib3/lib/urllib3/http2/probe.py b/script.module.urllib3/lib/urllib3/http2/probe.py new file mode 100644 index 0000000000..9ea900764f --- /dev/null +++ b/script.module.urllib3/lib/urllib3/http2/probe.py @@ -0,0 +1,87 @@ +from __future__ import annotations + +import threading + + +class _HTTP2ProbeCache: + __slots__ = ( + "_lock", + "_cache_locks", + "_cache_values", + ) + + def __init__(self) -> None: + self._lock = threading.Lock() + self._cache_locks: dict[tuple[str, int], threading.RLock] = {} + self._cache_values: dict[tuple[str, int], bool | None] = {} + + def acquire_and_get(self, host: str, port: int) -> bool | None: + # By the end of this block we know that + # _cache_[values,locks] is available. + value = None + with self._lock: + key = (host, port) + try: + value = self._cache_values[key] + # If it's a known value we return right away. + if value is not None: + return value + except KeyError: + self._cache_locks[key] = threading.RLock() + self._cache_values[key] = None + + # If the value is unknown, we acquire the lock to signal + # to the requesting thread that the probe is in progress + # or that the current thread needs to return their findings. + key_lock = self._cache_locks[key] + key_lock.acquire() + try: + # If the by the time we get the lock the value has been + # updated we want to return the updated value. + value = self._cache_values[key] + + # In case an exception like KeyboardInterrupt is raised here. + except BaseException as e: # Defensive: + assert not isinstance(e, KeyError) # KeyError shouldn't be possible. + key_lock.release() + raise + + return value + + def set_and_release( + self, host: str, port: int, supports_http2: bool | None + ) -> None: + key = (host, port) + key_lock = self._cache_locks[key] + with key_lock: # Uses an RLock, so can be locked again from same thread. 
+ if supports_http2 is None and self._cache_values[key] is not None: + raise ValueError( + "Cannot reset HTTP/2 support for origin after value has been set." + ) # Defensive: not expected in normal usage + + self._cache_values[key] = supports_http2 + key_lock.release() + + def _values(self) -> dict[tuple[str, int], bool | None]: + """This function is for testing purposes only. Gets the current state of the probe cache""" + with self._lock: + return {k: v for k, v in self._cache_values.items()} + + def _reset(self) -> None: + """This function is for testing purposes only. Reset the cache values""" + with self._lock: + self._cache_locks = {} + self._cache_values = {} + + +_HTTP2_PROBE_CACHE = _HTTP2ProbeCache() + +set_and_release = _HTTP2_PROBE_CACHE.set_and_release +acquire_and_get = _HTTP2_PROBE_CACHE.acquire_and_get +_values = _HTTP2_PROBE_CACHE._values +_reset = _HTTP2_PROBE_CACHE._reset + +__all__ = [ + "set_and_release", + "acquire_and_get", +] diff --git a/script.module.urllib3/lib/urllib3/poolmanager.py b/script.module.urllib3/lib/urllib3/poolmanager.py index 32da0a00ab..085d1dbafd 100644 --- a/script.module.urllib3/lib/urllib3/poolmanager.py +++ b/script.module.urllib3/lib/urllib3/poolmanager.py @@ -26,7 +26,8 @@ if typing.TYPE_CHECKING: import ssl - from typing import Literal + + from typing_extensions import Self __all__ = ["PoolManager", "ProxyManager", "proxy_from_url"] @@ -51,8 +52,6 @@ # http.client.HTTPConnection & http.client.HTTPSConnection in Python 3.7 _DEFAULT_BLOCKSIZE = 16384 -_SelfT = typing.TypeVar("_SelfT") - class PoolKey(typing.NamedTuple): """ @@ -214,7 +213,7 @@ def __init__( self.pool_classes_by_scheme = pool_classes_by_scheme self.key_fn_by_scheme = key_fn_by_scheme.copy() - def __enter__(self: _SelfT) -> _SelfT: + def __enter__(self) -> Self: return self def __exit__( @@ -222,7 +221,7 @@ def __exit__( exc_type: type[BaseException] | None, exc_val: BaseException | None, exc_tb: TracebackType | None, - ) -> Literal[False]: + ) -> typing.Literal[False]: self.clear() # Return False to re-raise any potential exceptions return False @@ -553,7 +552,7 @@ def __init__( proxy_headers: typing.Mapping[str, str] | None = None, proxy_ssl_context: ssl.SSLContext | None = None, use_forwarding_for_https: bool = False, - proxy_assert_hostname: None | str | Literal[False] = None, + proxy_assert_hostname: None | str | typing.Literal[False] = None, proxy_assert_fingerprint: str | None = None, **connection_pool_kw: typing.Any, ) -> None: diff --git a/script.module.urllib3/lib/urllib3/response.py b/script.module.urllib3/lib/urllib3/response.py index 37936f9397..a0273d65b0 100644 --- a/script.module.urllib3/lib/urllib3/response.py +++ b/script.module.urllib3/lib/urllib3/response.py @@ -14,29 +14,33 @@ from http.client import HTTPResponse as _HttplibHTTPResponse from socket import timeout as SocketTimeout +if typing.TYPE_CHECKING: + from ._base_connection import BaseHTTPConnection + try: try: - import brotlicffi as brotli # type: ignore[import] + import brotlicffi as brotli # type: ignore[import-not-found] except ImportError: - import brotli # type: ignore[import] + import brotli # type: ignore[import-not-found] except ImportError: brotli = None try: - import zstandard as zstd # type: ignore[import] - + import zstandard as zstd +except (AttributeError, ImportError, ValueError): # Defensive: + HAS_ZSTD = False +else: # The package 'zstandard' added the 'eof' property starting # in v0.18.0 which we require to ensure a complete and # valid zstd stream was fed into the ZstdDecoder. 
# See: https://github.com/urllib3/urllib3/pull/2624 - _zstd_version = _zstd_version = tuple( + _zstd_version = tuple( map(int, re.search(r"^([0-9]+)\.([0-9]+)", zstd.__version__).groups()) # type: ignore[union-attr] ) if _zstd_version < (0, 18): # Defensive: - zstd = None - -except (AttributeError, ImportError, ValueError): # Defensive: - zstd = None + HAS_ZSTD = False + else: + HAS_ZSTD = True from . import util from ._base_connection import _TYPE_BODY @@ -58,8 +62,6 @@ from .util.retry import Retry if typing.TYPE_CHECKING: - from typing import Literal - from .connectionpool import HTTPConnectionPool log = logging.getLogger(__name__) @@ -160,7 +162,7 @@ def flush(self) -> bytes: return b"" -if zstd is not None: +if HAS_ZSTD: class ZstdDecoder(ContentDecoder): def __init__(self) -> None: @@ -180,7 +182,7 @@ def flush(self) -> bytes: ret = self._obj.flush() # note: this is a no-op if not self._obj.eof: raise DecodeError("Zstandard data is incomplete") - return ret # type: ignore[no-any-return] + return ret class MultiDecoder(ContentDecoder): @@ -216,7 +218,7 @@ def _get_decoder(mode: str) -> ContentDecoder: if brotli is not None and mode == "br": return BrotliDecoder() - if zstd is not None and mode == "zstd": + if HAS_ZSTD and mode == "zstd": return ZstdDecoder() return DeflateDecoder() @@ -280,12 +282,26 @@ def get(self, n: int) -> bytes: return ret.getvalue() + def get_all(self) -> bytes: + buffer = self.buffer + if not buffer: + assert self._size == 0 + return b"" + if len(buffer) == 1: + result = buffer.pop() + else: + ret = io.BytesIO() + ret.writelines(buffer.popleft() for _ in range(len(buffer))) + result = ret.getvalue() + self._size = 0 + return result + class BaseHTTPResponse(io.IOBase): CONTENT_DECODERS = ["gzip", "x-gzip", "deflate"] if brotli is not None: CONTENT_DECODERS += ["br"] - if zstd is not None: + if HAS_ZSTD: CONTENT_DECODERS += ["zstd"] REDIRECT_STATUSES = [301, 302, 303, 307, 308] @@ -293,7 +309,7 @@ class BaseHTTPResponse(io.IOBase): if brotli is not None: DECODER_ERROR_CLASSES += (brotli.error,) - if zstd is not None: + if HAS_ZSTD: DECODER_ERROR_CLASSES += (zstd.ZstdError,) def __init__( @@ -302,6 +318,7 @@ def __init__( headers: typing.Mapping[str, str] | typing.Mapping[bytes, bytes] | None = None, status: int, version: int, + version_string: str, reason: str | None, decode_content: bool, request_url: str | None, @@ -313,6 +330,7 @@ def __init__( self.headers = HTTPHeaderDict(headers) # type: ignore[arg-type] self.status = status self.version = version + self.version_string = version_string self.reason = reason self.decode_content = decode_content self._has_decoded_content = False @@ -327,8 +345,9 @@ def __init__( self.chunked = True self._decoder: ContentDecoder | None = None + self.length_remaining: int | None - def get_redirect_location(self) -> str | None | Literal[False]: + def get_redirect_location(self) -> str | None | typing.Literal[False]: """ Should we redirect and where to? @@ -346,13 +365,21 @@ def data(self) -> bytes: def json(self) -> typing.Any: """ - Parses the body of the HTTP response as JSON. + Deserializes the body of the HTTP response as a Python object. + + The body of the HTTP response must be encoded using UTF-8, as per + `RFC 8529 Section 8.1 `_. + + To use a custom JSON decoder pass the result of :attr:`HTTPResponse.data` to + your custom decoder instead. - To use a custom JSON decoder pass the result of :attr:`HTTPResponse.data` to the decoder. 
+ If the body of the HTTP response is not decodable to UTF-8, a + `UnicodeDecodeError` will be raised. If the body of the HTTP response is not a + valid JSON document, a `json.JSONDecodeError` will be raised. - This method can raise either `UnicodeDecodeError` or `json.JSONDecodeError`. + Read more :ref:`here `. - Read more :ref:`here `. + :returns: The body of the HTTP response as a Python object. """ data = self.data.decode("utf-8") return _json.loads(data) @@ -366,7 +393,7 @@ def url(self, url: str | None) -> None: raise NotImplementedError() @property - def connection(self) -> HTTPConnection | None: + def connection(self) -> BaseHTTPConnection | None: raise NotImplementedError() @property @@ -393,6 +420,13 @@ def read( ) -> bytes: raise NotImplementedError() + def read1( + self, + amt: int | None = None, + decode_content: bool | None = None, + ) -> bytes: + raise NotImplementedError() + def read_chunked( self, amt: int | None = None, @@ -542,6 +576,7 @@ def __init__( headers: typing.Mapping[str, str] | typing.Mapping[bytes, bytes] | None = None, status: int = 0, version: int = 0, + version_string: str = "HTTP/?", reason: str | None = None, preload_content: bool = True, decode_content: bool = True, @@ -559,6 +594,7 @@ def __init__( headers=headers, status=status, version=version, + version_string=version_string, reason=reason, decode_content=decode_content, request_url=request_url, @@ -724,8 +760,18 @@ def _error_catcher(self) -> typing.Generator[None, None, None]: raise ReadTimeoutError(self._pool, None, "Read timed out.") from e # type: ignore[arg-type] + except IncompleteRead as e: + if ( + e.expected is not None + and e.partial is not None + and e.expected == -e.partial + ): + arg = "Response may not contain content." + else: + arg = f"Connection broken: {e!r}" + raise ProtocolError(arg, e) from e + except (HTTPException, OSError) as e: - # This includes IncompleteRead. raise ProtocolError(f"Connection broken: {e!r}", e) from e # If no exception is thrown, we should avoid cleaning up @@ -752,7 +798,12 @@ def _error_catcher(self) -> typing.Generator[None, None, None]: if self._original_response and self._original_response.isclosed(): self.release_conn() - def _fp_read(self, amt: int | None = None) -> bytes: + def _fp_read( + self, + amt: int | None = None, + *, + read1: bool = False, + ) -> bytes: """ Read a response with the thought that reading the number of bytes larger than can fit in a 32-bit int at a time via SSL in some @@ -770,8 +821,14 @@ def _fp_read(self, amt: int | None = None) -> bytes: c_int_max = 2**31 - 1 if ( (amt and amt > c_int_max) - or (self.length_remaining and self.length_remaining > c_int_max) + or ( + amt is None + and self.length_remaining + and self.length_remaining > c_int_max + ) ) and (util.IS_PYOPENSSL or sys.version_info < (3, 10)): + if read1: + return self._fp.read1(c_int_max) buffer = io.BytesIO() # Besides `max_chunk_amt` being a maximum chunk size, it # affects memory overhead of reading a response by this @@ -792,6 +849,8 @@ def _fp_read(self, amt: int | None = None) -> bytes: buffer.write(data) del data # to reduce peak memory usage by `max_chunk_amt`. 
return buffer.getvalue() + elif read1: + return self._fp.read1(amt) if amt is not None else self._fp.read1() else: # StringIO doesn't like amt=None return self._fp.read(amt) if amt is not None else self._fp.read() @@ -799,6 +858,8 @@ def _fp_read(self, amt: int | None = None) -> bytes: def _raw_read( self, amt: int | None = None, + *, + read1: bool = False, ) -> bytes: """ Reads `amt` of bytes from the socket. @@ -809,7 +870,7 @@ def _raw_read( fp_closed = getattr(self._fp, "closed", False) with self._error_catcher(): - data = self._fp_read(amt) if not fp_closed else b"" + data = self._fp_read(amt, read1=read1) if not fp_closed else b"" if amt is not None and amt != 0 and not data: # Platform-specific: Buggy versions of Python. # Close the connection when no data is returned @@ -831,6 +892,14 @@ def _raw_read( # raised during streaming, so all calls with incorrect # Content-Length are caught. raise IncompleteRead(self._fp_bytes_read, self.length_remaining) + elif read1 and ( + (amt != 0 and not data) or self.length_remaining == len(data) + ): + # All data has been read, but `self._fp.read1` in + # CPython 3.12 and older doesn't always close + # `http.client.HTTPResponse`, so we close it here. + # See https://github.com/python/cpython/issues/113199 + self._fp.close() if data: self._fp_bytes_read += len(data) @@ -868,7 +937,10 @@ def read( if decode_content is None: decode_content = self.decode_content - if amt is not None: + if amt and amt < 0: + # Negative numbers and `None` should be treated the same. + amt = None + elif amt is not None: cache_content = False if len(self._decoded_buffer) >= amt: @@ -909,6 +981,60 @@ def read( return data + def read1( + self, + amt: int | None = None, + decode_content: bool | None = None, + ) -> bytes: + """ + Similar to ``http.client.HTTPResponse.read1`` and documented + in :meth:`io.BufferedReader.read1`, but with an additional parameter: + ``decode_content``. + + :param amt: + How much of the content to read. + + :param decode_content: + If True, will attempt to decode the body based on the + 'content-encoding' header. + """ + if decode_content is None: + decode_content = self.decode_content + if amt and amt < 0: + # Negative numbers and `None` should be treated the same. + amt = None + # try and respond without going to the network + if self._has_decoded_content: + if not decode_content: + raise RuntimeError( + "Calling read1(decode_content=False) is not supported after " + "read1(decode_content=True) was called." + ) + if len(self._decoded_buffer) > 0: + if amt is None: + return self._decoded_buffer.get_all() + return self._decoded_buffer.get(amt) + if amt == 0: + return b"" + + # FIXME, this method's type doesn't say returning None is possible + data = self._raw_read(amt, read1=True) + if not decode_content or data is None: + return data + + self._init_decoder() + while True: + flush_decoder = not data + decoded_data = self._decode(data, decode_content, flush_decoder) + self._decoded_buffer.put(decoded_data) + if decoded_data or flush_decoder: + break + data = self._raw_read(8192, read1=True) + + if amt is None: + return self._decoded_buffer.get_all() + return self._decoded_buffer.get(amt) + def stream( self, amt: int | None = 2**16, decode_content: bool | None = None ) -> typing.Generator[bytes, None, None]: @@ -1001,9 +1127,13 @@ def _update_chunk_length(self) -> None: try: self.chunk_left = int(line, 16) except ValueError: - # Invalid chunked protocol response, abort. 
self.close() - raise InvalidChunkLength(self, line) from None + if line: + # Invalid chunked protocol response, abort. + raise InvalidChunkLength(self, line) from None + else: + # Truncated at start of next chunk + raise ProtocolError("Response ended prematurely") from None def _handle_chunk(self, amt: int | None) -> bytes: returned_chunk = None @@ -1067,6 +1197,11 @@ def read_chunked( if self._fp.fp is None: # type: ignore[union-attr] return None + if amt and amt < 0: + # Negative numbers and `None` should be treated the same, + # but httplib handles only `None` correctly. + amt = None + while True: self._update_chunk_length() if self.chunk_left == 0: diff --git a/script.module.urllib3/lib/urllib3/util/connection.py b/script.module.urllib3/lib/urllib3/util/connection.py index 5c7da73f4e..89bd189fb2 100644 --- a/script.module.urllib3/lib/urllib3/util/connection.py +++ b/script.module.urllib3/lib/urllib3/util/connection.py @@ -6,7 +6,7 @@ from ..exceptions import LocationParseError from .timeout import _DEFAULT_TIMEOUT, _TYPE_TIMEOUT -_TYPE_SOCKET_OPTIONS = typing.Sequence[typing.Tuple[int, int, typing.Union[int, bytes]]] +_TYPE_SOCKET_OPTIONS = typing.List[typing.Tuple[int, int, typing.Union[int, bytes]]] if typing.TYPE_CHECKING: from .._base_connection import BaseHTTPConnection diff --git a/script.module.urllib3/lib/urllib3/util/request.py b/script.module.urllib3/lib/urllib3/util/request.py index e6905ffca4..82454a099d 100644 --- a/script.module.urllib3/lib/urllib3/util/request.py +++ b/script.module.urllib3/lib/urllib3/util/request.py @@ -21,15 +21,15 @@ ACCEPT_ENCODING = "gzip,deflate" try: try: - import brotlicffi as _unused_module_brotli # type: ignore[import] # noqa: F401 + import brotlicffi as _unused_module_brotli # type: ignore[import-not-found] # noqa: F401 except ImportError: - import brotli as _unused_module_brotli # type: ignore[import] # noqa: F401 + import brotli as _unused_module_brotli # type: ignore[import-not-found] # noqa: F401 except ImportError: pass else: ACCEPT_ENCODING += ",br" try: - import zstandard as _unused_module_zstd # type: ignore[import] # noqa: F401 + import zstandard as _unused_module_zstd # noqa: F401 except ImportError: pass else: @@ -227,7 +227,7 @@ def chunk_readable() -> typing.Iterable[bytes]: if not datablock: break if encode: - datablock = datablock.encode("iso-8859-1") + datablock = datablock.encode("utf-8") yield datablock chunks = chunk_readable() diff --git a/script.module.urllib3/lib/urllib3/util/retry.py b/script.module.urllib3/lib/urllib3/util/retry.py index 7572bfd26a..0456cceba4 100644 --- a/script.module.urllib3/lib/urllib3/util/retry.py +++ b/script.module.urllib3/lib/urllib3/util/retry.py @@ -21,6 +21,8 @@ from .util import reraise if typing.TYPE_CHECKING: + from typing_extensions import Self + from ..connectionpool import ConnectionPool from ..response import BaseHTTPResponse @@ -187,7 +189,9 @@ class Retry: RETRY_AFTER_STATUS_CODES = frozenset([413, 429, 503]) #: Default headers to be used for ``remove_headers_on_redirect`` - DEFAULT_REMOVE_HEADERS_ON_REDIRECT = frozenset(["Cookie", "Authorization"]) + DEFAULT_REMOVE_HEADERS_ON_REDIRECT = frozenset( + ["Cookie", "Authorization", "Proxy-Authorization"] + ) #: Default maximum backoff time. 
DEFAULT_BACKOFF_MAX = 120 @@ -240,7 +244,7 @@ def __init__( ) self.backoff_jitter = backoff_jitter - def new(self, **kw: typing.Any) -> Retry: + def new(self, **kw: typing.Any) -> Self: params = dict( total=self.total, connect=self.connect, @@ -429,7 +433,7 @@ def increment( error: Exception | None = None, _pool: ConnectionPool | None = None, _stacktrace: TracebackType | None = None, - ) -> Retry: + ) -> Self: """Return a new Retry object with incremented retry counters. :param response: A response object, or None, if the server did not diff --git a/script.module.urllib3/lib/urllib3/util/ssl_.py b/script.module.urllib3/lib/urllib3/util/ssl_.py index e0a7c04a3c..5e93be9cef 100644 --- a/script.module.urllib3/lib/urllib3/util/ssl_.py +++ b/script.module.urllib3/lib/urllib3/util/ssl_.py @@ -1,5 +1,6 @@ from __future__ import annotations +import hashlib import hmac import os import socket @@ -7,7 +8,6 @@ import typing import warnings from binascii import unhexlify -from hashlib import md5, sha1, sha256 from ..exceptions import ProxySchemeUnsupported, SSLError from .url import _BRACELESS_IPV6_ADDRZ_RE, _IPV4_RE @@ -21,7 +21,10 @@ _TYPE_VERSION_INFO = typing.Tuple[int, int, int, str, int] # Maps the length of a digest to a possible hash function producing this digest -HASHFUNC_MAP = {32: md5, 40: sha1, 64: sha256} +HASHFUNC_MAP = { + length: getattr(hashlib, algorithm, None) + for length, algorithm in ((32, "md5"), (40, "sha1"), (64, "sha256")) +} def _is_bpo_43522_fixed( @@ -78,7 +81,7 @@ def _is_has_never_check_common_name_reliable( if typing.TYPE_CHECKING: from ssl import VerifyMode - from typing import Literal, TypedDict + from typing import TypedDict from .ssltransport import SSLTransport as SSLTransportType @@ -159,9 +162,13 @@ def assert_fingerprint(cert: bytes | None, fingerprint: str) -> None: fingerprint = fingerprint.replace(":", "").lower() digest_length = len(fingerprint) - hashfunc = HASHFUNC_MAP.get(digest_length) - if not hashfunc: + if digest_length not in HASHFUNC_MAP: raise SSLError(f"Fingerprint of invalid length: {fingerprint}") + hashfunc = HASHFUNC_MAP.get(digest_length) + if hashfunc is None: + raise SSLError( + f"Hash function implementation unavailable for fingerprint length: {digest_length}" + ) # We need encode() here for py32; works on py2 and p33. fingerprint_bytes = unhexlify(fingerprint.encode()) @@ -319,14 +326,9 @@ def create_urllib3_context( # Enable post-handshake authentication for TLS 1.3, see GH #1634. PHA is # necessary for conditional client cert authentication with TLS 1.3. - # The attribute is None for OpenSSL <= 1.1.0 or does not exist in older - # versions of Python. We only enable if certificate verification is enabled to work - # around Python issue #37428 - # See: https://bugs.python.org/issue37428 - if ( - cert_reqs == ssl.CERT_REQUIRED - and getattr(context, "post_handshake_auth", None) is not None - ): + # The attribute is None for OpenSSL <= 1.1.0 or does not exist when using + # an SSLContext created by pyOpenSSL. + if getattr(context, "post_handshake_auth", None) is not None: context.post_handshake_auth = True # The order of the below lines setting verify_mode and check_hostname @@ -370,7 +372,7 @@ def ssl_wrap_socket( ca_cert_dir: str | None = ..., key_password: str | None = ..., ca_cert_data: None | str | bytes = ..., - tls_in_tls: Literal[False] = ..., + tls_in_tls: typing.Literal[False] = ..., ) -> ssl.SSLSocket: ... 
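
The `HASHFUNC_MAP` rework above also changes the failure mode: on interpreters whose `hashlib` is missing one of md5/sha1/sha256 (as on some FIPS builds), the module still imports, and a fingerprint of the affected length raises a descriptive `SSLError` at use time instead. A pinning sketch; the digest is a placeholder, not a real certificate's:

    import urllib3

    pool = urllib3.HTTPSConnectionPool(
        "example.com",
        assert_fingerprint=(  # 64 hex chars, so matched with SHA-256
            "9f86d081884c7d659a2feaa0c55ad015"
            "a3bf4f1b2b0b822cd15d6c15b0f00a08"
        ),
    )
    response = pool.request("GET", "/")
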
@@ -462,10 +464,7 @@ def ssl_wrap_socket( else: context.load_cert_chain(certfile, keyfile, key_password) - try: - context.set_alpn_protocols(ALPN_PROTOCOLS) - except NotImplementedError: # Defensive: in CI, we always have set_alpn_protocols - pass + context.set_alpn_protocols(ALPN_PROTOCOLS) ssl_sock = _ssl_wrap_socket_impl(sock, context, tls_in_tls, server_hostname) return ssl_sock diff --git a/script.module.urllib3/lib/urllib3/util/ssltransport.py b/script.module.urllib3/lib/urllib3/util/ssltransport.py index fa9f2b37c5..d69ccb3420 100644 --- a/script.module.urllib3/lib/urllib3/util/ssltransport.py +++ b/script.module.urllib3/lib/urllib3/util/ssltransport.py @@ -8,12 +8,11 @@ from ..exceptions import ProxySchemeUnsupported if typing.TYPE_CHECKING: - from typing import Literal + from typing_extensions import Self from .ssl_ import _TYPE_PEER_CERT_RET, _TYPE_PEER_CERT_RET_DICT -_SelfT = typing.TypeVar("_SelfT", bound="SSLTransport") _WriteBuffer = typing.Union[bytearray, memoryview] _ReturnValue = typing.TypeVar("_ReturnValue") @@ -70,7 +69,7 @@ def __init__( # Perform initial handshake. self._ssl_io_loop(self.sslobj.do_handshake) - def __enter__(self: _SelfT) -> _SelfT: + def __enter__(self) -> Self: return self def __exit__(self, *_: typing.Any) -> None: @@ -174,12 +173,12 @@ def close(self) -> None: @typing.overload def getpeercert( - self, binary_form: Literal[False] = ... + self, binary_form: typing.Literal[False] = ... ) -> _TYPE_PEER_CERT_RET_DICT | None: ... @typing.overload - def getpeercert(self, binary_form: Literal[True]) -> bytes | None: + def getpeercert(self, binary_form: typing.Literal[True]) -> bytes | None: ... def getpeercert(self, binary_form: bool = False) -> _TYPE_PEER_CERT_RET: @@ -194,9 +193,6 @@ def cipher(self) -> tuple[str, str, int] | None: def selected_alpn_protocol(self) -> str | None: return self.sslobj.selected_alpn_protocol() - def selected_npn_protocol(self) -> str | None: - return self.sslobj.selected_npn_protocol() - def shared_ciphers(self) -> list[tuple[str, str, int]] | None: return self.sslobj.shared_ciphers() diff --git a/script.module.urllib3/lib/urllib3/util/timeout.py b/script.module.urllib3/lib/urllib3/util/timeout.py index f044625c35..4bb1be11d9 100644 --- a/script.module.urllib3/lib/urllib3/util/timeout.py +++ b/script.module.urllib3/lib/urllib3/util/timeout.py @@ -101,10 +101,6 @@ class Timeout: the case; if a server streams one byte every fifteen seconds, a timeout of 20 seconds will not trigger, even though the request will take several minutes to complete. - - If your goal is to cut off any request after a set amount of wall clock - time, consider having a second "watcher" thread to cut off a slow - request. """ #: A sentinel object representing the default timeout value
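
Pulling the retry and timeout changes above together, a configuration sketch against a hypothetical endpoint:

    import urllib3
    from urllib3.util import Retry, Timeout

    http = urllib3.PoolManager(
        # Proxy-Authorization now joins Cookie and Authorization in the
        # headers stripped by default when a redirect leaves the host.
        retries=Retry(total=3, backoff_factor=0.5),
        # `read` bounds each socket read, not total wall-clock time, per
        # the Timeout docstring above.
        timeout=Timeout(connect=2.0, read=7.0),
    )
    response = http.request("GET", "https://example.com/")
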