From 82cd43fb7fac7a89abcb129ee6c8a75270bfa2e1 Mon Sep 17 00:00:00 2001 From: Mike Baamonde Date: Fri, 3 Jun 2022 12:56:58 -0400 Subject: [PATCH 01/68] Update ES Python client dependency to 8.2.0. Also add the elastic-transport library as a dependency. --- setup.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 3de965e56..f8866e995 100644 --- a/setup.py +++ b/setup.py @@ -50,7 +50,8 @@ def str_from_file(name): # transitive dependencies: # urllib3: MIT # aiohttp: Apache 2.0 - "elasticsearch[async]==7.14.0", + "elasticsearch[async]==8.2.0", + "elastic-transport==8.1.2", "urllib3==1.26.9", # License: BSD "psutil==5.8.0", From e7d83cc60b890f7a1526958f808535e36f9bd126 Mon Sep 17 00:00:00 2001 From: Mike Baamonde Date: Fri, 3 Jun 2022 14:09:11 -0400 Subject: [PATCH 02/68] Update customizations of the ES client and ensure backwards compatibility. As of 8.0.0, transport logic has been moved out of `elasticsearch-py` and into a standalone library, `elastic-transport-python`. This includes substantial changes to the lower-level APIs for configuring client objects. This commit adjusts accordingly, but preserves Rally's previous behavior. This commit also reproduces the product verification behavior of `v7.14.0` of the client: https://github.com/elastic/elasticsearch-py/blob/v7.14.0/elasticsearch/transport.py#L606 As of `v8.0.0`, the client determines whether the server is Elasticsearch by checking whether HTTP responses contain the `X-elastic-product` header. If they do not, it raises an `UnsupportedProductException`. This header was only introduced in Elasticsearch `7.14.0`, however, so the client will consider any version of ES prior to `7.14.0` unsupported due to responses not including it. Because Rally needs to support versions of ES >= 6.8.0, we resurrect this earlier logic for determining the authenticity of the server, which does not rely exclusively on this header. 
--- esrally/async_connection.py | 65 ++--- esrally/client.py | 467 ++++++++++++++++++++++++++++++++++-- 2 files changed, 472 insertions(+), 60 deletions(-) diff --git a/esrally/async_connection.py b/esrally/async_connection.py index 09b77d2dd..526cdb759 100644 --- a/esrally/async_connection.py +++ b/esrally/async_connection.py @@ -4,7 +4,7 @@ from typing import List, Optional import aiohttp -import elasticsearch +import elastic_transport from aiohttp import BaseConnector, RequestInfo from aiohttp.client_proto import ResponseHandler from aiohttp.helpers import BaseTimerContext @@ -168,47 +168,19 @@ def response(self, path): return body -class AIOHttpConnection(elasticsearch.AIOHttpConnection): - def __init__( - self, - host="localhost", - port=None, - http_auth=None, - use_ssl=False, - ssl_assert_fingerprint=None, - headers=None, - ssl_context=None, - http_compress=None, - cloud_id=None, - api_key=None, - opaque_id=None, - loop=None, - trace_config=None, - **kwargs, - ): - super().__init__( - host=host, - port=port, - http_auth=http_auth, - use_ssl=use_ssl, - ssl_assert_fingerprint=ssl_assert_fingerprint, - # provided to the base class via `maxsize` to keep base class state consistent despite Rally - # calling the attribute differently. 
- maxsize=kwargs.get("max_connections", 0), - headers=headers, - ssl_context=ssl_context, - http_compress=http_compress, - cloud_id=cloud_id, - api_key=api_key, - opaque_id=opaque_id, - loop=loop, - **kwargs, - ) +class RallyAiohttpHttpNode(elastic_transport.AiohttpHttpNode): + def __init__(self, config): + super().__init__(config) + + self._loop = asyncio.get_running_loop() + self._limit = None - self._trace_configs = [trace_config] if trace_config else None - self._enable_cleanup_closed = kwargs.get("enable_cleanup_closed", True) + client_options = config._extras.get("_rally_client_options") + if client_options: + self._trace_configs = client_options.get("trace_config") + self._enable_cleanup_closed = client_options.get("enable_cleanup_closed") - static_responses = kwargs.get("static_responses") + static_responses = client_options.get("static_responses") self.use_static_responses = static_responses is not None if self.use_static_responses: @@ -223,21 +195,22 @@ def __init__( self._request_class = aiohttp.ClientRequest self._response_class = RawClientResponse - async def _create_aiohttp_session(self): - if self.loop is None: - self.loop = asyncio.get_running_loop() - + def _create_aiohttp_session(self): if self.use_static_responses: connector = StaticConnector(limit=self._limit, enable_cleanup_closed=self._enable_cleanup_closed) else: connector = aiohttp.TCPConnector( - limit=self._limit, use_dns_cache=True, ssl_context=self._ssl_context, enable_cleanup_closed=self._enable_cleanup_closed + limit=self._limit, + use_dns_cache=True, + # May need to be just ssl=self._ssl_context + ssl_context=self._ssl_context, + enable_cleanup_closed=self._enable_cleanup_closed, ) self.session = aiohttp.ClientSession( headers=self.headers, auto_decompress=True, - loop=self.loop, + loop=self._loop, cookie_jar=aiohttp.DummyCookieJar(), request_class=self._request_class, response_class=self._response_class, diff --git a/esrally/client.py b/esrally/client.py index 33bb680ee..8571accc9 
100644 --- a/esrally/client.py +++ b/esrally/client.py @@ -17,14 +17,41 @@ import contextvars import logging +import re import time +import warnings +from datetime import date, datetime +from typing import Any, Iterable, Mapping, Optional import certifi +import elastic_transport import urllib3 +from elastic_transport import ( + ApiResponse, + BinaryApiResponse, + HeadApiResponse, + ListApiResponse, + ObjectApiResponse, + TextApiResponse, +) +from elastic_transport.client_utils import DEFAULT, percent_encode +from elasticsearch.compat import warn_stacklevel +from elasticsearch.exceptions import ( + HTTP_EXCEPTIONS, + ApiError, + ElasticsearchWarning, + UnsupportedProductError, +) from urllib3.connection import is_ipaddress from esrally import doc_link, exceptions -from esrally.utils import console, convert +from esrally.utils import console, convert, versions + +_WARNING_RE = re.compile(r"\"([^\"]*)\"") +# TODO: get versionstr dynamically +_COMPAT_MIMETYPE_TEMPLATE = "application/vnd.elasticsearch+%s; compatible-with=" + str("8.2.0".partition(".")[0]) +_COMPAT_MIMETYPE_RE = re.compile(r"application/(json|x-ndjson|vnd\.mapbox-vector-tile)") +_COMPAT_MIMETYPE_SUB = _COMPAT_MIMETYPE_TEMPLATE % (r"\g<1>",) class RequestContextManager: @@ -112,13 +139,22 @@ def return_raw_response(cls): class EsClientFactory: """ - Abstracts how the Elasticsearch client is created. Intended for testing. + Abstracts how the Elasticsearch client is created and customizes the client for backwards + compatibility guarantees that are broader than the library's defaults. 
""" - def __init__(self, hosts, client_options): - self.hosts = hosts + def __init__(self, hosts, client_options, distribution_version=None): + # We need to pass a list of connection strings to the client as of elasticsearch-py 8.0 + def host_string(host): + protocol = "https" if client_options.get("use_ssl") else "http" + return f"{protocol}://{host['host']}:{host['port']}" + + self.hosts = [host_string(h) for h in hosts] self.client_options = dict(client_options) self.ssl_context = None + # This attribute is necessary for the backwards-compatibility logic contained in + # RallySyncElasticsearch.perform_request() and RallyAsyncElasticsearch.perform_request(). + self.distribution_version = distribution_version self.logger = logging.getLogger(__name__) masked_client_options = dict(client_options) @@ -134,13 +170,15 @@ def __init__(self, hosts, client_options): import ssl self.logger.info("SSL support: on") - self.client_options["scheme"] = "https" self.ssl_context = ssl.create_default_context( ssl.Purpose.SERVER_AUTH, cafile=self.client_options.pop("ca_certs", certifi.where()) ) - if not self.client_options.pop("verify_certs", True): + # We call get() here instead of pop() in order to pass verify_certs through as a kwarg + # to the elasticsearch.Elasticsearch constructor. Setting the ssl_context's verify_mode to + # ssl.CERT_NONE is insufficient with version 8.0+ of elasticsearch-py. 
+ if not self.client_options.get("verify_certs", True): self.logger.info("SSL certificate verification: off") # order matters to avoid ValueError: check_hostname needs a SSL context with either CERT_OPTIONAL or CERT_REQUIRED self.ssl_context.check_hostname = False @@ -189,11 +227,10 @@ def __init__(self, hosts, client_options): self.ssl_context.load_cert_chain(certfile=client_cert, keyfile=client_key) else: self.logger.info("SSL support: off") - self.client_options["scheme"] = "http" if self._is_set(self.client_options, "basic_auth_user") and self._is_set(self.client_options, "basic_auth_password"): self.logger.info("HTTP basic authentication: on") - self.client_options["http_auth"] = (self.client_options.pop("basic_auth_user"), self.client_options.pop("basic_auth_password")) + self.client_options["basic_auth"] = (self.client_options.pop("basic_auth_user"), self.client_options.pop("basic_auth_password")) else: self.logger.info("HTTP basic authentication: off") @@ -235,7 +272,245 @@ def create(self): # pylint: disable=import-outside-toplevel import elasticsearch - return elasticsearch.Elasticsearch(hosts=self.hosts, ssl_context=self.ssl_context, **self.client_options) + distro = self.distribution_version + + # This reproduces the product verification behavior of v7.14.0 of the client: + # https://github.com/elastic/elasticsearch-py/blob/v7.14.0/elasticsearch/transport.py#L606 + # + # As of v8.0.0, the client determines whether the server is Elasticsearch by checking + # whether HTTP responses contain the `X-elastic-product` header. If they do not, it raises + # an `UnsupportedProductException`. This header was only introduced in Elasticsearch 7.14.0, + # however, so the client will consider any version of ES prior to 7.14.0 unsupported due to + # responses not including it. 
+ # + # Because Rally needs to support versions of ES >= 6.8.0, we resurrect the previous + # logic for determining the authenticity of the server, which does not rely exclusively + # on this header. + class _ProductChecker: + """Class which verifies we're connected to a supported product""" + + # States that can be returned from 'check_product' + SUCCESS = True + UNSUPPORTED_PRODUCT = 2 + UNSUPPORTED_DISTRIBUTION = 3 + + @classmethod + def raise_error(cls, state, meta, body): + # These states mean the product_check() didn't fail so do nothing. + if state in (None, True): + return + + if state == cls.UNSUPPORTED_DISTRIBUTION: + message = "The client noticed that the server is not a supported distribution of Elasticsearch" + else: # UNSUPPORTED_PRODUCT + message = "The client noticed that the server is not Elasticsearch and we do not support this unknown product" + raise UnsupportedProductError(message, meta=meta, body=body) + + @classmethod + def check_product(cls, headers, response): + # type: (dict[str, str], dict[str, str]) -> int + """Verifies that the server we're talking to is Elasticsearch. + Does this by checking HTTP headers and the deserialized + response to the 'info' API. Returns one of the states above. + """ + try: + version = response.get("version", {}) + version_number = tuple( + int(x) if x is not None else 999 + for x in re.search(r"^([0-9]+)\.([0-9]+)(?:\.([0-9]+))?", version["number"]).groups() + ) + except (KeyError, TypeError, ValueError, AttributeError): + # No valid 'version.number' field, effectively 0.0.0 + version = {} + version_number = (0, 0, 0) + + # Check all of the fields and headers for missing/valid values. 
+ try: + bad_tagline = response.get("tagline", None) != "You Know, for Search" + bad_build_flavor = version.get("build_flavor", None) != "default" + bad_product_header = headers.get("x-elastic-product", None) != "Elasticsearch" + except (AttributeError, TypeError): + bad_tagline = True + bad_build_flavor = True + bad_product_header = True + + # 7.0-7.13 and there's a bad 'tagline' or unsupported 'build_flavor' + if (7, 0, 0) <= version_number < (7, 14, 0): + if bad_tagline: + return cls.UNSUPPORTED_PRODUCT + elif bad_build_flavor: + return cls.UNSUPPORTED_DISTRIBUTION + + elif ( + # No version or version less than 6.x + version_number < (6, 0, 0) + # 6.x and there's a bad 'tagline' + or ((6, 0, 0) <= version_number < (7, 0, 0) and bad_tagline) + # 7.14+ and there's a bad 'X-Elastic-Product' HTTP header + or ((7, 14, 0) <= version_number and bad_product_header) + ): + return cls.UNSUPPORTED_PRODUCT + + return True + + class RallySyncElasticsearch(elasticsearch.Elasticsearch): + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self._verified_elasticsearch = None + + if distro is not None: + self.distribution_version = versions.Version.from_string(distro) + else: + self.distribution_version = None + + def perform_request( + self, + method: str, + path: str, + *, + params: Optional[Mapping[str, Any]] = None, + headers: Optional[Mapping[str, str]] = None, + body: Optional[Any] = None, + ) -> ApiResponse[Any]: + + if body is not None: + if headers is None: + headers = {"content-type": "application/json", "accept": "application/json"} + else: + if headers.get("content-type") is None: + headers["content-type"] = "application/json" + headers["accept"] = "application/json" + + if headers: + request_headers = self._headers.copy() + request_headers.update(headers) + else: + request_headers = self._headers + + if self._verified_elasticsearch is None: + info = self.transport.perform_request(method="GET", target="/", headers=request_headers) + info_meta 
= info.meta + info_body = info.body + + self._verified_elasticsearch = _ProductChecker.check_product(info_meta.headers, info_body) + + if self._verified_elasticsearch is not True: + _ProductChecker.raise_error(self._verified_elasticsearch, info_meta, info_body) + + def mimetype_header_to_compat(header: str) -> None: + # Converts all parts of a Accept/Content-Type headers + # from application/X -> application/vnd.elasticsearch+X + nonlocal request_headers + mimetype = request_headers.get(header, None) + if mimetype: + request_headers[header] = _COMPAT_MIMETYPE_RE.sub(_COMPAT_MIMETYPE_SUB, mimetype) + + # Custom behavior for backwards compatibility with versions of ES that do not + # recognize the compatible-with header. + if self.distribution_version is not None and self.distribution_version >= versions.Version.from_string("8.0.0"): + mimetype_header_to_compat("Accept") + mimetype_header_to_compat("Content-Type") + + def _escape(value: Any) -> str: + """ + Escape a single value of a URL string or a query parameter. If it is a list + or tuple, turn it into a comma-separated string first. 
+ """ + + # make sequences into comma-separated stings + if isinstance(value, (list, tuple)): + value = ",".join([_escape(item) for item in value]) + + # dates and datetimes into isoformat + elif isinstance(value, (date, datetime)): + value = value.isoformat() + + # make bools into true/false strings + elif isinstance(value, bool): + value = str(value).lower() + + elif isinstance(value, bytes): + return value.decode("utf-8", "surrogatepass") + + if not isinstance(value, str): + return str(value) + return value + + def _quote(value: Any) -> str: + return percent_encode(_escape(value), ",*") + + def _quote_query(query: Mapping[str, Any]) -> str: + return "&".join([f"{k}={_quote(v)}" for k, v in query.items()]) + + if params: + target = f"{path}?{_quote_query(params)}" + else: + target = path + + meta, resp_body = self.transport.perform_request( + method, + target, + headers=request_headers, + body=body, + request_timeout=self._request_timeout, + max_retries=self._max_retries, + retry_on_status=self._retry_on_status, + retry_on_timeout=self._retry_on_timeout, + client_meta=self._client_meta, + ) + + # HEAD with a 404 is returned as a normal response + # since this is used as an 'exists' functionality. + if not (method == "HEAD" and meta.status == 404) and ( + not 200 <= meta.status < 299 + and (self._ignore_status is DEFAULT or self._ignore_status is None or meta.status not in self._ignore_status) + ): + message = str(resp_body) + + # If the response is an error response try parsing + # the raw Elasticsearch error before raising. 
+ if isinstance(resp_body, dict): + try: + error = resp_body.get("error", message) + if isinstance(error, dict) and "type" in error: + error = error["type"] + message = error + except (ValueError, KeyError, TypeError): + pass + + raise HTTP_EXCEPTIONS.get(meta.status, ApiError)(message=message, meta=meta, body=resp_body) + + # 'Warning' headers should be reraised as 'ElasticsearchWarning' + if "warning" in meta.headers: + warning_header = (meta.headers.get("warning") or "").strip() + warning_messages: Iterable[str] = _WARNING_RE.findall(warning_header) or (warning_header,) + stacklevel = warn_stacklevel() + for warning_message in warning_messages: + warnings.warn( + warning_message, + category=ElasticsearchWarning, + stacklevel=stacklevel, + ) + + if method == "HEAD": + response = HeadApiResponse(meta=meta) + elif isinstance(resp_body, dict): + response = ObjectApiResponse(body=resp_body, meta=meta) # type: ignore[assignment] + elif isinstance(resp_body, list): + response = ListApiResponse(body=resp_body, meta=meta) # type: ignore[assignment] + elif isinstance(resp_body, str): + response = TextApiResponse( # type: ignore[assignment] + body=resp_body, + meta=meta, + ) + elif isinstance(resp_body, bytes): + response = BinaryApiResponse(body=resp_body, meta=meta) # type: ignore[assignment] + else: + response = ApiResponse(body=resp_body, meta=meta) # type: ignore[assignment] + + return response + + return RallySyncElasticsearch(hosts=self.hosts, ssl_context=self.ssl_context, **self.client_options) def create_async(self): # pylint: disable=import-outside-toplevel @@ -271,22 +546,186 @@ async def on_request_end(session, trace_config_ctx, params): self.client_options["serializer"] = LazyJSONSerializer() self.client_options["trace_config"] = trace_config - class VerifiedAsyncTransport(elasticsearch.AsyncTransport): + client_options = self.client_options + distro = self.distribution_version + + class RallyAsyncTransport(elastic_transport.AsyncTransport): + def 
__init__(self, *args, **kwargs): + # We need to pass a trace config to the session that's created in + # async_connection.RallyAiohttpHttpNode, which is a subclass of + # elastic_transport.AiohttpHttpNode. + # + # Its constructor only accepts an elastic_transport.NodeConfig object. + # Because we do not fully control creation of these objects, we need to + # pass the trace_config by adding it to the NodeConfig's `extras`, which + # can contain arbitrary metadata. + client_options.update({"trace_config": [trace_config]}) + node_configs = args[0] + for conf in node_configs: + extras = conf._extras + extras.update({"_rally_client_options": client_options}) + conf._extras = extras + original_args = args + new_args = (node_configs, *original_args[1:]) + + super().__init__(*new_args, node_class=esrally.async_connection.RallyAiohttpHttpNode, **kwargs) + + class RallyAsyncElasticsearch(elasticsearch.AsyncElasticsearch, RequestContextHolder): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) # skip verification at this point; we've already verified this earlier with the synchronous client. # The async client is used in the hot code path and we use customized overrides (such as that we don't # parse response bodies in some cases for performance reasons, e.g. when using the bulk API). 
self._verified_elasticsearch = True + if distro is not None: + self.distribution_version = versions.Version.from_string(distro) + else: + self.distribution_version = None + + async def perform_request( + self, + method: str, + path: str, + *, + params: Optional[Mapping[str, Any]] = None, + headers: Optional[Mapping[str, str]] = None, + body: Optional[Any] = None, + ) -> ApiResponse[Any]: + + if body is not None: + if headers is None: + headers = {"content-type": "application/json", "accept": "application/json"} + else: + if headers.get("content-type") is None: + headers["content-type"] = "application/json" + headers["accept"] = "application/json" + + if headers: + request_headers = self._headers.copy() + request_headers.update(headers) + else: + request_headers = self._headers - class RallyAsyncElasticsearch(elasticsearch.AsyncElasticsearch, RequestContextHolder): - pass + def mimetype_header_to_compat(header: str) -> None: + # Converts all parts of a Accept/Content-Type headers + # from application/X -> application/vnd.elasticsearch+X + nonlocal request_headers + mimetype = request_headers.get(header, None) + if mimetype: + request_headers[header] = _COMPAT_MIMETYPE_RE.sub(_COMPAT_MIMETYPE_SUB, mimetype) + + if self.distribution_version is not None and self.distribution_version >= versions.Version.from_string("8.0.0"): + mimetype_header_to_compat("Accept") + mimetype_header_to_compat("Content-Type") + + def _escape(value: Any) -> str: + """ + Escape a single value of a URL string or a query parameter. If it is a list + or tuple, turn it into a comma-separated string first. 
+ """ + + # make sequences into comma-separated stings + if isinstance(value, (list, tuple)): + value = ",".join([_escape(item) for item in value]) + + # dates and datetimes into isoformat + elif isinstance(value, (date, datetime)): + value = value.isoformat() + + # make bools into true/false strings + elif isinstance(value, bool): + value = str(value).lower() + + elif isinstance(value, bytes): + return value.decode("utf-8", "surrogatepass") + + if not isinstance(value, str): + return str(value) + return value + + def _quote(value: Any) -> str: + return percent_encode(_escape(value), ",*") + + def _quote_query(query: Mapping[str, Any]) -> str: + return "&".join([f"{k}={_quote(v)}" for k, v in query.items()]) + + if params: + target = f"{path}?{_quote_query(params)}" + else: + target = path + + meta, resp_body = await self.transport.perform_request( + method, + target, + headers=request_headers, + body=body, + request_timeout=self._request_timeout, + max_retries=self._max_retries, + retry_on_status=self._retry_on_status, + retry_on_timeout=self._retry_on_timeout, + client_meta=self._client_meta, + ) + + # HEAD with a 404 is returned as a normal response + # since this is used as an 'exists' functionality. + if not (method == "HEAD" and meta.status == 404) and ( + not 200 <= meta.status < 299 + and (self._ignore_status is DEFAULT or self._ignore_status is None or meta.status not in self._ignore_status) + ): + message = str(resp_body) + + # If the response is an error response try parsing + # the raw Elasticsearch error before raising. 
+ if isinstance(resp_body, dict): + try: + error = resp_body.get("error", message) + if isinstance(error, dict) and "type" in error: + error = error["type"] + message = error + except (ValueError, KeyError, TypeError): + pass + + raise HTTP_EXCEPTIONS.get(meta.status, ApiError)(message=message, meta=meta, body=resp_body) + + # 'Warning' headers should be reraised as 'ElasticsearchWarning' + if "warning" in meta.headers: + warning_header = (meta.headers.get("warning") or "").strip() + warning_messages: Iterable[str] = _WARNING_RE.findall(warning_header) or (warning_header,) + stacklevel = warn_stacklevel() + for warning_message in warning_messages: + warnings.warn( + warning_message, + category=ElasticsearchWarning, + stacklevel=stacklevel, + ) + + if method == "HEAD": + response = HeadApiResponse(meta=meta) + elif isinstance(resp_body, dict): + response = ObjectApiResponse(body=resp_body, meta=meta) # type: ignore[assignment] + elif isinstance(resp_body, list): + response = ListApiResponse(body=resp_body, meta=meta) # type: ignore[assignment] + elif isinstance(resp_body, str): + response = TextApiResponse( # type: ignore[assignment] + body=resp_body, + meta=meta, + ) + elif isinstance(resp_body, bytes): + response = BinaryApiResponse(body=resp_body, meta=meta) # type: ignore[assignment] + else: + response = ApiResponse(body=resp_body, meta=meta) # type: ignore[assignment] + + return response + + # max_connections and trace_config are not valid kwargs, so we pop them + max = self.client_options.pop("max_connections") + self.client_options.pop("trace_config") return RallyAsyncElasticsearch( hosts=self.hosts, - transport_class=VerifiedAsyncTransport, - connection_class=esrally.async_connection.AIOHttpConnection, + transport_class=RallyAsyncTransport, ssl_context=self.ssl_context, + maxsize=max, **self.client_options, ) From 5eaf5664d65787c0b462047fa5a521b757f8e5d9 Mon Sep 17 00:00:00 2001 From: Mike Baamonde Date: Fri, 3 Jun 2022 14:24:19 -0400 Subject: [PATCH 03/68] 
Update client.wait_for_rest_layer(). The client's `transport` object will no longer have a `hosts` attribute, but will have a `node_pool` attribute. Exceptions are also different-- a TransportError will not have a `status_code` attribute, but should contain a status code as its `message`. --- esrally/client.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/esrally/client.py b/esrally/client.py index 8571accc9..c233a96b3 100644 --- a/esrally/client.py +++ b/esrally/client.py @@ -741,7 +741,7 @@ def wait_for_rest_layer(es, max_attempts=40): # assume that at least the hosts that we expect to contact should be available. Note that this is not 100% # bullet-proof as a cluster could have e.g. dedicated masters which are not contained in our list of target hosts # but this is still better than just checking for any random node's REST API being reachable. - expected_node_count = len(es.transport.hosts) + expected_node_count = len(es.transport.node_pool) logger = logging.getLogger(__name__) for attempt in range(max_attempts): logger.debug("REST API is available after %s attempts", attempt) @@ -760,12 +760,13 @@ def wait_for_rest_layer(es, max_attempts=40): else: logger.debug("Got connection error on attempt [%s]. Sleeping...", attempt) time.sleep(3) + # TODO: distinguish between TransportError and ApiError except elasticsearch.TransportError as e: # cluster block, x-pack not initialized yet, our wait condition is not reached - if e.status_code in (503, 401, 408): - logger.debug("Got status code [%s] on attempt [%s]. Sleeping...", e.status_code, attempt) + if e.message in (503, 401, 408): + logger.debug("Got status code [%s] on attempt [%s]. 
Sleeping...", e.message, attempt) time.sleep(3) else: - logger.warning("Got unexpected status code [%s] on attempt [%s].", e.status_code, attempt) + logger.warning("Got unexpected status code [%s] on attempt [%s].", e.message, attempt) raise e return False From 3ed00b899b462dacdcf30c66164df6b59dddbda3 Mon Sep 17 00:00:00 2001 From: Mike Baamonde Date: Fri, 3 Jun 2022 14:27:57 -0400 Subject: [PATCH 04/68] The `_normalize_hosts` function has been removed from the client. --- esrally/utils/opts.py | 48 ++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 45 insertions(+), 3 deletions(-) diff --git a/esrally/utils/opts.py b/esrally/utils/opts.py index 663481924..84f510aec 100644 --- a/esrally/utils/opts.py +++ b/esrally/utils/opts.py @@ -146,6 +146,51 @@ def all_options(self): return self.parsed_options +def _normalize_hosts(hosts): + # pylint: disable=import-outside-toplevel + from urllib.parse import unquote, urlparse + + string_types = str, bytes + # if hosts are empty, just defer to defaults down the line + if hosts is None: + return [{}] + + # passed in just one string + if isinstance(hosts, string_types): + hosts = [hosts] + + out = [] + # normalize hosts to dicts + for host in hosts: + if isinstance(host, string_types): + if "://" not in host: + host = "//%s" % host + + parsed_url = urlparse(host) + h = {"host": parsed_url.hostname} + + if parsed_url.port: + h["port"] = parsed_url.port + + if parsed_url.scheme == "https": + h["port"] = parsed_url.port or 443 + h["use_ssl"] = True + + if parsed_url.username or parsed_url.password: + h["http_auth"] = "%s:%s" % ( + unquote(parsed_url.username), + unquote(parsed_url.password), + ) + + if parsed_url.path and parsed_url.path != "/": + h["url_prefix"] = parsed_url.path + + out.append(h) + else: + out.append(host) + return out + + class TargetHosts(ConnectOptions): DEFAULT = "default" @@ -163,9 +208,6 @@ def normalize_to_dict(arg): This is needed to support backwards compatible --target-hosts for single clusters 
that are not defined as a json string or file. """ - # pylint: disable=import-outside-toplevel - from elasticsearch.client import _normalize_hosts - return {TargetHosts.DEFAULT: _normalize_hosts(arg)} self.parsed_options = to_dict(self.argvalue, default_parser=normalize_to_dict) From c7d58583540b8f613941dc5e0532ff437d870eae Mon Sep 17 00:00:00 2001 From: Mike Baamonde Date: Fri, 3 Jun 2022 14:28:41 -0400 Subject: [PATCH 05/68] Update client unit tests. --- tests/client_test.py | 77 ++++++++++++++++---------------------------- 1 file changed, 28 insertions(+), 49 deletions(-) diff --git a/tests/client_test.py b/tests/client_test.py index 70e074cdf..0acf29af8 100644 --- a/tests/client_test.py +++ b/tests/client_test.py @@ -31,7 +31,6 @@ from pytest_httpserver import HTTPServer from esrally import client, doc_link, exceptions -from esrally.async_connection import AIOHttpConnection from esrally.utils import console @@ -46,10 +45,9 @@ def test_create_http_connection(self): f = client.EsClientFactory(hosts, client_options) - assert f.hosts == hosts + assert f.hosts == ["http://localhost:9200"] assert f.ssl_context is None - assert f.client_options["scheme"] == "http" - assert "http_auth" not in f.client_options + assert "basic_auth" not in f.client_options assert client_options == original_client_options @@ -59,7 +57,7 @@ def test_create_https_connection_verify_server(self, mocked_load_cert_chain): client_options = { "use_ssl": True, "verify_certs": True, - "http_auth": ("user", "password"), + "basic_auth": ("user", "password"), } # make a copy so we can verify later that the factory did not modify it original_client_options = deepcopy(client_options) @@ -79,14 +77,13 @@ def test_create_https_connection_verify_server(self, mocked_load_cert_chain): not mocked_load_cert_chain.called ), "ssl_context.load_cert_chain should not have been called as we have not supplied client certs" - assert f.hosts == hosts + assert f.hosts == ["https://localhost:9200"] assert 
f.ssl_context.check_hostname assert f.ssl_context.verify_mode == ssl.CERT_REQUIRED - assert f.client_options["scheme"] == "https" - assert f.client_options["http_auth"] == ("user", "password") + assert f.client_options["basic_auth"] == ("user", "password") + assert f.client_options["verify_certs"] assert "use_ssl" not in f.client_options - assert "verify_certs" not in f.client_options assert "ca_certs" not in f.client_options assert client_options == original_client_options @@ -97,7 +94,7 @@ def test_create_https_connection_verify_self_signed_server_and_client_certificat client_options = { "use_ssl": True, "verify_certs": True, - "http_auth": ("user", "password"), + "basic_auth": ("user", "password"), "ca_certs": os.path.join(self.cwd, "utils/resources/certs/ca.crt"), "client_cert": os.path.join(self.cwd, "utils/resources/certs/client.crt"), "client_key": os.path.join(self.cwd, "utils/resources/certs/client.key"), @@ -121,14 +118,14 @@ def test_create_https_connection_verify_self_signed_server_and_client_certificat keyfile=client_options["client_key"], ) - assert f.hosts == hosts + assert f.hosts == ["https://localhost:9200"] assert f.ssl_context.check_hostname assert f.ssl_context.verify_mode == ssl.CERT_REQUIRED - assert f.client_options["scheme"] == "https" - assert f.client_options["http_auth"] == ("user", "password") + assert f.client_options["basic_auth"] == ("user", "password") + assert f.client_options["verify_certs"] + assert "use_ssl" not in f.client_options - assert "verify_certs" not in f.client_options assert "ca_certs" not in f.client_options assert "client_cert" not in f.client_options assert "client_key" not in f.client_options @@ -141,7 +138,7 @@ def test_create_https_connection_only_verify_self_signed_server_certificate(self client_options = { "use_ssl": True, "verify_certs": True, - "http_auth": ("user", "password"), + "basic_auth": ("user", "password"), "ca_certs": os.path.join(self.cwd, "utils/resources/certs/ca.crt"), } # make a copy so we can 
verify later that the factory did not modify it @@ -161,14 +158,13 @@ def test_create_https_connection_only_verify_self_signed_server_certificate(self assert ( not mocked_load_cert_chain.called ), "ssl_context.load_cert_chain should not have been called as we have not supplied client certs" - assert f.hosts == hosts + assert f.hosts == ["https://localhost:9200"] assert f.ssl_context.check_hostname assert f.ssl_context.verify_mode == ssl.CERT_REQUIRED - assert f.client_options["scheme"] == "https" - assert f.client_options["http_auth"] == ("user", "password") + assert f.client_options["basic_auth"] == ("user", "password") + assert f.client_options["verify_certs"] assert "use_ssl" not in f.client_options - assert "verify_certs" not in f.client_options assert "ca_certs" not in f.client_options assert client_options == original_client_options @@ -178,7 +174,7 @@ def test_raises_error_when_only_one_of_client_cert_and_client_key_defined(self): client_options = { "use_ssl": True, "verify_certs": True, - "http_auth": ("user", "password"), + "basic_auth": ("user", "password"), "ca_certs": os.path.join(self.cwd, "utils/resources/certs/ca.crt"), } @@ -233,14 +229,14 @@ def test_create_https_connection_unverified_certificate(self, mocked_load_cert_c not mocked_load_cert_chain.called ), "ssl_context.load_cert_chain should not have been called as we have not supplied client certs" - assert f.hosts == hosts + assert f.hosts == ["https://localhost:9200"] assert not f.ssl_context.check_hostname assert f.ssl_context.verify_mode == ssl.CERT_NONE - assert f.client_options["scheme"] == "https" - assert f.client_options["http_auth"] == ("user", "password") + assert f.client_options["basic_auth"] == ("user", "password") + assert not f.client_options["verify_certs"] + assert "use_ssl" not in f.client_options - assert "verify_certs" not in f.client_options assert "basic_auth_user" not in f.client_options assert "basic_auth_password" not in f.client_options @@ -252,7 +248,7 @@ def 
test_create_https_connection_unverified_certificate_present_client_certifica client_options = { "use_ssl": True, "verify_certs": False, - "http_auth": ("user", "password"), + "basic_auth": ("user", "password"), "client_cert": os.path.join(self.cwd, "utils/resources/certs/client.crt"), "client_key": os.path.join(self.cwd, "utils/resources/certs/client.key"), } @@ -274,14 +270,13 @@ def test_create_https_connection_unverified_certificate_present_client_certifica keyfile=client_options["client_key"], ) - assert f.hosts == hosts + assert f.hosts == ["https://localhost:9200"] assert not f.ssl_context.check_hostname assert f.ssl_context.verify_mode == ssl.CERT_NONE - assert f.client_options["scheme"] == "https" - assert f.client_options["http_auth"] == ("user", "password") + assert f.client_options["basic_auth"] == ("user", "password") assert "use_ssl" not in f.client_options - assert "verify_certs" not in f.client_options + assert not f.client_options["verify_certs"] assert "basic_auth_user" not in f.client_options assert "basic_auth_password" not in f.client_options assert "ca_certs" not in f.client_options @@ -313,7 +308,7 @@ def test_check_hostname_false_when_host_is_ip(self): } f = client.EsClientFactory(hosts, client_options) - assert f.hosts == hosts + assert f.hosts == ["https://127.0.0.1:9200"] assert f.ssl_context.check_hostname is False assert f.ssl_context.verify_mode == ssl.CERT_REQUIRED @@ -413,10 +408,7 @@ async def test_propagates_nested_context(self): class TestRestLayer: @mock.patch("elasticsearch.Elasticsearch") def test_successfully_waits_for_rest_layer(self, es): - es.transport.hosts = [ - {"host": "node-a.example.org", "port": 9200}, - {"host": "node-b.example.org", "port": 9200}, - ] + es.transport.node_pool.__len__ = mock.Mock(return_value=2) assert client.wait_for_rest_layer(es, max_attempts=3) es.cluster.health.assert_has_calls( [ @@ -447,21 +439,8 @@ def test_dont_retry_eternally_on_transport_errors(self, es, sleep): 
@mock.patch("elasticsearch.Elasticsearch") def test_ssl_error(self, es): es.cluster.health.side_effect = elasticsearch.ConnectionError( - "N/A", - "[SSL: UNKNOWN_PROTOCOL] unknown protocol (_ssl.c:719)", - urllib3.exceptions.SSLError("[SSL: UNKNOWN_PROTOCOL] unknown protocol (_ssl.c:719)"), + message="N/A", + errors=[urllib3.exceptions.SSLError("[SSL: UNKNOWN_PROTOCOL] unknown protocol (_ssl.c:719)")], ) with pytest.raises(exceptions.SystemSetupError, match="Could not connect to cluster via https. Is this an https endpoint?"): client.wait_for_rest_layer(es, max_attempts=3) - - -class TestAsyncConnection: - @pytest.mark.asyncio - async def test_enable_cleanup_close(self): - connection = AIOHttpConnection() - # pylint: disable=protected-access - assert connection._enable_cleanup_closed is True - - connection = AIOHttpConnection(enable_cleanup_closed=False) - # pylint: disable=protected-access - assert connection._enable_cleanup_closed is False From 99f8d2b361091a4fbcc118bc748fb15f6da60e7f Mon Sep 17 00:00:00 2001 From: Mike Baamonde Date: Fri, 3 Jun 2022 14:30:56 -0400 Subject: [PATCH 06/68] Update metrics code. The ES client now requires kwargs, so we ensure that we do not use any positional arguments. The client also now distinguishes between a `TransportError` and an `ApiError`, and `ElasticsearchException` has been removed. We adjust accordingly, albeit with a TODO to revisit this. 
--- esrally/metrics.py | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/esrally/metrics.py b/esrally/metrics.py index 2b8f9f1b7..f50c501f2 100644 --- a/esrally/metrics.py +++ b/esrally/metrics.py @@ -31,6 +31,7 @@ from http.client import responses import tabulate +from elasticsearch import ApiError, TransportError from esrally import client, config, exceptions, paths, time, version from esrally.utils import console, convert, io, versions @@ -57,16 +58,17 @@ def probe_version(self): raise exceptions.RallyError(msg) def put_template(self, name, template): - return self.guarded(self._client.indices.put_template, name=name, body=template) + tmpl = json.loads(template) + return self.guarded(self._client.indices.put_template, name=name, **tmpl) def template_exists(self, name): - return self.guarded(self._client.indices.exists_template, name) + return self.guarded(self._client.indices.exists_template, name=name) def delete_template(self, name): - self.guarded(self._client.indices.delete_template, name) + self.guarded(self._client.indices.delete_template, name=name) def get_index(self, name): - return self.guarded(self._client.indices.get, name) + return self.guarded(self._client.indices.get, name=name) def create_index(self, index): # ignore 400 cause by IndexAlreadyExistsException when creating an index @@ -146,12 +148,13 @@ def guarded(self, target, *args, **kwargs): ) self.logger.exception(msg) raise exceptions.SystemSetupError(msg) - except elasticsearch.TransportError as e: - if e.status_code in (502, 503, 504, 429) and execution_count < max_execution_count: + # TODO: Refactor error handling to better account for TransportError/ApiError subtleties + except TransportError as e: + if e.message in (502, 503, 504, 429) and execution_count < max_execution_count: self.logger.debug( "%s (code: %d) in attempt [%d/%d]. 
Sleeping for [%f] seconds.", - responses[e.status_code], - e.status_code, + responses[e.message], + e.message, execution_count, max_execution_count, time_to_sleep, @@ -166,7 +169,7 @@ def guarded(self, target, *args, **kwargs): self.logger.exception(msg) raise exceptions.RallyError(msg) - except elasticsearch.exceptions.ElasticsearchException: + except ApiError: node = self._client.transport.hosts[0] msg = ( "An unknown error occurred while running the operation [%s] against your Elasticsearch metrics store on host [%s] " From b4a63fd06da4537c0bc70fe8d8f3991b5dac6af8 Mon Sep 17 00:00:00 2001 From: Mike Baamonde Date: Fri, 3 Jun 2022 14:35:28 -0400 Subject: [PATCH 07/68] Update metrics unit tests. --- tests/metrics_test.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/metrics_test.py b/tests/metrics_test.py index 9a8b4e885..0f5ac652d 100644 --- a/tests/metrics_test.py +++ b/tests/metrics_test.py @@ -190,7 +190,7 @@ def raise_connection_error(): def test_raises_sytem_setup_error_on_authentication_problems(self): def raise_authentication_error(): - raise elasticsearch.exceptions.AuthenticationException("unit-test") + raise elasticsearch.exceptions.AuthenticationException(meta=None, body=None, message="unit-test") client = metrics.EsClient(self.ClientMock([{"host": "127.0.0.1", "port": "9243"}])) @@ -203,7 +203,7 @@ def raise_authentication_error(): def test_raises_sytem_setup_error_on_authorization_problems(self): def raise_authorization_error(): - raise elasticsearch.exceptions.AuthorizationException("unit-test") + raise elasticsearch.exceptions.AuthorizationException(meta=None, body=None, message="unit-test") client = metrics.EsClient(self.ClientMock([{"host": "127.0.0.1", "port": "9243"}])) @@ -217,14 +217,14 @@ def raise_authorization_error(): def test_raises_rally_error_on_unknown_problems(self): def raise_unknown_error(): - raise elasticsearch.exceptions.SerializationError("unit-test") + raise 
elasticsearch.exceptions.SerializationError(message="unit-test") client = metrics.EsClient(self.ClientMock([{"host": "127.0.0.1", "port": "9243"}])) with pytest.raises(exceptions.RallyError) as ctx: client.guarded(raise_unknown_error) assert ctx.value.args[0] == ( - "An unknown error occurred while running the operation [raise_unknown_error] against your Elasticsearch metrics " + "A transport error occurred while running the operation [raise_unknown_error] against your Elasticsearch metrics " "store on host [127.0.0.1] at port [9243]." ) From 48462232a8f6154416de026385556af34f84f1dd Mon Sep 17 00:00:00 2001 From: Mike Baamonde Date: Fri, 3 Jun 2022 14:35:52 -0400 Subject: [PATCH 08/68] Ensure that ES client method calls in telemetry use kwargs only. --- esrally/telemetry.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/esrally/telemetry.py b/esrally/telemetry.py index 3f9284bdf..b0e8b29bb 100644 --- a/esrally/telemetry.py +++ b/esrally/telemetry.py @@ -461,7 +461,7 @@ def record(self): ccr_stats_api_endpoint = "/_ccr/stats" filter_path = "follow_stats" stats = self.client.transport.perform_request( - "GET", ccr_stats_api_endpoint, params={"human": "false", "filter_path": filter_path} + method="GET", path=ccr_stats_api_endpoint, params={"human": "false", "filter_path": filter_path} ) except elasticsearch.TransportError: msg = "A transport error occurred while collecting CCR stats from the endpoint [{}?filter_path={}] on cluster [{}]".format( @@ -1233,7 +1233,7 @@ def record(self): # we don't use the existing client support (searchable_snapshots.stats()) # as the API is deliberately undocumented and might change: # https://www.elastic.co/guide/en/elasticsearch/reference/current/searchable-snapshots-api-stats.html - stats = self.client.transport.perform_request("GET", stats_api_endpoint, params={"level": level}) + stats = self.client.transport.perform_request(method="GET", path=stats_api_endpoint, params={"level": level}) except 
elasticsearch.NotFoundError as e: if "No searchable snapshots indices found" in e.info.get("error").get("reason"): self.logger.info( @@ -2276,7 +2276,9 @@ def on_benchmark_stop(self): for index in self.indices.split(","): self.logger.debug("Gathering disk usage for [%s]", index) try: - response = self.client.transport.perform_request("POST", f"/{index}/_disk_usage", params={"run_expensive_tasks": "true"}) + response = self.client.transport.perform_request( + method="POST", path=f"/{index}/_disk_usage", params={"run_expensive_tasks": "true"} + ) except elasticsearch.RequestError: msg = f"A transport error occurred while collecting disk usage for {index}" self.logger.exception(msg) From 02283264f2123539622fbd97fc9607988f14bc12 Mon Sep 17 00:00:00 2001 From: Mike Baamonde Date: Fri, 3 Jun 2022 14:36:22 -0400 Subject: [PATCH 09/68] Update telemetry unit tests. --- tests/telemetry_test.py | 39 +++++++++++++++++++++------------------ 1 file changed, 21 insertions(+), 18 deletions(-) diff --git a/tests/telemetry_test.py b/tests/telemetry_test.py index a1052320b..26af52b3c 100644 --- a/tests/telemetry_test.py +++ b/tests/telemetry_test.py @@ -212,7 +212,7 @@ def __call__(self, *args, **kwargs): class TransportClient: - def __init__(self, response=None, force_error=False, error=elasticsearch.TransportError): + def __init__(self, response=None, force_error=False, error=elasticsearch.TransportError(message="transport error")): self._response = response self._force_error = force_error self._error = error @@ -4242,14 +4242,14 @@ def test_uses_indices_by_default(self, es): cfg = create_config() metrics_store = metrics.EsMetricsStore(cfg) tc = TransportClient(response={"_shards": {"failed": 0}}) - es = Client(transport_client=tc) - device = telemetry.DiskUsageStats({}, es, metrics_store, index_names=["foo", "bar"], data_stream_names=[]) + c = Client(transport_client=tc) + device = telemetry.DiskUsageStats({}, c, metrics_store, index_names=["foo", "bar"], data_stream_names=[]) t = 
telemetry.Telemetry(enabled_devices=[device.command], devices=[device]) t.on_benchmark_start() t.on_benchmark_stop() - assert tc.args == [ - ("POST", "/foo/_disk_usage"), - ("POST", "/bar/_disk_usage"), + assert tc.kwargs == [ + {"method": "POST", "path": "/foo/_disk_usage", "params": {"run_expensive_tasks": "true"}}, + {"method": "POST", "path": "/bar/_disk_usage", "params": {"run_expensive_tasks": "true"}}, ] @mock.patch("elasticsearch.Elasticsearch") @@ -4262,9 +4262,9 @@ def test_uses_data_streams_by_default(self, es): t = telemetry.Telemetry(enabled_devices=[device.command], devices=[device]) t.on_benchmark_start() t.on_benchmark_stop() - assert tc.args == [ - ("POST", "/foo/_disk_usage"), - ("POST", "/bar/_disk_usage"), + assert tc.kwargs == [ + {"method": "POST", "path": "/foo/_disk_usage", "params": {"run_expensive_tasks": "true"}}, + {"method": "POST", "path": "/bar/_disk_usage", "params": {"run_expensive_tasks": "true"}}, ] @mock.patch("elasticsearch.Elasticsearch") @@ -4279,7 +4279,10 @@ def test_uses_indices_param_if_specified_instead_of_index_names(self, es): t = telemetry.Telemetry(enabled_devices=[device.command], devices=[device]) t.on_benchmark_start() t.on_benchmark_stop() - assert tc.args == [("POST", "/foo/_disk_usage"), ("POST", "/bar/_disk_usage")] + assert tc.kwargs == [ + {"method": "POST", "path": "/foo/_disk_usage", "params": {"run_expensive_tasks": "true"}}, + {"method": "POST", "path": "/bar/_disk_usage", "params": {"run_expensive_tasks": "true"}}, + ] @mock.patch("elasticsearch.Elasticsearch") def test_uses_indices_param_if_specified_instead_of_data_stream_names(self, es): @@ -4293,7 +4296,10 @@ def test_uses_indices_param_if_specified_instead_of_data_stream_names(self, es): t = telemetry.Telemetry(enabled_devices=[device.command], devices=[device]) t.on_benchmark_start() t.on_benchmark_stop() - assert tc.args == [("POST", "/foo/_disk_usage"), ("POST", "/bar/_disk_usage")] + assert tc.kwargs == [ + {"method": "POST", "path": 
"/foo/_disk_usage", "params": {"run_expensive_tasks": "true"}}, + {"method": "POST", "path": "/bar/_disk_usage", "params": {"run_expensive_tasks": "true"}}, + ] @mock.patch("esrally.metrics.EsMetricsStore.put_value_cluster_level") def test_error_on_retrieval_does_not_store_metrics(self, metrics_store_cluster_level): @@ -4302,7 +4308,7 @@ def test_error_on_retrieval_does_not_store_metrics(self, metrics_store_cluster_l es = Client( transport_client=TransportClient( force_error=True, - error=elasticsearch.RequestError, + error=elasticsearch.RequestError(message="400", meta=None, body=None), ) ) device = telemetry.DiskUsageStats({}, es, metrics_store, index_names=["foo"], data_stream_names=[]) @@ -4319,7 +4325,7 @@ def test_no_indices_fails(self, metrics_store_cluster_level): es = Client( transport_client=TransportClient( force_error=True, - error=elasticsearch.RequestError, + error=elasticsearch.RequestError(message="400", meta=None, body=None), ) ) device = telemetry.DiskUsageStats({}, es, metrics_store, index_names=[], data_stream_names=[]) @@ -4333,10 +4339,7 @@ def test_missing_all_fails(self, metrics_store_cluster_level): cfg = create_config() metrics_store = metrics.EsMetricsStore(cfg) es = Client( - transport_client=TransportClient( - force_error=True, - error=elasticsearch.RequestError, - ) + transport_client=TransportClient(force_error=True, error=elasticsearch.RequestError(message="400", meta=None, body=None)) ) device = telemetry.DiskUsageStats({}, es, metrics_store, index_names=["foo", "bar"], data_stream_names=[]) t = telemetry.Telemetry(enabled_devices=[device.command], devices=[device]) @@ -4365,7 +4368,7 @@ def perform_request(self, *args, **kwargs): not_found_transport_client = TransportClient( force_error=True, - error=elasticsearch.NotFoundError, + error=elasticsearch.NotFoundError(message="404", meta=None, body=None), ) successful_client = TransportClient( response={ From 6f43b03af61884d67c3448ab508f4947e0745eef Mon Sep 17 00:00:00 2001 From: Mike 
Baamonde Date: Fri, 3 Jun 2022 14:39:13 -0400 Subject: [PATCH 10/68] Update tracker: use kwargs, move away from `ElasticsearchException`. --- esrally/tracker/index.py | 2 +- esrally/tracker/tracker.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/esrally/tracker/index.py b/esrally/tracker/index.py index 94d8817ea..f445d8683 100644 --- a/esrally/tracker/index.py +++ b/esrally/tracker/index.py @@ -65,7 +65,7 @@ def extract_index_mapping_and_settings(client, index_pattern): results = {} logger = logging.getLogger(__name__) # the response might contain multiple indices if a wildcard was provided - response = client.indices.get(index_pattern) + response = client.indices.get(index=index_pattern) for index, details in response.items(): valid, reason = is_valid(index) if valid: diff --git a/esrally/tracker/tracker.py b/esrally/tracker/tracker.py index 0882e6a25..104198460 100644 --- a/esrally/tracker/tracker.py +++ b/esrally/tracker/tracker.py @@ -18,7 +18,7 @@ import logging import os -from elasticsearch import ElasticsearchException +from elasticsearch import ApiError, TransportError from jinja2 import Environment, FileSystemLoader from esrally import PROGRAM_NAME @@ -43,7 +43,7 @@ def extract_mappings_and_corpora(client, output_path, indices_to_extract): for index_name in indices_to_extract: try: indices += index.extract(client, output_path, index_name) - except ElasticsearchException: + except (ApiError, TransportError): logging.getLogger(__name__).exception("Failed to extract index [%s]", index_name) # That list only contains valid indices (with index patterns already resolved) From 1f566fb59588fa8dd4fc30167c0faf5b2697381d Mon Sep 17 00:00:00 2001 From: Mike Baamonde Date: Fri, 3 Jun 2022 14:39:44 -0400 Subject: [PATCH 11/68] Update tracker/corpus unit tests. 
--- tests/tracker/corpus_test.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/tracker/corpus_test.py b/tests/tracker/corpus_test.py index 007fd833d..6ff514217 100644 --- a/tests/tracker/corpus_test.py +++ b/tests/tracker/corpus_test.py @@ -31,6 +31,9 @@ def test_extract(client, mo): doc = {"field1": "stuff", "field2": "things"} doc_data = serialize_doc(doc) client.count.return_value = {"count": 1001} + # the scan helper calls client.options(), which returns a new client instance + # we override this behavior here to facilitate mocking + client.options.return_value = client client.search.return_value = { "_scroll_id": "uohialjrknf", "_shards": { From 9bdb3615f215101fd4001bc8894920c8861003e0 Mon Sep 17 00:00:00 2001 From: Mike Baamonde Date: Fri, 3 Jun 2022 14:40:57 -0400 Subject: [PATCH 12/68] Ensure that the load driver creates ES client objects correctly. --- esrally/driver/driver.py | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/esrally/driver/driver.py b/esrally/driver/driver.py index 255a8014c..aa46df6a8 100644 --- a/esrally/driver/driver.py +++ b/esrally/driver/driver.py @@ -580,13 +580,16 @@ def __init__(self, target, config, es_client_factory_class=client.EsClientFactor def create_es_clients(self): all_hosts = self.config.opts("client", "hosts").all_hosts + distribution_version = self.config.opts("mechanic", "distribution.version", mandatory=False) es = {} for cluster_name, cluster_hosts in all_hosts.items(): all_client_options = self.config.opts("client", "options").all_client_options cluster_client_options = dict(all_client_options[cluster_name]) # Use retries to avoid aborts on long living connections for telemetry devices - cluster_client_options["retry-on-timeout"] = True - es[cluster_name] = self.es_client_factory(cluster_hosts, cluster_client_options).create() + cluster_client_options["retry_on_timeout"] = True + es[cluster_name] = self.es_client_factory( + cluster_hosts, cluster_client_options, 
distribution_version=distribution_version + ).create() return es def prepare_telemetry(self, es, enable, index_names, data_stream_names): @@ -1656,16 +1659,22 @@ def _logging_exception_handler(self, loop, context): self.logger.error("Uncaught exception in event loop: %s", context) async def run(self): - def es_clients(all_hosts, all_client_options): + def es_clients(all_hosts, all_client_options, distribution_version): es = {} for cluster_name, cluster_hosts in all_hosts.items(): - es[cluster_name] = client.EsClientFactory(cluster_hosts, all_client_options[cluster_name]).create_async() + es[cluster_name] = client.EsClientFactory( + cluster_hosts, all_client_options[cluster_name], distribution_version=distribution_version + ).create_async() return es # Properly size the internal connection pool to match the number of expected clients but allow the user # to override it if needed. client_count = len(self.task_allocations) - es = es_clients(self.cfg.opts("client", "hosts").all_hosts, self.cfg.opts("client", "options").with_max_connections(client_count)) + es = es_clients( + self.cfg.opts("client", "hosts").all_hosts, + self.cfg.opts("client", "options").with_max_connections(client_count), + self.cfg.opts("mechanic", "distribution.version", mandatory=False), + ) self.logger.info("Task assertions enabled: %s", str(self.assertions_enabled)) runner.enable_assertions(self.assertions_enabled) From c73e58ca7dccab8c8dafea4560533aa0e0435ca9 Mon Sep 17 00:00:00 2001 From: Mike Baamonde Date: Fri, 3 Jun 2022 14:42:43 -0400 Subject: [PATCH 13/68] Handle exceptions from the ES client in the load driver. This migrates to the new `TransportError`/`ApiError` approach, but it's a work in progress and needs another pass before it's considered robust. 
--- esrally/driver/driver.py | 41 ++++++++++++++++++++++++++++++++-------- 1 file changed, 33 insertions(+), 8 deletions(-) diff --git a/esrally/driver/driver.py b/esrally/driver/driver.py index aa46df6a8..7e1050c3e 100644 --- a/esrally/driver/driver.py +++ b/esrally/driver/driver.py @@ -28,6 +28,7 @@ import time from dataclasses import dataclass from enum import Enum +from io import BytesIO from typing import Callable import thespian.actors @@ -1903,6 +1904,7 @@ async def execute_single(runner, es, params, on_error): total_ops = 1 total_ops_unit = "ops" request_meta_data = {"success": True} + # TODO: This all needs to be refactored except elasticsearch.TransportError as e: # we *specifically* want to distinguish connection refused (a node died?) from connection timeouts # pylint: disable=unidiomatic-typecheck @@ -1913,19 +1915,42 @@ async def execute_single(runner, es, params, on_error): total_ops_unit = "ops" request_meta_data = {"success": False, "error-type": "transport"} # The ES client will sometimes return string like "N/A" or "TIMEOUT" for connection errors. 
- if isinstance(e.status_code, int): - request_meta_data["http-status"] = e.status_code + if isinstance(e.message, int): + request_meta_data["http-status"] = e.message # connection timeout errors don't provide a helpful description if isinstance(e, elasticsearch.ConnectionTimeout): request_meta_data["error-description"] = "network connection timed out" - elif e.info: - request_meta_data["error-description"] = f"{e.error} ({e.info})" + # elif e.info: + # request_meta_data["error-description"] = f"{e.error} ({e.info})" else: - if isinstance(e.error, bytes): - error_description = e.error.decode("utf-8") - else: - error_description = str(e.error) + error_description = str(e) request_meta_data["error-description"] = error_description + except elasticsearch.ApiError as e: + total_ops = 0 + total_ops_unit = "ops" + request_meta_data = {"success": False, "error-type": "api"} + + if isinstance(e.message, bytes): + error_message = e.message.decode("utf-8") + elif isinstance(e.message, BytesIO): + error_message = e.message.read().decode("utf-8") + else: + error_message = e.message + + if isinstance(e.body, bytes): + error_body = e.body.decode("utf-8") + elif isinstance(e.body, BytesIO): + error_body = e.body.read().decode("utf-8") + else: + error_body = e.body + + if error_body: + error_message += f" ({error_body})" + error_description = error_message + + request_meta_data["error-description"] = error_description + if e.meta.status: + request_meta_data["http-status"] = e.meta.status except KeyError as e: logging.getLogger(__name__).exception("Cannot execute runner [%s]; most likely due to missing parameters.", str(runner)) msg = "Cannot execute [%s]. Provided parameters are: %s. Error: [%s]." % (str(runner), list(params.keys()), str(e)) From e4a547e332f67e016492553e4b17a769b5e3dd4a Mon Sep 17 00:00:00 2001 From: Mike Baamonde Date: Fri, 3 Jun 2022 14:46:25 -0400 Subject: [PATCH 14/68] Set `request_timeout` via `es.options()` instead of API method kwarg. 
This is the new, preferred way of doing this, and these requests fail otherwise. We should genericize this behavior across runners. --- esrally/driver/runner.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/esrally/driver/runner.py b/esrally/driver/runner.py index 77755de74..b6ac7b902 100644 --- a/esrally/driver/runner.py +++ b/esrally/driver/runner.py @@ -200,6 +200,7 @@ def _default_kw_params(self, params): # filter Nones return dict(filter(lambda kv: kv[1] is not None, full_result.items())) + # TODO: have this function call options() on the es instance? def _transport_request_params(self, params): request_params = params.get("request-params", {}) request_timeout = params.get("request-timeout") @@ -726,7 +727,7 @@ class NodeStats(Runner): async def __call__(self, es, params): request_timeout = params.get("request-timeout") - await es.nodes.stats(metric="_all", request_timeout=request_timeout) + await es.options(request_timeout=request_timeout).nodes.stats(metric="_all") def __repr__(self, *args, **kwargs): return "node-stats" @@ -823,6 +824,9 @@ def __init__(self): async def __call__(self, es, params): request_params, headers = self._transport_request_params(params) + request_timeout = request_params.pop("request_timeout", None) + if request_timeout is not None: + es.options(request_timeout=request_timeout) # Mandatory to ensure it is always provided. This is especially important when this runner is used in a # composite context where there is no actual parameter source and the entire request structure must be provided # by the composite's parameter source. From d3c8c1bcd0ed5bdd04d7ec4eb893e8fcba8ab9ed Mon Sep 17 00:00:00 2001 From: Mike Baamonde Date: Fri, 3 Jun 2022 14:48:55 -0400 Subject: [PATCH 15/68] Ensure runners use kwargs when calling ES client methods. 
--- esrally/driver/runner.py | 187 +++++++++++++++++---------------------- 1 file changed, 79 insertions(+), 108 deletions(-) diff --git a/esrally/driver/runner.py b/esrally/driver/runner.py index b6ac7b902..6d820c018 100644 --- a/esrally/driver/runner.py +++ b/esrally/driver/runner.py @@ -954,8 +954,12 @@ async def _scroll_query(es, params): took = props.get("took", 0) all_results_collected = (size is not None and hits < size) or hits == 0 else: - r = await es.transport.perform_request( - "GET", "/_search/scroll", body={"scroll_id": scroll_id, "scroll": "10s"}, params=request_params, headers=headers + r = await es.perform_request( + method="GET", + path="/_search/scroll", + body={"scroll_id": scroll_id, "scroll": "10s"}, + params=request_params, + headers=headers, ) props = parse(r, ["timed_out", "took"], ["hits.hits"]) timed_out = timed_out or props.get("timed_out", False) @@ -1011,7 +1015,7 @@ async def _raw_search(self, es, doc_type, index, body, params, headers=None): components.append(doc_type) components.append("_search") path = "/".join(components) - return await es.transport.perform_request("GET", "/" + path, params=params, body=body, headers=headers) + return await es.perform_request(method="GET", path=f"/{path}", params=params, body=body, headers=headers) def _query_headers(self, params): # reduces overhead due to decompression of very large responses @@ -1183,7 +1187,7 @@ async def __call__(self, es, params): data_streams = mandatory(params, "data-streams", self) request_params = mandatory(params, "request-params", self) for data_stream in data_streams: - await es.indices.create_data_stream(data_stream, params=request_params) + await es.indices.create_data_stream(name=data_stream, params=request_params) return { "weight": len(data_streams), "unit": "ops", @@ -1258,11 +1262,11 @@ async def __call__(self, es, params): for data_stream in data_streams: if not only_if_exists: - await es.indices.delete_data_stream(data_stream, ignore=[404], 
params=request_params) + await es.indices.delete_data_stream(name=data_stream, ignore=[404], params=request_params) ops += 1 elif only_if_exists and await es.indices.exists(index=data_stream): self.logger.info("Data stream [%s] already exists. Deleting it.", data_stream) - await es.indices.delete_data_stream(data_stream, params=request_params) + await es.indices.delete_data_stream(name=data_stream, params=request_params) ops += 1 return { @@ -1307,19 +1311,12 @@ async def __call__(self, es, params): only_if_exists = mandatory(params, "only-if-exists", self) request_params = mandatory(params, "request-params", self) - async def _exists(name): - # pylint: disable=import-outside-toplevel - from elasticsearch.client import _make_path - - # currently not supported by client and hence custom request - return await es.transport.perform_request("HEAD", _make_path("_component_template", name)) - ops_count = 0 for template_name in template_names: if not only_if_exists: await es.cluster.delete_component_template(name=template_name, params=request_params, ignore=[404]) ops_count += 1 - elif only_if_exists and await _exists(template_name): + elif only_if_exists and await es.cluster.exists_component_template(name=template_name): self.logger.info("Component Index template [%s] already exists. Deleting it.", template_name) await es.cluster.delete_component_template(name=template_name, params=request_params) ops_count += 1 @@ -1369,7 +1366,7 @@ async def __call__(self, es, params): if not only_if_exists: await es.indices.delete_index_template(name=template_name, params=request_params, ignore=[404]) ops_count += 1 - elif only_if_exists and await es.indices.exists_index_template(template_name): + elif only_if_exists and await es.indices.exists_index_template(name=template_name): self.logger.info("Composable Index template [%s] already exists. 
Deleting it.", template_name) await es.indices.delete_index_template(name=template_name, params=request_params) ops_count += 1 @@ -1424,7 +1421,7 @@ async def __call__(self, es, params): if not only_if_exists: await es.indices.delete_template(name=template_name, params=request_params) ops_count += 1 - elif only_if_exists and await es.indices.exists_template(template_name): + elif only_if_exists and await es.indices.exists_template(name=template_name): self.logger.info("Index template [%s] already exists. Deleting it.", template_name) await es.indices.delete_template(name=template_name, params=request_params) ops_count += 1 @@ -1465,7 +1462,7 @@ async def _wait_for(self, es, idx, description): async def __call__(self, es, params): source_index = mandatory(params, "source-index", self) - source_indices_get = await es.indices.get(source_index) + source_indices_get = await es.indices.get(index=source_index) source_indices = list(source_indices_get.keys()) source_indices_stem = commonprefix(source_indices) @@ -1537,17 +1534,14 @@ async def __call__(self, es, params): datafeed_id = mandatory(params, "datafeed-id", self) body = mandatory(params, "body", self) try: - await es.xpack.ml.put_datafeed(datafeed_id=datafeed_id, body=body) - except elasticsearch.TransportError as e: + await es.ml.put_datafeed(datafeed_id=datafeed_id, body=body) + except elasticsearch.BadRequestError: # fallback to old path - if e.status_code == 400: - await es.transport.perform_request( - "PUT", - f"/_xpack/ml/datafeeds/{datafeed_id}", - body=body, - ) - else: - raise e + await es.perform_request( + method="PUT", + path=f"/_xpack/ml/datafeeds/{datafeed_id}", + body=body, + ) def __repr__(self, *args, **kwargs): return "create-ml-datafeed" @@ -1566,17 +1560,13 @@ async def __call__(self, es, params): force = params.get("force", False) try: # we don't want to fail if a datafeed does not exist, thus we ignore 404s. 
- await es.xpack.ml.delete_datafeed(datafeed_id=datafeed_id, force=force, ignore=[404]) - except elasticsearch.TransportError as e: - # fallback to old path (ES < 7) - if e.status_code == 400: - await es.transport.perform_request( - "DELETE", - f"/_xpack/ml/datafeeds/{datafeed_id}", - params={"force": escape(force), "ignore": 404}, - ) - else: - raise e + await es.ml.delete_datafeed(datafeed_id=datafeed_id, force=force, ignore=[404]) + except elasticsearch.BadRequestError: + await es.perform_request( + method="DELETE", + path=f"/_xpack/ml/datafeeds/{datafeed_id}", + params={"force": escape(force), "ignore": 404}, + ) def __repr__(self, *args, **kwargs): return "delete-ml-datafeed" @@ -1597,17 +1587,13 @@ async def __call__(self, es, params): end = params.get("end") timeout = params.get("timeout") try: - await es.xpack.ml.start_datafeed(datafeed_id=datafeed_id, body=body, start=start, end=end, timeout=timeout) - except elasticsearch.TransportError as e: - # fallback to old path (ES < 7) - if e.status_code == 400: - await es.transport.perform_request( - "POST", - f"/_xpack/ml/datafeeds/{datafeed_id}/_start", - body=body, - ) - else: - raise e + await es.ml.start_datafeed(datafeed_id=datafeed_id, body=body, start=start, end=end, timeout=timeout) + except elasticsearch.BadRequestError: + await es.perform_request( + method="POST", + path=f"/_xpack/ml/datafeeds/{datafeed_id}/_start", + body=body, + ) def __repr__(self, *args, **kwargs): return "start-ml-datafeed" @@ -1626,18 +1612,15 @@ async def __call__(self, es, params): force = params.get("force", False) timeout = params.get("timeout") try: - await es.xpack.ml.stop_datafeed(datafeed_id=datafeed_id, force=force, timeout=timeout) - except elasticsearch.TransportError as e: + await es.ml.stop_datafeed(datafeed_id=datafeed_id, force=force, timeout=timeout) + except elasticsearch.BadRequestError: # fallback to old path (ES < 7) - if e.status_code == 400: - request_params = { - "force": escape(force), - } - if timeout: - 
request_params["timeout"] = escape(timeout) - await es.transport.perform_request("POST", f"/_xpack/ml/datafeeds/{datafeed_id}/_stop", params=request_params) - else: - raise e + request_params = { + "force": escape(force), + } + if timeout: + request_params["timeout"] = escape(timeout) + await es.perform_request(method="POST", path=f"/_xpack/ml/datafeeds/{datafeed_id}/_stop", params=request_params) def __repr__(self, *args, **kwargs): return "stop-ml-datafeed" @@ -1655,17 +1638,14 @@ async def __call__(self, es, params): job_id = mandatory(params, "job-id", self) body = mandatory(params, "body", self) try: - await es.xpack.ml.put_job(job_id=job_id, body=body) - except elasticsearch.TransportError as e: + await es.ml.put_job(job_id=job_id, body=body) + except elasticsearch.BadRequestError: # fallback to old path (ES < 7) - if e.status_code == 400: - await es.transport.perform_request( - "PUT", - f"/_xpack/ml/anomaly_detectors/{job_id}", - body=body, - ) - else: - raise e + await es.perform_request( + method="PUT", + path=f"/_xpack/ml/anomaly_detectors/{job_id}", + body=body, + ) def __repr__(self, *args, **kwargs): return "create-ml-job" @@ -1684,17 +1664,14 @@ async def __call__(self, es, params): force = params.get("force", False) # we don't want to fail if a job does not exist, thus we ignore 404s. 
try: - await es.xpack.ml.delete_job(job_id=job_id, force=force, ignore=[404]) - except elasticsearch.TransportError as e: + await es.ml.delete_job(job_id=job_id, force=force, ignore=[404]) + except elasticsearch.BadRequestError: # fallback to old path (ES < 7) - if e.status_code == 400: - await es.transport.perform_request( - "DELETE", - f"/_xpack/ml/anomaly_detectors/{job_id}", - params={"force": escape(force), "ignore": 404}, - ) - else: - raise e + await es.perform_request( + method="DELETE", + path=f"/_xpack/ml/anomaly_detectors/{job_id}", + params={"force": escape(force), "ignore": 404}, + ) def __repr__(self, *args, **kwargs): return "delete-ml-job" @@ -1711,16 +1688,13 @@ async def __call__(self, es, params): job_id = mandatory(params, "job-id", self) try: - await es.xpack.ml.open_job(job_id=job_id) - except elasticsearch.TransportError as e: + await es.ml.open_job(job_id=job_id) + except elasticsearch.BadRequestError: # fallback to old path (ES < 7) - if e.status_code == 400: - await es.transport.perform_request( - "POST", - f"/_xpack/ml/anomaly_detectors/{job_id}/_open", - ) - else: - raise e + await es.perform_request( + method="POST", + path=f"/_xpack/ml/anomaly_detectors/{job_id}/_open", + ) def __repr__(self, *args, **kwargs): return "open-ml-job" @@ -1739,23 +1713,20 @@ async def __call__(self, es, params): force = params.get("force", False) timeout = params.get("timeout") try: - await es.xpack.ml.close_job(job_id=job_id, force=force, timeout=timeout) - except elasticsearch.TransportError as e: + await es.ml.close_job(job_id=job_id, force=force, timeout=timeout) + except elasticsearch.BadRequestError: # fallback to old path (ES < 7) - if e.status_code == 400: - request_params = { - "force": escape(force), - } - if timeout: - request_params["timeout"] = escape(timeout) + request_params = { + "force": escape(force), + } + if timeout: + request_params["timeout"] = escape(timeout) - await es.transport.perform_request( - "POST", - 
f"/_xpack/ml/anomaly_detectors/{job_id}/_close", - params=request_params, - ) - else: - raise e + await es.perform_request( + method="POST", + path=f"/_xpack/ml/anomaly_detectors/{job_id}/_close", + params=request_params, + ) def __repr__(self, *args, **kwargs): return "close-ml-job" @@ -1774,8 +1745,8 @@ async def __call__(self, es, params): # counter-intuitive, but preserves prior behavior headers = None - await es.transport.perform_request( - method=params.get("method", "GET"), url=path, headers=headers, body=params.get("body"), params=request_params + await es.perform_request( + method=params.get("method", "GET"), path=path, headers=headers, body=params.get("body"), params=request_params ) def __repr__(self, *args, **kwargs): @@ -2466,7 +2437,7 @@ async def __call__(self, es, params): es.return_raw_response() - r = await es.transport.perform_request("POST", "/_sql", body=body) + r = await es.perform_request(method="POST", path="/_sql", body=body) pages -= 1 weight = 1 @@ -2478,7 +2449,7 @@ async def __call__(self, es, params): "Result set has been exhausted before all pages have been fetched, {} page(s) remaining.".format(pages) ) - r = await es.transport.perform_request("POST", "/_sql", body={"cursor": cursor}) + r = await es.perform_request(method="POST", path="/_sql", body={"cursor": cursor}) pages -= 1 weight += 1 From 2df742c8080d46f31775393fa19ae715b6e284e0 Mon Sep 17 00:00:00 2001 From: Mike Baamonde Date: Fri, 3 Jun 2022 14:49:36 -0400 Subject: [PATCH 16/68] Update driver unit tests. 
--- tests/driver/driver_test.py | 27 +++++++++++++++++---------- 1 file changed, 17 insertions(+), 10 deletions(-) diff --git a/tests/driver/driver_test.py b/tests/driver/driver_test.py index 75c63f358..7df30c123 100644 --- a/tests/driver/driver_test.py +++ b/tests/driver/driver_test.py @@ -23,6 +23,7 @@ import unittest.mock as mock from datetime import datetime +import elastic_transport import elasticsearch import pytest @@ -1528,7 +1529,7 @@ async def perform_request(*args, **kwargs): es.init_request_context.return_value = {"request_start": 0, "request_end": 10} # as this method is called several times we need to return a fresh instance every time as the previous # one has been "consumed". - es.transport.perform_request.side_effect = perform_request + es.perform_request.side_effect = perform_request params.register_param_source_for_name("driver-test-param-source", DriverTestParamSource) test_track = track.Track(name="unittest", description="unittest track", indices=None, challenges=None) @@ -1743,30 +1744,35 @@ async def test_execute_single_dict(self): async def test_execute_single_with_connection_error_always_aborts(self, on_error): es = None params = None - # ES client uses pseudo-status "N/A" in this case... - runner = mock.AsyncMock(side_effect=elasticsearch.ConnectionError("N/A", "no route to host", None)) + runner = mock.AsyncMock(side_effect=elasticsearch.ConnectionError(message="Connection error")) with pytest.raises(exceptions.RallyAssertionError) as exc: await driver.execute_single(self.context_managed(runner), es, params, on_error=on_error) - assert exc.value.args[0] == "Request returned an error. Error type: transport, Description: no route to host" + assert exc.value.args[0] == "Request returned an error. 
Error type: transport, Description: Connection error" @pytest.mark.asyncio async def test_execute_single_with_http_400_aborts_when_specified(self): es = None params = None - runner = mock.AsyncMock(side_effect=elasticsearch.NotFoundError(404, "not found", "the requested document could not be found")) + error_meta = elastic_transport.ApiResponseMeta(status=404, http_version="1.1", headers={}, duration=0.0, node=None) + runner = mock.AsyncMock( + side_effect=elasticsearch.NotFoundError(message="not found", meta=error_meta, body="the requested document could not be found") + ) with pytest.raises(exceptions.RallyAssertionError) as exc: await driver.execute_single(self.context_managed(runner), es, params, on_error="abort") assert exc.value.args[0] == ( - "Request returned an error. Error type: transport, Description: not found (the requested document could not be found)" + "Request returned an error. Error type: api, Description: not found (the requested document could not be found)" ) @pytest.mark.asyncio async def test_execute_single_with_http_400(self): es = None params = None - runner = mock.AsyncMock(side_effect=elasticsearch.NotFoundError(404, "not found", "the requested document could not be found")) + error_meta = elastic_transport.ApiResponseMeta(status=404, http_version="1.1", headers={}, duration=0.0, node=None) + runner = mock.AsyncMock( + side_effect=elasticsearch.NotFoundError(message="not found", meta=error_meta, body="the requested document could not be found") + ) ops, unit, request_meta_data = await driver.execute_single(self.context_managed(runner), es, params, on_error="continue") @@ -1774,7 +1780,7 @@ async def test_execute_single_with_http_400(self): assert unit == "ops" assert request_meta_data == { "http-status": 404, - "error-type": "transport", + "error-type": "api", "error-description": "not found (the requested document could not be found)", "success": False, } @@ -1783,7 +1789,8 @@ async def test_execute_single_with_http_400(self): async def 
test_execute_single_with_http_413(self): es = None params = None - runner = mock.AsyncMock(side_effect=elasticsearch.NotFoundError(413, b"", b"")) + error_meta = elastic_transport.ApiResponseMeta(status=413, http_version="1.1", headers={}, duration=0.0, node=None) + runner = mock.AsyncMock(side_effect=elasticsearch.NotFoundError(message="", meta=error_meta, body="")) ops, unit, request_meta_data = await driver.execute_single(self.context_managed(runner), es, params, on_error="continue") @@ -1791,7 +1798,7 @@ async def test_execute_single_with_http_413(self): assert unit == "ops" assert request_meta_data == { "http-status": 413, - "error-type": "transport", + "error-type": "api", "error-description": "", "success": False, } From 17d03e7b667b56c7f2977779ad21db88acfe3c37 Mon Sep 17 00:00:00 2001 From: Mike Baamonde Date: Fri, 3 Jun 2022 14:49:47 -0400 Subject: [PATCH 17/68] Update runner unit tests. --- tests/driver/runner_test.py | 325 ++++++++++++++++++------------------ 1 file changed, 165 insertions(+), 160 deletions(-) diff --git a/tests/driver/runner_test.py b/tests/driver/runner_test.py index 1d1b43715..2fe8f20ed 100644 --- a/tests/driver/runner_test.py +++ b/tests/driver/runner_test.py @@ -24,6 +24,7 @@ import random import unittest.mock as mock +import elastic_transport import elasticsearch import pytest @@ -1231,7 +1232,7 @@ async def test_force_merge_with_polling_no_timeout(self, es): @mock.patch("elasticsearch.Elasticsearch") @pytest.mark.asyncio async def test_force_merge_with_polling(self, es): - es.indices.forcemerge = mock.AsyncMock(side_effect=elasticsearch.ConnectionTimeout()) + es.indices.forcemerge = mock.AsyncMock(side_effect=elasticsearch.ConnectionTimeout(message="connection timeout")) es.tasks.list = mock.AsyncMock( side_effect=[ { @@ -1278,7 +1279,7 @@ async def test_force_merge_with_polling(self, es): @mock.patch("elasticsearch.Elasticsearch") @pytest.mark.asyncio async def test_force_merge_with_polling_and_params(self, es): - 
es.indices.forcemerge = mock.AsyncMock(return_value=elasticsearch.ConnectionTimeout()) + es.indices.forcemerge = mock.AsyncMock(return_value=elasticsearch.ConnectionTimeout("connection timeout")) es.tasks.list = mock.AsyncMock( side_effect=[ { @@ -1496,7 +1497,7 @@ async def test_query_match_only_request_body_defined(self, es): ], }, } - es.transport.perform_request = mock.AsyncMock(return_value=io.StringIO(json.dumps(search_response))) + es.perform_request = mock.AsyncMock(return_value=io.StringIO(json.dumps(search_response))) query_runner = runner.Query() @@ -1525,8 +1526,8 @@ async def test_query_match_only_request_body_defined(self, es): "took": 5, } - es.transport.perform_request.assert_awaited_once_with( - "GET", "/_all/_search", params={"request_cache": "true"}, body=params["body"], headers=None + es.perform_request.assert_awaited_once_with( + method="GET", path="/_all/_search", params={"request_cache": "true"}, body=params["body"], headers=None ) es.clear_scroll.assert_not_called() @@ -1544,7 +1545,7 @@ async def test_query_with_timeout_and_headers(self, es): ], }, } - es.transport.perform_request = mock.AsyncMock(return_value=io.StringIO(json.dumps(search_response))) + es.perform_request = mock.AsyncMock(return_value=io.StringIO(json.dumps(search_response))) query_runner = runner.Query() @@ -1572,10 +1573,11 @@ async def test_query_with_timeout_and_headers(self, es): "took": 5, } - es.transport.perform_request.assert_awaited_once_with( - "GET", - "/_all/_search", - params={"request_timeout": 3.0, "request_cache": "true"}, + es.perform_request.assert_awaited_once_with( + method="GET", + path="/_all/_search", + # params={"request_timeout": 3.0, "request_cache": "true"}, + params={"request_cache": "true"}, body=params["body"], headers={"header1": "value1", "x-opaque-id": "test-id1"}, ) @@ -1598,7 +1600,7 @@ async def test_query_match_using_request_params(self, es): ], }, } - es.transport.perform_request = 
mock.AsyncMock(return_value=io.StringIO(json.dumps(response))) + es.perform_request = mock.AsyncMock(return_value=io.StringIO(json.dumps(response))) query_runner = runner.Query() params = { @@ -1625,9 +1627,9 @@ async def test_query_match_using_request_params(self, es): "took": 62, } - es.transport.perform_request.assert_awaited_once_with( - "GET", - "/_all/_search", + es.perform_request.assert_awaited_once_with( + method="GET", + path="/_all/_search", params={ "request_cache": "false", "q": "user:kimchy", @@ -1654,7 +1656,7 @@ async def test_query_no_detailed_results(self, es): ], }, } - es.transport.perform_request = mock.AsyncMock(return_value=io.StringIO(json.dumps(response))) + es.perform_request = mock.AsyncMock(return_value=io.StringIO(json.dumps(response))) query_runner = runner.Query() params = { @@ -1679,9 +1681,9 @@ async def test_query_no_detailed_results(self, es): assert "took" not in result assert "error-type" not in result - es.transport.perform_request.assert_awaited_once_with( - "GET", - "/_all/_search", + es.perform_request.assert_awaited_once_with( + method="GET", + path="/_all/_search", params={ "q": "user:kimchy", }, @@ -1704,7 +1706,7 @@ async def test_query_hits_total_as_number(self, es): ], }, } - es.transport.perform_request = mock.AsyncMock(return_value=io.StringIO(json.dumps(search_response))) + es.perform_request = mock.AsyncMock(return_value=io.StringIO(json.dumps(search_response))) query_runner = runner.Query() @@ -1733,9 +1735,9 @@ async def test_query_hits_total_as_number(self, es): "took": 5, } - es.transport.perform_request.assert_awaited_once_with( - "GET", - "/_all/_search", + es.perform_request.assert_awaited_once_with( + method="GET", + path="/_all/_search", params={ "request_cache": "true", }, @@ -1761,7 +1763,7 @@ async def test_query_match_all(self, es): ], }, } - es.transport.perform_request = mock.AsyncMock(return_value=io.StringIO(json.dumps(search_response))) + es.perform_request = 
mock.AsyncMock(return_value=io.StringIO(json.dumps(search_response))) query_runner = runner.Query() @@ -1790,9 +1792,9 @@ async def test_query_match_all(self, es): "took": 5, } - es.transport.perform_request.assert_awaited_once_with( - "GET", - "/unittest/_search", + es.perform_request.assert_awaited_once_with( + method="GET", + path="/unittest/_search", params={}, body=params["body"], headers={ @@ -1816,7 +1818,7 @@ async def test_query_match_all_doc_type_fallback(self, es): }, } - es.transport.perform_request = mock.AsyncMock(return_value=io.StringIO(json.dumps(search_response))) + es.perform_request = mock.AsyncMock(return_value=io.StringIO(json.dumps(search_response))) query_runner = runner.Query() @@ -1846,9 +1848,9 @@ async def test_query_match_all_doc_type_fallback(self, es): "took": 5, } - es.transport.perform_request.assert_awaited_once_with( - "GET", - "/unittest/type/_search", + es.perform_request.assert_awaited_once_with( + method="GET", + path="/unittest/type/_search", body=params["body"], params={}, headers=None, @@ -1872,7 +1874,7 @@ async def test_scroll_query_only_one_page(self, es): }, } - es.transport.perform_request = mock.AsyncMock(return_value=io.StringIO(json.dumps(search_response))) + es.perform_request = mock.AsyncMock(return_value=io.StringIO(json.dumps(search_response))) es.clear_scroll = mock.AsyncMock(return_value=io.StringIO('{"acknowledged": true}')) query_runner = runner.Query() @@ -1904,9 +1906,9 @@ async def test_scroll_query_only_one_page(self, es): } assert "error-type" not in results - es.transport.perform_request.assert_awaited_once_with( - "GET", - "/unittest/_search", + es.perform_request.assert_awaited_once_with( + method="GET", + path="/unittest/_search", params={ "request_cache": "true", "sort": "_doc", @@ -1935,7 +1937,7 @@ async def test_scroll_query_no_request_cache(self, es): }, } - es.transport.perform_request = mock.AsyncMock(return_value=io.StringIO(json.dumps(search_response))) + es.perform_request = 
mock.AsyncMock(return_value=io.StringIO(json.dumps(search_response))) es.clear_scroll = mock.AsyncMock(return_value=io.StringIO('{"acknowledged": true}')) query_runner = runner.Query() @@ -1967,9 +1969,9 @@ async def test_scroll_query_no_request_cache(self, es): } assert "error-type" not in results - es.transport.perform_request.assert_awaited_once_with( - "GET", - "/unittest/_search", + es.perform_request.assert_awaited_once_with( + method="GET", + path="/unittest/_search", params={"sort": "_doc", "scroll": "10s", "size": 100}, body=params["body"], headers={"Accept-Encoding": "identity"}, @@ -1993,7 +1995,7 @@ async def test_scroll_query_only_one_page_only_request_body_defined(self, es): }, } - es.transport.perform_request = mock.AsyncMock(return_value=io.StringIO(json.dumps(search_response))) + es.perform_request = mock.AsyncMock(return_value=io.StringIO(json.dumps(search_response))) es.clear_scroll = mock.AsyncMock(return_value=io.StringIO('{"acknowledged": true}')) query_runner = runner.Query() @@ -2024,9 +2026,9 @@ async def test_scroll_query_only_one_page_only_request_body_defined(self, es): } assert "error-type" not in results - es.transport.perform_request.assert_awaited_once_with( - "GET", - "/_all/_search", + es.perform_request.assert_awaited_once_with( + method="GET", + path="/_all/_search", params={ "sort": "_doc", "scroll": "10s", @@ -2071,7 +2073,7 @@ async def test_scroll_query_with_explicit_number_of_pages(self, es): }, } - es.transport.perform_request = mock.AsyncMock( + es.perform_request = mock.AsyncMock( side_effect=[ io.StringIO(json.dumps(search_response)), io.StringIO(json.dumps(scroll_response)), @@ -2127,8 +2129,8 @@ async def test_scroll_query_cannot_clear_scroll(self, es): }, } - es.transport.perform_request = mock.AsyncMock(return_value=io.StringIO(json.dumps(search_response))) - es.clear_scroll = mock.AsyncMock(side_effect=elasticsearch.ConnectionTimeout()) + es.perform_request = 
mock.AsyncMock(return_value=io.StringIO(json.dumps(search_response))) + es.clear_scroll = mock.AsyncMock(side_effect=elasticsearch.ConnectionTimeout(message="connection timeout")) query_runner = runner.Query() @@ -2190,7 +2192,7 @@ async def test_scroll_query_request_all_pages(self, es): }, } - es.transport.perform_request = mock.AsyncMock( + es.perform_request = mock.AsyncMock( side_effect=[ io.StringIO(json.dumps(search_response)), io.StringIO(json.dumps(scroll_response)), @@ -2246,7 +2248,7 @@ async def test_query_runner_search_with_pages_logs_warning_and_executes(self, es }, } - es.transport.perform_request = mock.AsyncMock(return_value=io.StringIO(json.dumps(search_response))) + es.perform_request = mock.AsyncMock(return_value=io.StringIO(json.dumps(search_response))) es.clear_scroll = mock.AsyncMock(return_value=io.StringIO('{"acknowledged": true}')) query_runner = runner.Query() @@ -2623,8 +2625,8 @@ async def test_creates_multiple_data_streams(self, es): es.indices.create_data_stream.assert_has_awaits( [ - mock.call("data-stream-A", params=request_params), - mock.call("data-stream-B", params=request_params), + mock.call(name="data-stream-A", params=request_params), + mock.call(name="data-stream-B", params=request_params), ] ) @@ -2730,7 +2732,7 @@ async def test_deletes_existing_data_streams(self, es): "success": True, } - es.indices.delete_data_stream.assert_awaited_once_with("data-stream-B", params={}) + es.indices.delete_data_stream.assert_awaited_once_with(name="data-stream-B", params={}) @mock.patch("elasticsearch.Elasticsearch") @pytest.mark.asyncio @@ -2756,8 +2758,8 @@ async def test_deletes_all_data_streams(self, es): es.indices.delete_data_stream.assert_has_awaits( [ - mock.call("data-stream-A", ignore=[404], params=params["request-params"]), - mock.call("data-stream-B", ignore=[404], params=params["request-params"]), + mock.call(name="data-stream-A", ignore=[404], params=params["request-params"]), + mock.call(name="data-stream-B", ignore=[404], 
params=params["request-params"]), ] ) assert es.indices.exists.await_count == 0 @@ -2976,11 +2978,7 @@ async def test_deletes_all_index_templates(self, es): @mock.patch("elasticsearch.Elasticsearch") @pytest.mark.asyncio async def test_deletes_only_existing_index_templates(self, es): - async def _side_effect(http_method, path): - if http_method == "HEAD": - return path == "/_component_template/templateB" - - es.transport.perform_request = mock.AsyncMock(side_effect=_side_effect) + es.cluster.exists_component_template = mock.AsyncMock(side_effect=[False, True]) es.cluster.delete_component_template = mock.AsyncMock() r = runner.DeleteComponentTemplate() @@ -3187,20 +3185,21 @@ class TestCreateMlDatafeed: @mock.patch("elasticsearch.Elasticsearch") @pytest.mark.asyncio async def test_create_ml_datafeed(self, es): - es.xpack.ml.put_datafeed = mock.AsyncMock() + es.ml.put_datafeed = mock.AsyncMock() params = {"datafeed-id": "some-data-feed", "body": {"job_id": "total-requests", "indices": ["server-metrics"]}} r = runner.CreateMlDatafeed() await r(es, params) - es.xpack.ml.put_datafeed.assert_awaited_once_with(datafeed_id=params["datafeed-id"], body=params["body"]) + es.ml.put_datafeed.assert_awaited_once_with(datafeed_id=params["datafeed-id"], body=params["body"]) @mock.patch("elasticsearch.Elasticsearch") @pytest.mark.asyncio async def test_create_ml_datafeed_fallback(self, es): - es.xpack.ml.put_datafeed = mock.AsyncMock(side_effect=elasticsearch.TransportError(400, "Bad Request")) - es.transport.perform_request = mock.AsyncMock() + error_meta = elastic_transport.ApiResponseMeta(status=400, http_version="1.1", headers=None, duration=0, node=None) + es.ml.put_datafeed = mock.AsyncMock(side_effect=elasticsearch.BadRequestError(message=400, meta=error_meta, body="Bad Request")) + es.perform_request = mock.AsyncMock() datafeed_id = "some-data-feed" body = {"job_id": "total-requests", "indices": ["server-metrics"]} params = {"datafeed-id": datafeed_id, "body": body} @@ 
-3208,14 +3207,14 @@ async def test_create_ml_datafeed_fallback(self, es): r = runner.CreateMlDatafeed() await r(es, params) - es.transport.perform_request.assert_awaited_once_with("PUT", f"/_xpack/ml/datafeeds/{datafeed_id}", body=body) + es.perform_request.assert_awaited_once_with(method="PUT", path=f"/_xpack/ml/datafeeds/{datafeed_id}", body=body) class TestDeleteMlDatafeed: @mock.patch("elasticsearch.Elasticsearch") @pytest.mark.asyncio async def test_delete_ml_datafeed(self, es): - es.xpack.ml.delete_datafeed = mock.AsyncMock() + es.ml.delete_datafeed = mock.AsyncMock() datafeed_id = "some-data-feed" params = {"datafeed-id": datafeed_id} @@ -3223,13 +3222,15 @@ async def test_delete_ml_datafeed(self, es): r = runner.DeleteMlDatafeed() await r(es, params) - es.xpack.ml.delete_datafeed.assert_awaited_once_with(datafeed_id=datafeed_id, force=False, ignore=[404]) + es.ml.delete_datafeed.assert_awaited_once_with(datafeed_id=datafeed_id, force=False, ignore=[404]) @mock.patch("elasticsearch.Elasticsearch") @pytest.mark.asyncio async def test_delete_ml_datafeed_fallback(self, es): - es.xpack.ml.delete_datafeed = mock.AsyncMock(side_effect=elasticsearch.TransportError(400, "Bad Request")) - es.transport.perform_request = mock.AsyncMock() + error_meta = elastic_transport.ApiResponseMeta(status=400, http_version="1.1", headers=None, duration=0, node=None) + es.ml.delete_datafeed = mock.AsyncMock(side_effect=elasticsearch.BadRequestError(message=400, meta=error_meta, body="Bad Request")) + + es.perform_request = mock.AsyncMock() datafeed_id = "some-data-feed" params = { "datafeed-id": datafeed_id, @@ -3238,8 +3239,8 @@ async def test_delete_ml_datafeed_fallback(self, es): r = runner.DeleteMlDatafeed() await r(es, params) - es.transport.perform_request.assert_awaited_once_with( - "DELETE", f"/_xpack/ml/datafeeds/{datafeed_id}", params={"force": "false", "ignore": 404} + es.perform_request.assert_awaited_once_with( + method="DELETE", 
path=f"/_xpack/ml/datafeeds/{datafeed_id}", params={"force": "false", "ignore": 404} ) @@ -3247,33 +3248,34 @@ class TestStartMlDatafeed: @mock.patch("elasticsearch.Elasticsearch") @pytest.mark.asyncio async def test_start_ml_datafeed_with_body(self, es): - es.xpack.ml.start_datafeed = mock.AsyncMock() + es.ml.start_datafeed = mock.AsyncMock() params = {"datafeed-id": "some-data-feed", "body": {"end": "now"}} r = runner.StartMlDatafeed() await r(es, params) - es.xpack.ml.start_datafeed.assert_awaited_once_with( + es.ml.start_datafeed.assert_awaited_once_with( datafeed_id=params["datafeed-id"], body=params["body"], start=None, end=None, timeout=None ) @mock.patch("elasticsearch.Elasticsearch") @pytest.mark.asyncio async def test_start_ml_datafeed_with_body_fallback(self, es): - es.xpack.ml.start_datafeed = mock.AsyncMock(side_effect=elasticsearch.TransportError(400, "Bad Request")) - es.transport.perform_request = mock.AsyncMock() + error_meta = elastic_transport.ApiResponseMeta(status=400, http_version="1.1", headers=None, duration=0, node=None) + es.ml.start_datafeed = mock.AsyncMock(side_effect=elasticsearch.BadRequestError(message=400, meta=error_meta, body="Bad Request")) + es.perform_request = mock.AsyncMock() body = {"end": "now"} params = {"datafeed-id": "some-data-feed", "body": body} r = runner.StartMlDatafeed() await r(es, params) - es.transport.perform_request.assert_awaited_once_with("POST", f"/_xpack/ml/datafeeds/{params['datafeed-id']}/_start", body=body) + es.perform_request.assert_awaited_once_with(method="POST", path=f"/_xpack/ml/datafeeds/{params['datafeed-id']}/_start", body=body) @mock.patch("elasticsearch.Elasticsearch") @pytest.mark.asyncio async def test_start_ml_datafeed_with_params(self, es): - es.xpack.ml.start_datafeed = mock.AsyncMock() + es.ml.start_datafeed = mock.AsyncMock() params = { "datafeed-id": "some-data-feed", "start": "2017-01-01T01:00:00Z", @@ -3284,7 +3286,7 @@ async def test_start_ml_datafeed_with_params(self, es): r = 
runner.StartMlDatafeed() await r(es, params) - es.xpack.ml.start_datafeed.assert_awaited_once_with( + es.ml.start_datafeed.assert_awaited_once_with( datafeed_id=params["datafeed-id"], body=None, start=params["start"], end=params["end"], timeout=params["timeout"] ) @@ -3293,7 +3295,7 @@ class TestStopMlDatafeed: @mock.patch("elasticsearch.Elasticsearch") @pytest.mark.asyncio async def test_stop_ml_datafeed(self, es): - es.xpack.ml.stop_datafeed = mock.AsyncMock() + es.ml.stop_datafeed = mock.AsyncMock() params = { "datafeed-id": "some-data-feed", "force": random.choice([False, True]), @@ -3303,15 +3305,14 @@ async def test_stop_ml_datafeed(self, es): r = runner.StopMlDatafeed() await r(es, params) - es.xpack.ml.stop_datafeed.assert_awaited_once_with( - datafeed_id=params["datafeed-id"], force=params["force"], timeout=params["timeout"] - ) + es.ml.stop_datafeed.assert_awaited_once_with(datafeed_id=params["datafeed-id"], force=params["force"], timeout=params["timeout"]) @mock.patch("elasticsearch.Elasticsearch") @pytest.mark.asyncio async def test_stop_ml_datafeed_fallback(self, es): - es.xpack.ml.stop_datafeed = mock.AsyncMock(side_effect=elasticsearch.TransportError(400, "Bad Request")) - es.transport.perform_request = mock.AsyncMock() + error_meta = elastic_transport.ApiResponseMeta(status=400, http_version="1.1", headers=None, duration=0, node=None) + es.ml.stop_datafeed = mock.AsyncMock(side_effect=elasticsearch.BadRequestError(message=400, meta=error_meta, body="Bad Request")) + es.perform_request = mock.AsyncMock() params = { "datafeed-id": "some-data-feed", @@ -3322,9 +3323,9 @@ async def test_stop_ml_datafeed_fallback(self, es): r = runner.StopMlDatafeed() await r(es, params) - es.transport.perform_request.assert_awaited_once_with( - "POST", - f"/_xpack/ml/datafeeds/{params['datafeed-id']}/_stop", + es.perform_request.assert_awaited_once_with( + method="POST", + path=f"/_xpack/ml/datafeeds/{params['datafeed-id']}/_stop", params={"force": 
str(params["force"]).lower(), "timeout": params["timeout"]}, ) @@ -3333,7 +3334,7 @@ class TestCreateMlJob: @mock.patch("elasticsearch.Elasticsearch") @pytest.mark.asyncio async def test_create_ml_job(self, es): - es.xpack.ml.put_job = mock.AsyncMock() + es.ml.put_job = mock.AsyncMock() params = { "job-id": "an-ml-job", @@ -3356,13 +3357,14 @@ async def test_create_ml_job(self, es): r = runner.CreateMlJob() await r(es, params) - es.xpack.ml.put_job.assert_awaited_once_with(job_id=params["job-id"], body=params["body"]) + es.ml.put_job.assert_awaited_once_with(job_id=params["job-id"], body=params["body"]) @mock.patch("elasticsearch.Elasticsearch") @pytest.mark.asyncio async def test_create_ml_job_fallback(self, es): - es.xpack.ml.put_job = mock.AsyncMock(side_effect=elasticsearch.TransportError(400, "Bad Request")) - es.transport.perform_request = mock.AsyncMock() + error_meta = elastic_transport.ApiResponseMeta(status=400, http_version="1.1", headers=None, duration=0, node=None) + es.ml.put_job = mock.AsyncMock(side_effect=elasticsearch.BadRequestError(message=400, meta=error_meta, body="Bad Request")) + es.perform_request = mock.AsyncMock() body = { "description": "Total sum of requests", @@ -3383,14 +3385,14 @@ async def test_create_ml_job_fallback(self, es): r = runner.CreateMlJob() await r(es, params) - es.transport.perform_request.assert_awaited_once_with("PUT", f"/_xpack/ml/anomaly_detectors/{params['job-id']}", body=body) + es.perform_request.assert_awaited_once_with(method="PUT", path=f"/_xpack/ml/anomaly_detectors/{params['job-id']}", body=body) class TestDeleteMlJob: @mock.patch("elasticsearch.Elasticsearch") @pytest.mark.asyncio async def test_delete_ml_job(self, es): - es.xpack.ml.delete_job = mock.AsyncMock() + es.ml.delete_job = mock.AsyncMock() job_id = "an-ml-job" params = {"job-id": job_id} @@ -3398,13 +3400,14 @@ async def test_delete_ml_job(self, es): r = runner.DeleteMlJob() await r(es, params) - 
es.xpack.ml.delete_job.assert_awaited_once_with(job_id=job_id, force=False, ignore=[404]) + es.ml.delete_job.assert_awaited_once_with(job_id=job_id, force=False, ignore=[404]) @mock.patch("elasticsearch.Elasticsearch") @pytest.mark.asyncio async def test_delete_ml_job_fallback(self, es): - es.xpack.ml.delete_job = mock.AsyncMock(side_effect=elasticsearch.TransportError(400, "Bad Request")) - es.transport.perform_request = mock.AsyncMock() + error_meta = elastic_transport.ApiResponseMeta(status=400, http_version="1.1", headers=None, duration=0, node=None) + es.ml.delete_job = mock.AsyncMock(side_effect=elasticsearch.BadRequestError(message=400, meta=error_meta, body="Bad Request")) + es.perform_request = mock.AsyncMock() job_id = "an-ml-job" params = {"job-id": job_id} @@ -3412,8 +3415,8 @@ async def test_delete_ml_job_fallback(self, es): r = runner.DeleteMlJob() await r(es, params) - es.transport.perform_request.assert_awaited_once_with( - "DELETE", f"/_xpack/ml/anomaly_detectors/{params['job-id']}", params={"force": "false", "ignore": 404} + es.perform_request.assert_awaited_once_with( + method="DELETE", path=f"/_xpack/ml/anomaly_detectors/{params['job-id']}", params={"force": "false", "ignore": 404} ) @@ -3421,7 +3424,7 @@ class TestOpenMlJob: @mock.patch("elasticsearch.Elasticsearch") @pytest.mark.asyncio async def test_open_ml_job(self, es): - es.xpack.ml.open_job = mock.AsyncMock() + es.ml.open_job = mock.AsyncMock() job_id = "an-ml-job" params = {"job-id": job_id} @@ -3429,13 +3432,14 @@ async def test_open_ml_job(self, es): r = runner.OpenMlJob() await r(es, params) - es.xpack.ml.open_job.assert_awaited_once_with(job_id=job_id) + es.ml.open_job.assert_awaited_once_with(job_id=job_id) @mock.patch("elasticsearch.Elasticsearch") @pytest.mark.asyncio async def test_open_ml_job_fallback(self, es): - es.xpack.ml.open_job = mock.AsyncMock(side_effect=elasticsearch.TransportError(400, "Bad Request")) - es.transport.perform_request = mock.AsyncMock() + error_meta = 
elastic_transport.ApiResponseMeta(status=400, http_version="1.1", headers=None, duration=0, node=None) + es.ml.open_job = mock.AsyncMock(side_effect=elasticsearch.BadRequestError(message=400, meta=error_meta, body="Bad Request")) + es.perform_request = mock.AsyncMock() job_id = "an-ml-job" params = {"job-id": job_id} @@ -3443,14 +3447,14 @@ async def test_open_ml_job_fallback(self, es): r = runner.OpenMlJob() await r(es, params) - es.transport.perform_request.assert_awaited_once_with("POST", f"/_xpack/ml/anomaly_detectors/{params['job-id']}/_open") + es.perform_request.assert_awaited_once_with(method="POST", path=f"/_xpack/ml/anomaly_detectors/{params['job-id']}/_open") class TestCloseMlJob: @mock.patch("elasticsearch.Elasticsearch") @pytest.mark.asyncio async def test_close_ml_job(self, es): - es.xpack.ml.close_job = mock.AsyncMock() + es.ml.close_job = mock.AsyncMock() params = { "job-id": "an-ml-job", "force": random.choice([False, True]), @@ -3460,13 +3464,14 @@ async def test_close_ml_job(self, es): r = runner.CloseMlJob() await r(es, params) - es.xpack.ml.close_job.assert_awaited_once_with(job_id=params["job-id"], force=params["force"], timeout=params["timeout"]) + es.ml.close_job.assert_awaited_once_with(job_id=params["job-id"], force=params["force"], timeout=params["timeout"]) @mock.patch("elasticsearch.Elasticsearch") @pytest.mark.asyncio async def test_close_ml_job_fallback(self, es): - es.xpack.ml.close_job = mock.AsyncMock(side_effect=elasticsearch.TransportError(400, "Bad Request")) - es.transport.perform_request = mock.AsyncMock() + error_meta = elastic_transport.ApiResponseMeta(status=400, http_version="1.1", headers=None, duration=0, node=None) + es.ml.close_job = mock.AsyncMock(side_effect=elasticsearch.BadRequestError(message=400, meta=error_meta, body="Bad Request")) + es.perform_request = mock.AsyncMock() params = { "job-id": "an-ml-job", @@ -3477,9 +3482,9 @@ async def test_close_ml_job_fallback(self, es): r = runner.CloseMlJob() await r(es, 
params) - es.transport.perform_request.assert_awaited_once_with( - "POST", - f"/_xpack/ml/anomaly_detectors/{params['job-id']}/_close", + es.perform_request.assert_awaited_once_with( + method="POST", + path=f"/_xpack/ml/anomaly_detectors/{params['job-id']}/_close", params={"force": str(params["force"]).lower(), "timeout": params["timeout"]}, ) @@ -3488,7 +3493,7 @@ class TestRawRequestRunner: @mock.patch("elasticsearch.Elasticsearch") @pytest.mark.asyncio async def test_raises_missing_slash(self, es): - es.transport.perform_request = mock.AsyncMock() + es.perform_request = mock.AsyncMock() r = runner.RawRequest() params = {"path": "_cat/count"} @@ -3504,18 +3509,18 @@ async def test_raises_missing_slash(self, es): @mock.patch("elasticsearch.Elasticsearch") @pytest.mark.asyncio async def test_issue_request_with_defaults(self, es): - es.transport.perform_request = mock.AsyncMock() + es.perform_request = mock.AsyncMock() r = runner.RawRequest() params = {"path": "/_cat/count"} await r(es, params) - es.transport.perform_request.assert_called_once_with(method="GET", url="/_cat/count", headers=None, body=None, params={}) + es.perform_request.assert_called_once_with(method="GET", path="/_cat/count", headers=None, body=None, params={}) @mock.patch("elasticsearch.Elasticsearch") @pytest.mark.asyncio async def test_issue_delete_index(self, es): - es.transport.perform_request = mock.AsyncMock() + es.perform_request = mock.AsyncMock() r = runner.RawRequest() params = { @@ -3528,14 +3533,14 @@ async def test_issue_delete_index(self, es): } await r(es, params) - es.transport.perform_request.assert_called_once_with( - method="DELETE", url="/twitter", headers=None, body=None, params={"ignore": [400, 404], "pretty": "true"} + es.perform_request.assert_called_once_with( + method="DELETE", path="/twitter", headers=None, body=None, params={"ignore": [400, 404], "pretty": "true"} ) @mock.patch("elasticsearch.Elasticsearch") @pytest.mark.asyncio async def test_issue_create_index(self, 
es): - es.transport.perform_request = mock.AsyncMock() + es.perform_request = mock.AsyncMock() r = runner.RawRequest() params = { @@ -3551,14 +3556,14 @@ async def test_issue_create_index(self, es): } await r(es, params) - es.transport.perform_request.assert_called_once_with( - method="POST", url="/twitter", headers=None, body={"settings": {"index": {"number_of_replicas": 0}}}, params={} + es.perform_request.assert_called_once_with( + method="POST", path="/twitter", headers=None, body={"settings": {"index": {"number_of_replicas": 0}}}, params={} ) @mock.patch("elasticsearch.Elasticsearch") @pytest.mark.asyncio async def test_issue_msearch(self, es): - es.transport.perform_request = mock.AsyncMock() + es.perform_request = mock.AsyncMock() r = runner.RawRequest() params = { @@ -3573,9 +3578,9 @@ async def test_issue_msearch(self, es): } await r(es, params) - es.transport.perform_request.assert_called_once_with( + es.perform_request.assert_called_once_with( method="GET", - url="/_msearch", + path="/_msearch", headers={"Content-Type": "application/x-ndjson"}, body=[ {"index": "test"}, @@ -3589,7 +3594,7 @@ async def test_issue_msearch(self, es): @mock.patch("elasticsearch.Elasticsearch") @pytest.mark.asyncio async def test_raw_with_timeout_and_opaqueid(self, es): - es.transport.perform_request = mock.AsyncMock() + es.perform_request = mock.AsyncMock() r = runner.RawRequest() params = { @@ -3606,9 +3611,9 @@ async def test_raw_with_timeout_and_opaqueid(self, es): } await r(es, params) - es.transport.perform_request.assert_called_once_with( + es.perform_request.assert_called_once_with( method="GET", - url="/_msearch", + path="/_msearch", headers={"Content-Type": "application/x-ndjson", "x-opaque-id": "test-id1"}, body=[ {"index": "test"}, @@ -4948,7 +4953,7 @@ class TestSqlRunner: @mock.patch("elasticsearch.Elasticsearch") @pytest.mark.asyncio async def test_fetch_one_page(self, es): - es.transport.perform_request = 
mock.AsyncMock(return_value=io.StringIO(json.dumps(self.default_response))) + es.perform_request = mock.AsyncMock(return_value=io.StringIO(json.dumps(self.default_response))) sql_runner = runner.Sql() params = { @@ -4966,12 +4971,12 @@ async def test_fetch_one_page(self, es): assert result == {"success": True, "weight": 1, "unit": "ops"} - es.transport.perform_request.assert_awaited_once_with("POST", "/_sql", body=params["body"]) + es.perform_request.assert_awaited_once_with(method="POST", path="/_sql", body=params["body"]) @mock.patch("elasticsearch.Elasticsearch") @pytest.mark.asyncio async def test_fetch_all_pages(self, es): - es.transport.perform_request = mock.AsyncMock(return_value=io.StringIO(json.dumps(self.default_response))) + es.perform_request = mock.AsyncMock(return_value=io.StringIO(json.dumps(self.default_response))) sql_runner = runner.Sql() params = {"operation-type": "sql", "body": {"query": "SELECT first_name FROM emp"}, "pages": 3} @@ -4981,18 +4986,18 @@ async def test_fetch_all_pages(self, es): assert result == {"success": True, "weight": 3, "unit": "ops"} - es.transport.perform_request.assert_has_calls( + es.perform_request.assert_has_calls( [ - mock.call("POST", "/_sql", body=params["body"]), - mock.call("POST", "/_sql", body={"cursor": self.default_response["cursor"]}), - mock.call("POST", "/_sql", body={"cursor": self.default_response["cursor"]}), + mock.call(method="POST", path="/_sql", body=params["body"]), + mock.call(method="POST", path="/_sql", body={"cursor": self.default_response["cursor"]}), + mock.call(method="POST", path="/_sql", body={"cursor": self.default_response["cursor"]}), ] ) @mock.patch("elasticsearch.Elasticsearch") @pytest.mark.asyncio async def test_failure_on_too_few_pages(self, es): - es.transport.perform_request = mock.AsyncMock( + es.perform_request = mock.AsyncMock( side_effect=[io.StringIO(json.dumps(self.default_response)), io.StringIO(json.dumps({"rows": [["John"]]}))] ) @@ -5011,10 +5016,10 @@ async def 
test_failure_on_too_few_pages(self, es): assert ctx.value.message == "Result set has been exhausted before all pages have been fetched, 1 page(s) remaining." - es.transport.perform_request.assert_has_calls( + es.perform_request.assert_has_calls( [ - mock.call("POST", "/_sql", body=params["body"]), - mock.call("POST", "/_sql", body={"cursor": self.default_response["cursor"]}), + mock.call(method="POST", path="/_sql", body=params["body"]), + mock.call(method="POST", path="/_sql", body={"cursor": self.default_response["cursor"]}), ] ) @@ -5235,7 +5240,7 @@ async def test_search_after_with_pit(self, es): }, } - es.transport.perform_request = mock.AsyncMock( + es.perform_request = mock.AsyncMock( side_effect=[ io.BytesIO(json.dumps(page_1).encode()), io.BytesIO(json.dumps(page_2).encode()), @@ -5250,11 +5255,11 @@ async def test_search_after_with_pit(self, es): # make sure pit_id is updated afterward assert runner.CompositeContext.get(pit_op) == "fedcba9876543211" - es.transport.perform_request.assert_has_awaits( + es.perform_request.assert_has_awaits( [ mock.call( - "GET", - "/_search", + method="GET", + path="/_search", params={}, body={ "query": { @@ -5275,8 +5280,8 @@ async def test_search_after_with_pit(self, es): headers=None, ), mock.call( - "GET", - "/_search", + method="GET", + path="/_search", params={}, body={ "query": { @@ -5345,7 +5350,7 @@ async def test_search_after_without_pit(self, es): }, } - es.transport.perform_request = mock.AsyncMock( + es.perform_request = mock.AsyncMock( side_effect=[ io.BytesIO(json.dumps(page_1).encode()), io.BytesIO(json.dumps(page_2).encode()), @@ -5354,11 +5359,11 @@ async def test_search_after_without_pit(self, es): r = runner.Query() await r(es, params) - es.transport.perform_request.assert_has_awaits( + es.perform_request.assert_has_awaits( [ mock.call( - "GET", - "/test-index-1/_search", + method="GET", + path="/test-index-1/_search", params={}, body={ "query": { @@ -5372,8 +5377,8 @@ async def 
test_search_after_without_pit(self, es): headers=None, ), mock.call( - "GET", - "/test-index-1/_search", + method="GET", + path="/test-index-1/_search", params={}, body={ "query": { @@ -5543,7 +5548,7 @@ def teardown_method(self, method): @mock.patch("elasticsearch.Elasticsearch") @pytest.mark.asyncio async def test_execute_multiple_streams(self, es): - es.transport.perform_request = mock.AsyncMock( + es.perform_request = mock.AsyncMock( side_effect=[ # raw-request None, @@ -5606,17 +5611,17 @@ async def test_execute_multiple_streams(self, es): r = runner.Composite() await r(es, params) - es.transport.perform_request.assert_has_awaits( + es.perform_request.assert_has_awaits( [ - mock.call(method="GET", url="/", headers=None, body={}, params={}), - mock.call("GET", "/test/_search", params={}, body={"query": {"match_all": {}}}, headers=None), + mock.call(method="GET", path="/", headers=None, body={}, params={}), + mock.call(method="GET", path="/test/_search", params={}, body={"query": {"match_all": {}}}, headers=None), ] ) @mock.patch("elasticsearch.Elasticsearch") @pytest.mark.asyncio async def test_propagates_violated_assertions(self, es): - es.transport.perform_request = mock.AsyncMock( + es.perform_request = mock.AsyncMock( side_effect=[ # search io.StringIO( @@ -5665,11 +5670,11 @@ async def test_propagates_violated_assertions(self, es): with pytest.raises(exceptions.RallyTaskAssertionError, match=r"Expected \[hits\] to be > \[0\] but was \[0\]."): await r(es, params) - es.transport.perform_request.assert_has_awaits( + es.perform_request.assert_has_awaits( [ mock.call( - "GET", - "/test/_search", + method="GET", + path="/test/_search", params={}, body={ "query": { @@ -5684,7 +5689,7 @@ async def test_propagates_violated_assertions(self, es): @mock.patch("elasticsearch.Elasticsearch") @pytest.mark.asyncio async def test_executes_tasks_in_specified_order(self, es): - es.transport.perform_request = mock.AsyncMock() + es.perform_request = mock.AsyncMock() params = { 
"requests": [ @@ -6018,10 +6023,10 @@ async def test_is_does_not_retry_on_success(self): async def test_retries_on_timeout_if_wanted_and_raises_if_no_recovery(self): delegate = mock.AsyncMock( side_effect=[ - elasticsearch.ConnectionError("N/A", "no route to host"), - elasticsearch.ConnectionError("N/A", "no route to host"), - elasticsearch.ConnectionError("N/A", "no route to host"), - elasticsearch.ConnectionError("N/A", "no route to host"), + elasticsearch.ConnectionError(message="no route to host"), + elasticsearch.ConnectionError(message="no route to host"), + elasticsearch.ConnectionError(message="no route to host"), + elasticsearch.ConnectionError(message="no route to host"), ] ) es = None @@ -6045,7 +6050,7 @@ async def test_retries_on_timeout_if_wanted_and_returns_first_call(self): delegate = mock.AsyncMock( side_effect=[ - elasticsearch.ConnectionError("N/A", "no route to host"), + elasticsearch.ConnectionError(message="no route to host"), failed_return_value, ] ) @@ -6067,7 +6072,7 @@ async def test_retries_on_timeout_if_wanted_and_returns_first_call(self): @pytest.mark.asyncio async def test_retries_mixed_timeout_and_application_errors(self): - connection_error = elasticsearch.ConnectionError("N/A", "no route to host") + connection_error = elasticsearch.ConnectionError(message="no route to host") failed_return_value = {"weight": 1, "unit": "ops", "success": False} success_return_value = {"weight": 1, "unit": "ops", "success": False} From ef921df8371065700666ad405b323d06cae699b6 Mon Sep 17 00:00:00 2001 From: Mike Baamonde Date: Fri, 3 Jun 2022 15:29:30 -0400 Subject: [PATCH 18/68] Fix linting error. 
--- esrally/client.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/esrally/client.py b/esrally/client.py index c233a96b3..01b4a7a96 100644 --- a/esrally/client.py +++ b/esrally/client.py @@ -523,12 +523,12 @@ def create_async(self): import esrally.async_connection class LazyJSONSerializer(JSONSerializer): - def loads(self, s): + def loads(self, data): meta = RallyAsyncElasticsearch.request_context.get() if "raw_response" in meta: - return io.BytesIO(s) + return io.BytesIO(data) else: - return super().loads(s) + return super().loads(data) async def on_request_start(session, trace_config_ctx, params): RallyAsyncElasticsearch.on_request_start() From 9627d291895ac90f7ae99143f2567f8d8cd1209c Mon Sep 17 00:00:00 2001 From: Mike Baamonde Date: Fri, 3 Jun 2022 16:08:21 -0400 Subject: [PATCH 19/68] Begin DRYing up EsClientFactory. --- esrally/client.py | 132 ++++++++++++++++++---------------------------- 1 file changed, 50 insertions(+), 82 deletions(-) diff --git a/esrally/client.py b/esrally/client.py index 01b4a7a96..ea650513a 100644 --- a/esrally/client.py +++ b/esrally/client.py @@ -137,6 +137,48 @@ def return_raw_response(cls): ctx["raw_response"] = True +def _mimetype_header_to_compat(header, request_headers): + # Converts all parts of a Accept/Content-Type headers + # from application/X -> application/vnd.elasticsearch+X + mimetype = request_headers.get(header, None) + if mimetype: + request_headers[header] = _COMPAT_MIMETYPE_RE.sub(_COMPAT_MIMETYPE_SUB, mimetype) + + +def _escape(value: Any) -> str: + """ + Escape a single value of a URL string or a query parameter. If it is a list + or tuple, turn it into a comma-separated string first. 
+ """ + + # make sequences into comma-separated stings + if isinstance(value, (list, tuple)): + value = ",".join([_escape(item) for item in value]) + + # dates and datetimes into isoformat + elif isinstance(value, (date, datetime)): + value = value.isoformat() + + # make bools into true/false strings + elif isinstance(value, bool): + value = str(value).lower() + + elif isinstance(value, bytes): + return value.decode("utf-8", "surrogatepass") + + if not isinstance(value, str): + return str(value) + return value + + +def _quote(value: Any) -> str: + return percent_encode(_escape(value), ",*") + + +def _quote_query(query: Mapping[str, Any]) -> str: + return "&".join([f"{k}={_quote(v)}" for k, v in query.items()]) + + class EsClientFactory: """ Abstracts how the Elasticsearch client is created and customizes the client for backwards @@ -373,12 +415,14 @@ def perform_request( body: Optional[Any] = None, ) -> ApiResponse[Any]: + # We need to ensure that we provide content-type and accept headers if body is not None: if headers is None: headers = {"content-type": "application/json", "accept": "application/json"} else: if headers.get("content-type") is None: headers["content-type"] = "application/json" + if headers.get("accept") is None: headers["accept"] = "application/json" if headers: @@ -397,50 +441,11 @@ def perform_request( if self._verified_elasticsearch is not True: _ProductChecker.raise_error(self._verified_elasticsearch, info_meta, info_body) - def mimetype_header_to_compat(header: str) -> None: - # Converts all parts of a Accept/Content-Type headers - # from application/X -> application/vnd.elasticsearch+X - nonlocal request_headers - mimetype = request_headers.get(header, None) - if mimetype: - request_headers[header] = _COMPAT_MIMETYPE_RE.sub(_COMPAT_MIMETYPE_SUB, mimetype) - # Custom behavior for backwards compatibility with versions of ES that do not # recognize the compatible-with header. 
if self.distribution_version is not None and self.distribution_version >= versions.Version.from_string("8.0.0"): - mimetype_header_to_compat("Accept") - mimetype_header_to_compat("Content-Type") - - def _escape(value: Any) -> str: - """ - Escape a single value of a URL string or a query parameter. If it is a list - or tuple, turn it into a comma-separated string first. - """ - - # make sequences into comma-separated stings - if isinstance(value, (list, tuple)): - value = ",".join([_escape(item) for item in value]) - - # dates and datetimes into isoformat - elif isinstance(value, (date, datetime)): - value = value.isoformat() - - # make bools into true/false strings - elif isinstance(value, bool): - value = str(value).lower() - - elif isinstance(value, bytes): - return value.decode("utf-8", "surrogatepass") - - if not isinstance(value, str): - return str(value) - return value - - def _quote(value: Any) -> str: - return percent_encode(_escape(value), ",*") - - def _quote_query(query: Mapping[str, Any]) -> str: - return "&".join([f"{k}={_quote(v)}" for k, v in query.items()]) + _mimetype_header_to_compat("Accept", request_headers) + _mimetype_header_to_compat("Content-Type", request_headers) if params: target = f"{path}?{_quote_query(params)}" @@ -592,12 +597,14 @@ async def perform_request( body: Optional[Any] = None, ) -> ApiResponse[Any]: + # We need to ensure that we provide content-type and accept headers if body is not None: if headers is None: headers = {"content-type": "application/json", "accept": "application/json"} else: if headers.get("content-type") is None: headers["content-type"] = "application/json" + if headers.get("accept") is None: headers["accept"] = "application/json" if headers: @@ -606,48 +613,9 @@ async def perform_request( else: request_headers = self._headers - def mimetype_header_to_compat(header: str) -> None: - # Converts all parts of a Accept/Content-Type headers - # from application/X -> application/vnd.elasticsearch+X - nonlocal 
request_headers - mimetype = request_headers.get(header, None) - if mimetype: - request_headers[header] = _COMPAT_MIMETYPE_RE.sub(_COMPAT_MIMETYPE_SUB, mimetype) - if self.distribution_version is not None and self.distribution_version >= versions.Version.from_string("8.0.0"): - mimetype_header_to_compat("Accept") - mimetype_header_to_compat("Content-Type") - - def _escape(value: Any) -> str: - """ - Escape a single value of a URL string or a query parameter. If it is a list - or tuple, turn it into a comma-separated string first. - """ - - # make sequences into comma-separated stings - if isinstance(value, (list, tuple)): - value = ",".join([_escape(item) for item in value]) - - # dates and datetimes into isoformat - elif isinstance(value, (date, datetime)): - value = value.isoformat() - - # make bools into true/false strings - elif isinstance(value, bool): - value = str(value).lower() - - elif isinstance(value, bytes): - return value.decode("utf-8", "surrogatepass") - - if not isinstance(value, str): - return str(value) - return value - - def _quote(value: Any) -> str: - return percent_encode(_escape(value), ",*") - - def _quote_query(query: Mapping[str, Any]) -> str: - return "&".join([f"{k}={_quote(v)}" for k, v in query.items()]) + _mimetype_header_to_compat("Accept", request_headers) + _mimetype_header_to_compat("Content-Type", request_headers) if params: target = f"{path}?{_quote_query(params)}" From 70393ab15ed5efe559585e952e9604dbaf5b2c8a Mon Sep 17 00:00:00 2001 From: Quentin Pradet Date: Fri, 4 Nov 2022 09:13:01 +0400 Subject: [PATCH 20/68] Use newer ssl parameter in aiohttp client API Using ssl= is the preferred way since 3.0 (aio-libs/aiohttp#2626) and ssl_context= goes away in the next version, 4.0 (aio-libs/aiohttp#3548). 
--- esrally/client/asynchronous.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/esrally/client/asynchronous.py b/esrally/client/asynchronous.py index d1d5d5c1c..e76f83995 100644 --- a/esrally/client/asynchronous.py +++ b/esrally/client/asynchronous.py @@ -248,7 +248,7 @@ async def _create_aiohttp_session(self): connector = StaticConnector(limit=self._limit, enable_cleanup_closed=self._enable_cleanup_closed) else: connector = aiohttp.TCPConnector( - limit=self._limit, use_dns_cache=True, ssl_context=self._ssl_context, enable_cleanup_closed=self._enable_cleanup_closed + limit=self._limit, use_dns_cache=True, ssl=self._ssl_context, enable_cleanup_closed=self._enable_cleanup_closed ) self.session = aiohttp.ClientSession( From a4c44105523acab4fb92c67cc1d672d342f09d90 Mon Sep 17 00:00:00 2001 From: Brad Deam Date: Wed, 1 Feb 2023 14:16:49 +1030 Subject: [PATCH 21/68] Set connection attributes after client instantiation --- esrally/client/asynchronous.py | 79 ++++++++++++++++------------------ esrally/client/factory.py | 20 ++++++--- 2 files changed, 51 insertions(+), 48 deletions(-) diff --git a/esrally/client/asynchronous.py b/esrally/client/asynchronous.py index a61a09f1b..92d3a5103 100644 --- a/esrally/client/asynchronous.py +++ b/esrally/client/asynchronous.py @@ -202,22 +202,38 @@ def response(self, path): class RallyAiohttpHttpNode(elastic_transport.AiohttpHttpNode): def __init__(self, config): super().__init__(config) - - self._loop = asyncio.get_running_loop() - self._limit = None - - client_options = config._extras.get("_rally_client_options") - if client_options: - self._trace_configs = client_options.get("trace_config") - self._enable_cleanup_closed = client_options.get("enable_cleanup_closed") - - static_responses = client_options.get("static_responses") - self.use_static_responses = static_responses is not None - - if self.use_static_responses: + self._loop = None + self._trace_configs = None + self._enable_cleanup_closed = None + 
self._static_responses = None + + @property + def trace_configs(self): + return self._trace_configs + + @trace_configs.setter + def trace_configs(self, trace_configs): + self._trace_configs = [trace_configs] + + @property + def enable_cleanup_closed(self): + return self._enable_cleanup_closed + + @enable_cleanup_closed.setter + def enable_cleanup_closed(self, enable_cleanup_closed): + self._enable_cleanup_closed = enable_cleanup_closed + + @property + def static_responses(self): + return self._static_responses + + @static_responses.setter + def static_responses(self, static_responses): + self._static_responses = static_responses + if self._static_responses: # read static responses once and reuse them if not StaticRequest.RESPONSES: - with open(io.normalize_path(static_responses)) as f: + with open(io.normalize_path(self._static_responses)) as f: StaticRequest.RESPONSES = ResponseMatcher(json.load(f)) self._request_class = StaticRequest @@ -227,11 +243,14 @@ def __init__(self, config): self._response_class = RawClientResponse def _create_aiohttp_session(self): - if self.use_static_responses: - connector = StaticConnector(limit=self._limit, enable_cleanup_closed=self._enable_cleanup_closed) + if self._loop is None: + self._loop = asyncio.get_running_loop() + + if self._static_responses: + connector = StaticConnector(limit_per_host=self._connections_per_node, enable_cleanup_closed=self._enable_cleanup_closed) else: connector = aiohttp.TCPConnector( - limit=self._limit, use_dns_cache=True, ssl=self._ssl_context, enable_cleanup_closed=self._enable_cleanup_closed + limit_per_host=self._connections_per_node, use_dns_cache=True, ssl=self._ssl_context, enable_cleanup_closed=self._enable_cleanup_closed ) self.session = aiohttp.ClientSession( @@ -248,31 +267,7 @@ def _create_aiohttp_session(self): class RallyAsyncTransport(elastic_transport.AsyncTransport): def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) - - # TODO The previous way of handling 
trace_config relied on a closure, which is - # not possible anymore now that RallyAsyncTransport is in its own file - - # We need to pass a trace config to the session that's created in - # async_connection.RallyAiohttphttpnode, which is a subclass of - # elastic_transport.AiohttpHttpNode. - # - # Its constructor only accepts an elastic_transport.NodeConfig object. - # Because we do not fully control creation of these objects , we need to - # pass the trace_config by adding it to the NodeConfig's `extras`, which - # can contain arbitrary metadata. - - # client_options.update({"trace_config": [trace_config]}) - # node_configs = args[0] - # for conf in node_configs: - # extras = conf._extras - # extras.update({"_rally_client_options": client_options}) - # conf._extras = extras - # original_args = args - # new_args = (node_configs, *original_args[1:]) - - # super().__init__(*new_args, node_class=RallyAiohttpHttpNode, **kwargs) - + super().__init__(*args, node_class=RallyAiohttpHttpNode, **kwargs) class RallyAsyncElasticsearch(elasticsearch.AsyncElasticsearch, RequestContextHolder): def __init__(self, distro, *args, **kwargs): diff --git a/esrally/client/factory.py b/esrally/client/factory.py index 16886f192..f5054f1e8 100644 --- a/esrally/client/factory.py +++ b/esrally/client/factory.py @@ -118,6 +118,7 @@ def host_string(host): self.logger.debug("SSL support: off") if self._is_set(self.client_options, "create_api_key_per_client"): + self.client_options.pop("create_api_key_per_client") basic_auth_user = self.client_options.get("basic_auth_user", False) basic_auth_password = self.client_options.get("basic_auth_password", False) provided_auth = {"basic_auth_user": basic_auth_user, "basic_auth_password": basic_auth_password} @@ -155,8 +156,9 @@ def host_string(host): else: self.logger.debug("HTTP compression: off") - if self._is_set(self.client_options, "enable_cleanup_closed"): - self.client_options["enable_cleanup_closed"] = 
convert.to_bool(self.client_options.pop("enable_cleanup_closed")) + self.enable_cleanup_closed = convert.to_bool(self.client_options.pop("enable_cleanup_closed", True)) + self.max_connections = max(256, self.client_options.pop("max_connections", 0)) + self.static_responses = self.client_options.pop("static_responses", None) @staticmethod def _only_hostnames(hosts): @@ -222,22 +224,28 @@ async def on_request_end(session, trace_config_ctx, params): # override the builtin JSON serializer self.client_options["serializer"] = LazyJSONSerializer() - # TODO see esrally.client/asynchronous - # self.client_options["trace_config"] = trace_config if api_key is not None: self.client_options.pop("http_auth") self.client_options["api_key"] = api_key - return RallyAsyncElasticsearch( + async_client = RallyAsyncElasticsearch( distro=self.distribution_version, hosts=self.hosts, transport_class=RallyAsyncTransport, ssl_context=self.ssl_context, - maxsize=max, + maxsize=self.max_connections, **self.client_options, ) + # the AsyncElasticsearch constructor automatically creates the corresponding NodeConfig objects, so we set + # their instance attributes after they've been instantiated + for node_connection in async_client.transport.node_pool.all(): + node_connection.trace_configs = trace_config + node_connection.enable_cleanup_closed = self.enable_cleanup_closed + node_connection.static_responses = self.static_responses + + return async_client def wait_for_rest_layer(es, max_attempts=40): """ From a233c017845ed5eea68e1cd2c61cad29a783b6be Mon Sep 17 00:00:00 2001 From: Brad Deam Date: Thu, 2 Feb 2023 11:31:57 +1030 Subject: [PATCH 22/68] Refactor API keys creation --- esrally/client/factory.py | 10 +++++---- tests/client/factory_test.py | 41 +++++++++++++++++------------------- 2 files changed, 25 insertions(+), 26 deletions(-) diff --git a/esrally/client/factory.py b/esrally/client/factory.py index f5054f1e8..8820adbcf 100644 --- a/esrally/client/factory.py +++ 
b/esrally/client/factory.py @@ -226,7 +226,8 @@ async def on_request_end(session, trace_config_ctx, params): self.client_options["serializer"] = LazyJSONSerializer() if api_key is not None: - self.client_options.pop("http_auth") + self.client_options.pop("http_auth", None) + self.client_options.pop("basic_auth", None) self.client_options["api_key"] = api_key async_client = RallyAsyncElasticsearch( @@ -247,6 +248,7 @@ async def on_request_end(session, trace_config_ctx, params): return async_client + def wait_for_rest_layer(es, max_attempts=40): """ Waits for ``max_attempts`` until Elasticsearch's REST API is available. @@ -303,7 +305,7 @@ def create_api_key(es, client_id, max_attempts=5): try: logger.debug("Creating ES API key for client ID [%s]", client_id) - return es.security.create_api_key({"name": f"rally-client-{client_id}"}) + return es.security.create_api_key(name=f"rally-client-{client_id}") except elasticsearch.TransportError as e: logger.debug("Got transport error [%s] on attempt [%s]. 
Sleeping...", str(e), attempt) time.sleep(1) @@ -348,7 +350,7 @@ def raise_exception(failed_ids, cause=None): try: if current_version >= minimum_version: - resp = es.security.invalidate_api_key({"ids": remaining}) + resp = es.security.invalidate_api_key(ids=remaining) deleted += resp["invalidated_api_keys"] remaining = [i for i in ids if i not in deleted] # Like bulk indexing requests, we can get an HTTP 200, but the @@ -374,7 +376,7 @@ def raise_exception(failed_ids, cause=None): remaining = [i for i in ids if i not in deleted] if attempt < max_attempts: for i in remaining: - es.security.invalidate_api_key({"id": i}) + es.security.invalidate_api_key(id=i) deleted.append(i) else: if remaining: diff --git a/tests/client/factory_test.py b/tests/client/factory_test.py index f02aae778..b35cb2085 100644 --- a/tests/client/factory_test.py +++ b/tests/client/factory_test.py @@ -322,11 +322,7 @@ def test_check_hostname_false_when_host_is_ip(self): @mock.patch("esrally.client.asynchronous.RallyAsyncElasticsearch") def test_create_async_client_with_api_key_auth_override(self, es): hosts = [{"host": "localhost", "port": 9200}] - client_options = { - "use_ssl": True, - "verify_certs": True, - "http_auth": ("user", "password"), - } + client_options = {"use_ssl": True, "verify_certs": True, "http_auth": ("user", "password"), "max_connections": 600} # make a copy so we can verify later that the factory did not modify it original_client_options = deepcopy(client_options) api_key = ("id", "secret") @@ -336,6 +332,7 @@ def test_create_async_client_with_api_key_auth_override(self, es): assert f.create_async(api_key=api_key) assert "http_auth" not in f.client_options assert f.client_options["api_key"] == api_key + assert client_options["max_connections"] == f.max_connections assert client_options == original_client_options es.assert_called_once_with( @@ -343,7 +340,7 @@ def test_create_async_client_with_api_key_auth_override(self, es): hosts=["https://localhost:9200"], 
transport_class=RallyAsyncTransport, ssl_context=f.ssl_context, - maxsize=max, + maxsize=f.max_connections, verify_certs=True, serializer=f.client_options["serializer"], api_key=api_key, @@ -489,7 +486,7 @@ def test_successfully_creates_api_keys(self, es): client_id = 0 assert client.create_api_key(es, client_id, max_attempts=3) # even though max_attempts is 3, this should only be called once - es.security.create_api_key.assert_called_once_with({"name": f"rally-client-{client_id}"}) + es.security.create_api_key.assert_called_once_with(name=f"rally-client-{client_id}") @mock.patch("elasticsearch.Elasticsearch") def test_api_key_creation_fails_on_405_and_raises_system_setup_error(self, es): @@ -501,7 +498,7 @@ def test_api_key_creation_fails_on_405_and_raises_system_setup_error(self, es): ): client.create_api_key(es, client_id, max_attempts=5) - es.security.create_api_key.assert_called_once_with({"name": f"rally-client-{client_id}"}) + es.security.create_api_key.assert_called_once_with(name=f"rally-client-{client_id}") @mock.patch("time.sleep") @mock.patch("elasticsearch.Elasticsearch") @@ -514,7 +511,7 @@ def test_retries_api_key_creation_on_transport_errors(self, es, sleep): _api_error(500, "Internal Server Error"), {"id": "abc", "name": f"rally-client-{client_id}", "api_key": "123"}, ] - calls = [mock.call({"name": "rally-client-0"}) for _ in range(5)] + calls = [mock.call(name="rally-client-0") for _ in range(5)] assert client.create_api_key(es, client_id, max_attempts=5) assert es.security.create_api_key.call_args_list == calls @@ -537,7 +534,7 @@ def test_successfully_deletes_api_keys(self, es, version): ] else: es.security.invalidate_api_key.return_value = {"invalidated_api_keys": ["foo", "bar", "baz"], "error_count": 0} - calls = [mock.call({"ids": ids})] + calls = [mock.call(ids=ids)] assert client.delete_api_keys(es, ids, max_attempts=3) assert es.security.invalidate_api_key.has_calls(calls, any_order=True) @@ -559,12 +556,12 @@ def 
test_retries_api_keys_deletion_on_transport_errors(self, es, sleep, version) ] calls = [ # foo and bar are deleted successfully, leaving only baz - mock.call({"id": "foo"}), - mock.call({"id": "bar"}), + mock.call(id="foo"), + mock.call(id="bar"), # two exceptions are thrown, so it should take 3 attempts to delete baz - mock.call({"id": "baz"}), - mock.call({"id": "baz"}), - mock.call({"id": "baz"}), + mock.call(id="baz"), + mock.call(id="baz"), + mock.call(id="baz"), ] else: es.security.invalidate_api_key.side_effect = [ @@ -574,7 +571,7 @@ def test_retries_api_keys_deletion_on_transport_errors(self, es, sleep, version) _api_error(500, "Internal Server Error"), {"invalidated_api_keys": ["foo", "bar", "baz"], "error_count": 0}, ] - calls = [mock.call({"ids": ids}) for _ in range(max_attempts)] + calls = [mock.call(ids=ids) for _ in range(max_attempts)] assert client.delete_api_keys(es, ids, max_attempts=max_attempts) assert es.security.invalidate_api_key.call_args_list == calls @@ -593,9 +590,9 @@ def test_raises_exception_when_api_key_deletion_fails(self, es, version): ] calls = [ - mock.call({"id": "foo"}), - mock.call({"id": "bar"}), - mock.call({"id": "baz"}), + mock.call(id="foo"), + mock.call(id="bar"), + mock.call(id="baz"), ] else: # Since there are two ways this version can fail, we interleave them @@ -613,9 +610,9 @@ def test_raises_exception_when_api_key_deletion_fails(self, es, version): ] calls = [ - mock.call({"ids": ["foo", "bar", "baz", "qux"]}), - mock.call({"ids": ["bar", "baz", "qux"]}), - mock.call({"ids": ["bar", "baz", "qux"]}), + mock.call(ids=["foo", "bar", "baz", "qux"]), + mock.call(ids=["bar", "baz", "qux"]), + mock.call(ids=["bar", "baz", "qux"]), ] with pytest.raises(exceptions.RallyError, match=re.escape(f"Could not delete API keys with the following IDs: {failed_to_delete}")): From d85604cd3ccbed1ecf6e6e0bcd577ff07362febb Mon Sep 17 00:00:00 2001 From: Brad Deam Date: Thu, 2 Feb 2023 11:32:30 +1030 Subject: [PATCH 23/68] Get client 
version string for compatability headers --- esrally/client/asynchronous.py | 5 ++++- esrally/client/common.py | 11 +++++++---- esrally/client/synchronous.py | 7 ++++--- tests/client/common_test.py | 23 +++++++++++++++++++++++ 4 files changed, 38 insertions(+), 8 deletions(-) create mode 100644 tests/client/common_test.py diff --git a/esrally/client/asynchronous.py b/esrally/client/asynchronous.py index 92d3a5103..2e0f252be 100644 --- a/esrally/client/asynchronous.py +++ b/esrally/client/asynchronous.py @@ -270,7 +270,7 @@ def __init__(self, *args, **kwargs): super().__init__(*args, node_class=RallyAiohttpHttpNode, **kwargs) class RallyAsyncElasticsearch(elasticsearch.AsyncElasticsearch, RequestContextHolder): - def __init__(self, distro, *args, **kwargs): + def __init__(self, distro=None, *args, **kwargs): super().__init__(*args, **kwargs) # skip verification at this point; we've already verified this earlier with the synchronous client. # The async client is used in the hot code path and we use customized overrides (such as that we don't @@ -307,6 +307,9 @@ async def perform_request( else: request_headers = self._headers + # Converts all parts of a Accept/Content-Type headers + # from application/X -> application/vnd.elasticsearch+X + # see https://github.com/elastic/elasticsearch/issues/51816 if self.distribution_version is not None and self.distribution_version >= versions.Version.from_string("8.0.0"): _mimetype_header_to_compat("Accept", request_headers) _mimetype_header_to_compat("Content-Type", request_headers) diff --git a/esrally/client/common.py b/esrally/client/common.py index 7460e2c0c..96082afeb 100644 --- a/esrally/client/common.py +++ b/esrally/client/common.py @@ -1,13 +1,16 @@ import re from datetime import date, datetime from typing import Any, Mapping - +from elasticsearch import VERSION from elastic_transport.client_utils import percent_encode -_WARNING_RE = re.compile(r"\"([^\"]*)\"") -# TODO: get versionstr dynamically 
-_COMPAT_MIMETYPE_TEMPLATE = "application/vnd.elasticsearch+%s; compatible-with=" + str("8.2.0".partition(".")[0]) +def _client_major_version_to_str(version: tuple) -> str: + return ".".join(map(str, version)).partition(".")[0] + + +_WARNING_RE = re.compile(r"\"([^\"]*)\"") +_COMPAT_MIMETYPE_TEMPLATE = "application/vnd.elasticsearch+%s; compatible-with=" + _client_major_version_to_str(VERSION) _COMPAT_MIMETYPE_RE = re.compile(r"application/(json|x-ndjson|vnd\.mapbox-vector-tile)") _COMPAT_MIMETYPE_SUB = _COMPAT_MIMETYPE_TEMPLATE % (r"\g<1>",) diff --git a/esrally/client/synchronous.py b/esrally/client/synchronous.py index 15dc34dd9..1c7eb1b44 100644 --- a/esrally/client/synchronous.py +++ b/esrally/client/synchronous.py @@ -121,7 +121,7 @@ def check_product(cls, headers, response): class RallySyncElasticsearch(elasticsearch.Elasticsearch): - def __init__(self, distro, *args, **kwargs): + def __init__(self, distro=None, *args, **kwargs): super().__init__(*args, **kwargs) self._verified_elasticsearch = None @@ -166,8 +166,9 @@ def perform_request( if self._verified_elasticsearch is not True: _ProductChecker.raise_error(self._verified_elasticsearch, info_meta, info_body) - # Custom behavior for backwards compatibility with versions of ES that do not - # recognize the compatible-with header. + # Converts all parts of a Accept/Content-Type headers + # from application/X -> application/vnd.elasticsearch+X + # see https://github.com/elastic/elasticsearch/issues/51816 if self.distribution_version is not None and self.distribution_version >= versions.Version.from_string("8.0.0"): _mimetype_header_to_compat("Accept", request_headers) _mimetype_header_to_compat("Content-Type", request_headers) diff --git a/tests/client/common_test.py b/tests/client/common_test.py new file mode 100644 index 000000000..4b2d003b4 --- /dev/null +++ b/tests/client/common_test.py @@ -0,0 +1,23 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. 
See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from esrally.client import common + + +def test_client_major_version_to_str(): + version = (8, 2, 0) + assert common._client_major_version_to_str(version) == "8" From 70c2e6aae9b6a69c21cedda3a214573c46a998a4 Mon Sep 17 00:00:00 2001 From: Brad Deam Date: Thu, 2 Feb 2023 14:20:30 +1030 Subject: [PATCH 24/68] Fix static_response connector --- esrally/client/asynchronous.py | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/esrally/client/asynchronous.py b/esrally/client/asynchronous.py index 2e0f252be..a891be3d2 100644 --- a/esrally/client/asynchronous.py +++ b/esrally/client/asynchronous.py @@ -56,6 +56,9 @@ def is_closing(self): def close(self): self.closed = True + def abort(self): + self.close() + class StaticConnector(BaseConnector): async def _create_connection(self, req: "ClientRequest", traces: List["Trace"], timeout: "ClientTimeout") -> ResponseHandler: @@ -123,6 +126,9 @@ async def start(self, connection: "Connection") -> "ClientResponse": async def text(self, encoding=None, errors="strict"): return self.static_body + async def read(self): + return self.static_body + class RawClientResponse(aiohttp.ClientResponse): """ @@ -250,7 +256,10 @@ def _create_aiohttp_session(self): connector = 
StaticConnector(limit_per_host=self._connections_per_node, enable_cleanup_closed=self._enable_cleanup_closed) else: connector = aiohttp.TCPConnector( - limit_per_host=self._connections_per_node, use_dns_cache=True, ssl=self._ssl_context, enable_cleanup_closed=self._enable_cleanup_closed + limit_per_host=self._connections_per_node, + use_dns_cache=True, + ssl=self._ssl_context, + enable_cleanup_closed=self._enable_cleanup_closed, ) self.session = aiohttp.ClientSession( @@ -269,6 +278,7 @@ class RallyAsyncTransport(elastic_transport.AsyncTransport): def __init__(self, *args, **kwargs): super().__init__(*args, node_class=RallyAiohttpHttpNode, **kwargs) + class RallyAsyncElasticsearch(elasticsearch.AsyncElasticsearch, RequestContextHolder): def __init__(self, distro=None, *args, **kwargs): super().__init__(*args, **kwargs) @@ -290,7 +300,6 @@ async def perform_request( headers: Optional[Mapping[str, str]] = None, body: Optional[Any] = None, ) -> ApiResponse[Any]: - # We need to ensure that we provide content-type and accept headers if body is not None: if headers is None: From 93c338e9a3e82dbedd5169ece961b41e883f646c Mon Sep 17 00:00:00 2001 From: Brad Deam Date: Fri, 10 Feb 2023 16:04:52 +1030 Subject: [PATCH 25/68] Refactor exception handling --- esrally/client/factory.py | 22 +++- esrally/driver/driver.py | 41 ++++--- esrally/driver/runner.py | 12 +- esrally/metrics.py | 110 ++++++++++-------- tests/client/factory_test.py | 29 ++++- tests/metrics_test.py | 220 ++++++++++++++++++----------------- 6 files changed, 254 insertions(+), 180 deletions(-) diff --git a/esrally/client/factory.py b/esrally/client/factory.py index 51e2a0b93..d9ac2334b 100644 --- a/esrally/client/factory.py +++ b/esrally/client/factory.py @@ -268,14 +268,30 @@ def wait_for_rest_layer(es, max_attempts=40): es.cluster.health(wait_for_nodes=f">={expected_node_count}") logger.debug("REST API is available for >= [%s] nodes after [%s] attempts.", expected_node_count, attempt) return True + except 
elasticsearch.SerializationError as e: + if "Client sent an HTTP request to an HTTPS server" in str(e): + raise exceptions.SystemSetupError( + "Rally sent an HTTP request to an HTTPS server. Are you sure this is an HTTP endpoint?", e + ) + + logger.debug("Got serialization error [%s] on attempt [%s]. Sleeping...", e, attempt) + time.sleep(3) + except elasticsearch.SSLError as e: + raise exceptions.SystemSetupError("Could not connect to cluster via HTTPS. Are you sure this is an HTTPS endpoint?", e) + except elasticsearch.exceptions.ConnectionError as e: + if "ProtocolError" in str(e): + raise exceptions.SystemSetupError( + "Received a protocol error. Are you sure you're using the correct scheme (HTTP or HTTPS)?", e + ) + + logger.debug("Got connection error on attempt [%s]. Sleeping...", attempt) + time.sleep(3) except elasticsearch.TransportError as e: - if "SSL: UNKNOWN_PROTOCOL" in str(e): - raise exceptions.SystemSetupError("Could not connect to cluster via https. Is this an https endpoint?", e) logger.debug("Got transport error on attempt [%s]. Sleeping...", attempt) time.sleep(3) except elasticsearch.ApiError as e: # cluster block, x-pack not initialized yet, our wait condition is not reached - if e.meta.status in (503, 401, 408): + if e.status_code in (503, 401, 408): logger.debug("Got status code [%s] on attempt [%s]. Sleeping...", e.message, attempt) time.sleep(3) else: diff --git a/esrally/driver/driver.py b/esrally/driver/driver.py index 3e3ffbab3..a10d626e5 100644 --- a/esrally/driver/driver.py +++ b/esrally/driver/driver.py @@ -1983,7 +1983,6 @@ async def execute_single(runner, es, params, on_error): total_ops = 1 total_ops_unit = "ops" request_meta_data = {"success": True} - # TODO: This all needs to be refactored except elasticsearch.TransportError as e: # we *specifically* want to distinguish connection refused (a node died?) 
from connection timeouts # pylint: disable=unidiomatic-typecheck @@ -1993,43 +1992,47 @@ async def execute_single(runner, es, params, on_error): total_ops = 0 total_ops_unit = "ops" request_meta_data = {"success": False, "error-type": "transport"} - # The ES client will sometimes return string like "N/A" or "TIMEOUT" for connection errors. - if isinstance(e.message, int): - request_meta_data["http-status"] = e.message + # For the 'errors' attribute, errors are ordered from + # most recently raised (index=0) to least recently raised (index=N) + # + # If an HTTP status code is available with the error it + # will be stored under 'status'. If HTTP headers are available + # they are stored under 'headers'. + if e.errors: + if hasattr(e.errors[0], "status"): + request_meta_data["http-status"] = e.errors[0].status # connection timeout errors don't provide a helpful description if isinstance(e, elasticsearch.ConnectionTimeout): request_meta_data["error-description"] = "network connection timed out" - # elif e.info: - # request_meta_data["error-description"] = f"{e.error} ({e.info})" else: - error_description = str(e) + error_description = e.message request_meta_data["error-description"] = error_description except elasticsearch.ApiError as e: total_ops = 0 total_ops_unit = "ops" request_meta_data = {"success": False, "error-type": "api"} - if isinstance(e.message, bytes): - error_message = e.message.decode("utf-8") - elif isinstance(e.message, BytesIO): - error_message = e.message.read().decode("utf-8") + if isinstance(e.error, bytes): + error_message = e.error.decode("utf-8") + elif isinstance(e.error, BytesIO): + error_message = e.error.read().decode("utf-8") else: - error_message = e.message + error_message = e.error - if isinstance(e.body, bytes): - error_body = e.body.decode("utf-8") - elif isinstance(e.body, BytesIO): - error_body = e.body.read().decode("utf-8") + if isinstance(e.info, bytes): + error_body = e.info.decode("utf-8") + elif isinstance(e.info, BytesIO): 
+ error_body = e.info.read().decode("utf-8") else: - error_body = e.body + error_body = e.info if error_body: error_message += f" ({error_body})" error_description = error_message request_meta_data["error-description"] = error_description - if e.meta.status: - request_meta_data["http-status"] = e.meta.status + if e.status_code: + request_meta_data["http-status"] = e.status_code except KeyError as e: logging.getLogger(__name__).exception("Cannot execute runner [%s]; most likely due to missing parameters.", str(runner)) msg = "Cannot execute [%s]. Provided parameters are: %s. Error: [%s]." % (str(runner), list(params.keys()), str(e)) diff --git a/esrally/driver/runner.py b/esrally/driver/runner.py index 8e54c9302..7a40a7cb9 100644 --- a/esrally/driver/runner.py +++ b/esrally/driver/runner.py @@ -2867,7 +2867,7 @@ async def __call__(self, es, params): if last_attempt or not retry_on_timeout: raise await asyncio.sleep(sleep_time) - except elasticsearch.exceptions.TransportError as e: + except elasticsearch.ApiError as e: if last_attempt or not retry_on_timeout: raise e @@ -2877,6 +2877,16 @@ async def __call__(self, es, params): else: raise e + except elasticsearch.exceptions.ConnectionTimeout as e: + if last_attempt or not retry_on_timeout: + raise e + + self.logger.info("[%s] has timed out. 
Retrying in [%.2f] seconds.", repr(self.delegate), sleep_time) + await asyncio.sleep(sleep_time) + except elasticsearch.exceptions.TransportError as e: + if last_attempt or not retry_on_timeout: + raise e + async def __aexit__(self, exc_type, exc_val, exc_tb): return await self.delegate.__aexit__(exc_type, exc_val, exc_tb) diff --git a/esrally/metrics.py b/esrally/metrics.py index 377f9d6cd..9c98fad16 100644 --- a/esrally/metrics.py +++ b/esrally/metrics.py @@ -30,7 +30,6 @@ import uuid import zlib from enum import Enum, IntEnum -from http.client import responses import tabulate from elasticsearch import ApiError, TransportError @@ -106,91 +105,102 @@ def guarded(self, target, *args, **kwargs): # pylint: disable=import-outside-toplevel import elasticsearch - max_execution_count = 11 + max_execution_count = 10 execution_count = 0 - while execution_count < max_execution_count: + while execution_count <= max_execution_count: time_to_sleep = 2**execution_count + random.random() execution_count += 1 try: return target(*args, **kwargs) + except elasticsearch.exceptions.ConnectionTimeout as e: + if execution_count <= max_execution_count: + self.logger.debug( + "Connection timeout [%s] in attempt [%d/%d]. Sleeping for [%f] seconds.", + e.message, + execution_count, + max_execution_count, + time_to_sleep, + ) + time.sleep(time_to_sleep) + else: + operation = target.__name__ + self.logger.exception("Connection timeout while running [%s] (retried %d times).", operation, max_execution_count) + node = self._client.transport.node_pool.get() + msg = ( + "A connection timeout occurred while running the operation [%s] against your Elasticsearch metrics store on " + "host [%s] at port [%s]." % (operation, node.host, node.port) + ) + raise exceptions.RallyError(msg) + except elasticsearch.exceptions.ConnectionError as e: + if execution_count <= max_execution_count: + self.logger.debug( + "Connection error [%s] in attempt [%d/%d]. 
Sleeping for [%f] seconds.", + e.message, + execution_count, + max_execution_count, + time_to_sleep, + ) + time.sleep(time_to_sleep) + else: + node = self._client.transport.node_pool.get() + msg = ( + "Could not connect to your Elasticsearch metrics store. Please check that it is running on host [%s] at port [%s]" + " or fix the configuration in [%s]." % (node.host, node.port, config.ConfigFile().location) + ) + self.logger.exception(msg) + # connection errors doesn't neccessarily mean it's during setup + raise exceptions.RallyError(msg) except elasticsearch.exceptions.AuthenticationException: # we know that it is just one host (see EsClientFactory) - node = self._client.transport.hosts[0] + node = self._client.transport.node_pool.get() msg = ( "The configured user could not authenticate against your Elasticsearch metrics store running on host [%s] at " "port [%s] (wrong password?). Please fix the configuration in [%s]." - % (node["host"], node["port"], config.ConfigFile().location) + % (node.host, node.port, config.ConfigFile().location) ) self.logger.exception(msg) raise exceptions.SystemSetupError(msg) except elasticsearch.exceptions.AuthorizationException: - node = self._client.transport.hosts[0] + node = self._client.transport.node_pool.get() msg = ( "The configured user does not have enough privileges to run the operation [%s] against your Elasticsearch metrics " "store running on host [%s] at port [%s]. Please adjust your x-pack configuration or specify a user with enough " - "privileges in the configuration in [%s]." % (target.__name__, node["host"], node["port"], config.ConfigFile().location) + "privileges in the configuration in [%s]." 
% (target.__name__, node.host, node.port, config.ConfigFile().location) ) self.logger.exception(msg) raise exceptions.SystemSetupError(msg) - except elasticsearch.exceptions.ConnectionTimeout: - if execution_count < max_execution_count: - self.logger.debug("Connection timeout in attempt [%d/%d].", execution_count, max_execution_count) - time.sleep(time_to_sleep) - else: - operation = target.__name__ - self.logger.exception("Connection timeout while running [%s] (retried %d times).", operation, max_execution_count) - node = self._client.transport.hosts[0] - msg = ( - "A connection timeout occurred while running the operation [%s] against your Elasticsearch metrics store on " - "host [%s] at port [%s]." % (operation, node["host"], node["port"]) - ) - raise exceptions.RallyError(msg) - except elasticsearch.exceptions.ConnectionError: - node = self._client.transport.hosts[0] - msg = ( - "Could not connect to your Elasticsearch metrics store. Please check that it is running on host [%s] at port [%s]" - " or fix the configuration in [%s]." % (node["host"], node["port"], config.ConfigFile().location) - ) - self.logger.exception(msg) - raise exceptions.SystemSetupError(msg) - # TODO: Refactor error handling to better account for TransportError/ApiError subtleties - except TransportError as e: - if e.status == 404 and e.error == "index_not_found_exception": - node = self._client.transport.hosts[0] - msg = ( - "The operation [%s] against your Elasticsearch metrics store on " - "host [%s] at port [%s] failed because index [%s] does not exist." - % (target.__name__, node["host"], node["port"], kwargs.get("index")) - ) - self.logger.exception(msg) - raise exceptions.RallyError(msg) - - if e.message in (502, 503, 504, 429) and execution_count < max_execution_count: + except ApiError as e: + if e.status_code in (502, 503, 504, 429) and execution_count <= max_execution_count: self.logger.debug( "%s (code: %d) in attempt [%d/%d]. 
Sleeping for [%f] seconds.", - responses[e.message], - e.message, + e.error, + e.status_code, execution_count, max_execution_count, time_to_sleep, ) time.sleep(time_to_sleep) else: - node = self._client.transport.hosts[0] + node = self._client.transport.node_pool.get() msg = ( - "A transport error occurred while running the operation [%s] against your Elasticsearch metrics store on " - "host [%s] at port [%s]." % (target.__name__, node["host"], node["port"]) + "An error [%s] occurred while running the operation [%s] against your Elasticsearch metrics store on host [%s] " + "at port [%s]." % (e.error, target.__name__, node.host, node.port) ) self.logger.exception(msg) + # this does not necessarily mean it's a system setup problem... raise exceptions.RallyError(msg) + except TransportError as e: + node = self._client.transport.node_pool.get() - except ApiError: - node = self._client.transport.hosts[0] + if e.errors: + err = e.errors + else: + err = e msg = ( - "An unknown error occurred while running the operation [%s] against your Elasticsearch metrics store on host [%s] " - "at port [%s]." % (target.__name__, node["host"], node["port"]) + "Transport error(s) [%s] occurred while running the operation [%s] against your Elasticsearch metrics store on " + "host [%s] at port [%s]." % (err, target.__name__, node.host, node.port) ) self.logger.exception(msg) # this does not necessarily mean it's a system setup problem... 
diff --git a/tests/client/factory_test.py b/tests/client/factory_test.py index 6d51ba734..f3431a206 100644 --- a/tests/client/factory_test.py +++ b/tests/client/factory_test.py @@ -471,12 +471,35 @@ def test_dont_retry_eternally_on_transport_errors(self, es, sleep): assert not client.wait_for_rest_layer(es, max_attempts=3) @mock.patch("elasticsearch.Elasticsearch") - def test_ssl_error(self, es): + def test_ssl_serialization_error(self, es): + es.cluster.health.side_effect = elasticsearch.SerializationError(message="Client sent an HTTP request to an HTTPS server") + with pytest.raises( + exceptions.SystemSetupError, + match="Rally sent an HTTP request to an HTTPS server. Are you sure this is an HTTP endpoint?", + ): + client.wait_for_rest_layer(es, max_attempts=3) + + @mock.patch("elasticsearch.Elasticsearch") + def test_connection_ssl_error(self, es): + es.cluster.health.side_effect = elasticsearch.SSLError( + message="SSL: WRONG_VERSION_NUMBER] wrong version number (_ssl.c:1131)", + ) + with pytest.raises( + exceptions.SystemSetupError, + match="Could not connect to cluster via HTTPS. Are you sure this is an HTTPS endpoint?", + ): + client.wait_for_rest_layer(es, max_attempts=3) + + @mock.patch("elasticsearch.Elasticsearch") + def test_connection_protocol_error(self, es): es.cluster.health.side_effect = elasticsearch.ConnectionError( message="N/A", - errors=[urllib3.exceptions.SSLError("[SSL: UNKNOWN_PROTOCOL] unknown protocol (_ssl.c:719)")], + errors=[urllib3.exceptions.ProtocolError("Connection aborted.")], ) - with pytest.raises(exceptions.SystemSetupError, match="Could not connect to cluster via https. Is this an https endpoint?"): + with pytest.raises( + exceptions.SystemSetupError, + match="Received a protocol error. 
Are you sure you're using the correct scheme (HTTP or HTTPS)?", + ): client.wait_for_rest_layer(es, max_attempts=3) diff --git a/tests/metrics_test.py b/tests/metrics_test.py index c63dbd4ac..bea39cbdb 100644 --- a/tests/metrics_test.py +++ b/tests/metrics_test.py @@ -24,6 +24,7 @@ import string import tempfile import uuid +from dataclasses import dataclass from unittest import mock import elasticsearch.exceptions @@ -83,39 +84,28 @@ def total_time(self): return 0 -class TransportErrors: - err_return_codes = { - 502: "Bad Gateway", - 503: "Service Unavailable", - 504: "Gateway Timeout", - 429: "Too Many Requests", - } - - def __init__(self, max_err_responses=10): - self.max_err_responses = max_err_responses - # allow duplicates in list of error codes - self.rnd_err_codes = [random.choice(list(TransportErrors.err_return_codes)) for _ in range(self.max_err_responses)] - - @property - def code_list(self): - return self.rnd_err_codes - - @property - def side_effects(self): - side_effect_list = [] - for rnd_code in self.rnd_err_codes: - exc = elasticsearch.exceptions.TransportError(rnd_code, TransportErrors.err_return_codes[rnd_code]) - exc.status = rnd_code - side_effect_list.append(exc) - side_effect_list.append("success") +class TestEsClient: + class NodeMock: + def __init__(self, host, port): + self.host = host + self.port = port - return side_effect_list + class NodePoolMock: + def __init__(self, hosts): + self.nodes = [] + for h in hosts: + self.nodes.append(TestEsClient.NodeMock(host=h["host"], port=h["port"])) + def get(self): + return self.nodes[0] -class TestEsClient: class TransportMock: def __init__(self, hosts): - self.hosts = hosts + self.node_pool = TestEsClient.NodePoolMock(hosts) + + @dataclass + class ApiResponseMeta: + status: int class ClientMock: def __init__(self, hosts): @@ -153,18 +143,106 @@ def test_config_opts_parsing(self, client_esclientfactory): hosts=[{"host": _datastore_host, "port": _datastore_port}], 
client_options=expected_client_options ) - def test_raises_sytem_setup_error_on_connection_problems(self): - def raise_connection_error(): - raise elasticsearch.exceptions.ConnectionError("unit-test") + @mock.patch("random.random") + @mock.patch("esrally.time.sleep") + def test_retries_on_various_errors(self, mocked_sleep, mocked_random, caplog): + class ConnectionError: + def logging_statements(self, retries): + logging_statements = [] + for i, v in enumerate(range(retries)): + logging_statements.append( + "Connection error [%s] in attempt [%d/%d]. Sleeping for [%f] seconds." + % ( + "unit-test", + i + 1, + max_retry, + sleep_slots[v], + ) + ) + logging_statements.append( + "Could not connect to your Elasticsearch metrics store. Please check that it is running on host [127.0.0.1] at " + f"port [9200] or fix the configuration in [{paths.rally_confdir()}/rally.ini]." + ) + return logging_statements + + def raise_error(self): + raise elasticsearch.exceptions.ConnectionError("unit-test") + + class ConnectionTimeout: + def logging_statements(self, retries): + logging_statements = [] + for i, v in enumerate(range(retries)): + logging_statements.append( + "Connection timeout [%s] in attempt [%d/%d]. Sleeping for [%f] seconds." + % ( + "unit-test", + i + 1, + max_retry, + sleep_slots[v], + ) + ) + logging_statements.append(f"Connection timeout while running [raise_error] (retried {retries} times).") + return logging_statements + + def raise_error(self): + raise elasticsearch.exceptions.ConnectionTimeout("unit-test") + + class ApiError: + def __init__(self, status_code): + self.status_code = status_code + + def logging_statements(self, retries): + logging_statements = [] + for i, v in enumerate(range(retries)): + logging_statements.append( + "%s (code: %d) in attempt [%d/%d]. Sleeping for [%f] seconds." 
+ % ( + "unit-test", + self.status_code, + i + 1, + max_retry, + sleep_slots[v], + ) + ) + logging_statements.append( + "An error [unit-test] occurred while running the operation [raise_error] against your Elasticsearch " + "metrics store on host [127.0.0.1] at port [9200]." + ) + return logging_statements + + def raise_error(self): + err = elasticsearch.exceptions.ApiError("unit-test", meta=TestEsClient.ApiResponseMeta(status=self.status_code), body={}) + raise err + + retriable_errors = [ApiError(429), ApiError(502), ApiError(503), ApiError(504), ConnectionError(), ConnectionTimeout()] + + max_retry = 10 + + # The sec to sleep for 10 transport errors is + # [1, 2, 4, 8, 16, 32, 64, 128, 256, 512] ~> 17.05min in total + sleep_slots = [float(2**i) for i in range(0, max_retry)] + + # we want deterministic timings to assess logging statements + mocked_random.return_value = 0 client = metrics.EsClient(self.ClientMock([{"host": "127.0.0.1", "port": "9200"}])) - with pytest.raises(exceptions.SystemSetupError) as ctx: - client.guarded(raise_connection_error) - assert ctx.value.args[0] == ( - "Could not connect to your Elasticsearch metrics store. Please check that it is running on host [127.0.0.1] at " - f"port [9200] or fix the configuration in [{paths.rally_confdir()}/rally.ini]." 
- ) + exepcted_logger_calls = [] + expected_sleep_calls = [] + + for e in retriable_errors: + exepcted_logger_calls += e.logging_statements(max_retry) + expected_sleep_calls += [mock.call(int(sleep_slots[i])) for i in range(0, max_retry)] + + with pytest.raises(exceptions.RallyError): + with caplog.at_level(logging.DEBUG): + client.guarded(e.raise_error) + + actual_logger_calls = [r.message for r in caplog.records] + actual_sleep_calls = mocked_sleep.call_args_list + + assert actual_sleep_calls == expected_sleep_calls + assert actual_logger_calls == exepcted_logger_calls def test_raises_sytem_setup_error_on_authentication_problems(self): def raise_authentication_error(): @@ -196,7 +274,6 @@ def raise_authorization_error(): def test_raises_rally_error_on_unknown_problems(self): def raise_unknown_error(): exc = elasticsearch.exceptions.TransportError(message="unit-test") - exc.status = 500 raise exc client = metrics.EsClient(self.ClientMock([{"host": "127.0.0.1", "port": "9243"}])) @@ -204,72 +281,7 @@ def raise_unknown_error(): with pytest.raises(exceptions.RallyError) as ctx: client.guarded(raise_unknown_error) assert ctx.value.args[0] == ( - "A transport error occurred while running the operation [raise_unknown_error] against your Elasticsearch metrics " - "store on host [127.0.0.1] at port [9243]." 
- ) - - def test_retries_on_various_transport_errors(self): - @mock.patch("random.random") - @mock.patch("esrally.time.sleep") - def test_transport_error_retries(side_effect, expected_logging_calls, expected_sleep_calls, mocked_sleep, mocked_random): - # should return on first success - operation = mock.Mock(side_effect=side_effect) - - # Disable additional randomization time in exponential backoff calls - mocked_random.return_value = 0 - - client = metrics.EsClient(self.ClientMock([{"host": "127.0.0.1", "port": "9243"}])) - - logger = logging.getLogger("esrally.metrics") - with mock.patch.object(logger, "debug") as mocked_debug_logger: - test_result = client.guarded(operation) - mocked_sleep.assert_has_calls(expected_sleep_calls) - mocked_debug_logger.assert_has_calls(expected_logging_calls, any_order=True) - assert test_result == "success" - - max_retry = 10 - all_err_codes = TransportErrors.err_return_codes - transport_errors = TransportErrors(max_err_responses=max_retry) - rnd_err_codes = transport_errors.code_list - rnd_side_effects = transport_errors.side_effects - rnd_mocked_logger_calls = [] - - # The sec to sleep for 10 transport errors is - # [1, 2, 4, 8, 16, 32, 64, 128, 256, 512] ~> 17.05min in total - sleep_slots = [float(2**i) for i in range(0, max_retry)] - mocked_sleep_calls = [mock.call(sleep_slots[i]) for i in range(0, max_retry)] - - for rnd_err_idx, rnd_err_code in enumerate(rnd_err_codes): - # List of logger.debug calls to expect - rnd_mocked_logger_calls.append( - mock.call( - "%s (code: %d) in attempt [%d/%d]. 
Sleeping for [%f] seconds.", - all_err_codes[rnd_err_code], - rnd_err_code, - rnd_err_idx + 1, - max_retry + 1, - sleep_slots[rnd_err_idx], - ) - ) - # pylint: disable=no-value-for-parameter - test_transport_error_retries(rnd_side_effects, rnd_mocked_logger_calls, mocked_sleep_calls) - - @mock.patch("esrally.time.sleep") - def test_fails_after_too_many_errors(self, mocked_sleep): - def random_transport_error(rnd_resp_code): - exc = elasticsearch.exceptions.TransportError(rnd_resp_code, TransportErrors.err_return_codes[rnd_resp_code]) - exc.status = rnd_resp_code - raise exc - - client = metrics.EsClient(self.ClientMock([{"host": "127.0.0.1", "port": "9243"}])) - rnd_code = random.choice(list(TransportErrors.err_return_codes)) - - with pytest.raises(exceptions.RallyError) as ctx: - client.guarded(random_transport_error, rnd_code) - - assert ctx.value.args[0] == ( - "A transport error occurred while running the operation " - "[random_transport_error] against your Elasticsearch metrics " + "Transport error(s) [unit-test] occurred while running the operation [raise_unknown_error] against your Elasticsearch metrics " "store on host [127.0.0.1] at port [9243]." 
) From 808e977c9fca52a2276917bb8c7385a114cfb398 Mon Sep 17 00:00:00 2001 From: Brad Deam Date: Fri, 10 Feb 2023 16:08:38 +1030 Subject: [PATCH 26/68] Refactor client options and target hosts --- esrally/client/factory.py | 6 +- esrally/utils/opts.py | 98 ++++++++++++----------- tests/utils/opts_test.py | 13 ++- tests/utils/resources/target_hosts_1.json | 2 +- 4 files changed, 62 insertions(+), 57 deletions(-) diff --git a/esrally/client/factory.py b/esrally/client/factory.py index d9ac2334b..326caca87 100644 --- a/esrally/client/factory.py +++ b/esrally/client/factory.py @@ -35,7 +35,8 @@ class EsClientFactory: def __init__(self, hosts, client_options, distribution_version=None): # We need to pass a list of connection strings to the client as of elasticsearch-py 8.0 def host_string(host): - protocol = "https" if client_options.get("use_ssl") else "http" + # protocol can be set at either host or client opts level + protocol = "https" if client_options.get("use_ssl") or host.get("use_ssl") else "http" return f"{protocol}://{host['host']}:{host['port']}" self.hosts = [host_string(h) for h in hosts] @@ -156,6 +157,9 @@ def host_string(host): self.max_connections = max(256, self.client_options.pop("max_connections", 0)) self.static_responses = self.client_options.pop("static_responses", None) + if self._is_set(self.client_options, "timeout"): + self.client_options["request_timeout"] = self.client_options.pop("timeout") + @staticmethod def _only_hostnames(hosts): has_ip = False diff --git a/esrally/utils/opts.py b/esrally/utils/opts.py index a6a5792b2..e81213278 100644 --- a/esrally/utils/opts.py +++ b/esrally/utils/opts.py @@ -161,51 +161,6 @@ def all_options(self): return self.parsed_options -def _normalize_hosts(hosts): - # pylint: disable=import-outside-toplevel - from urllib.parse import unquote, urlparse - - string_types = str, bytes - # if hosts are empty, just defer to defaults down the line - if hosts is None: - return [{}] - - # passed in just one string - if 
isinstance(hosts, string_types): - hosts = [hosts] - - out = [] - # normalize hosts to dicts - for host in hosts: - if isinstance(host, string_types): - if "://" not in host: - host = "//%s" % host - - parsed_url = urlparse(host) - h = {"host": parsed_url.hostname} - - if parsed_url.port: - h["port"] = parsed_url.port - - if parsed_url.scheme == "https": - h["port"] = parsed_url.port or 443 - h["use_ssl"] = True - - if parsed_url.username or parsed_url.password: - h["http_auth"] = "%s:%s" % ( - unquote(parsed_url.username), - unquote(parsed_url.password), - ) - - if parsed_url.path and parsed_url.path != "/": - h["url_prefix"] = parsed_url.path - - out.append(h) - else: - out.append(host) - return out - - class TargetHosts(ConnectOptions): DEFAULT = "default" @@ -216,6 +171,50 @@ def __init__(self, argvalue): self.parse_options() + def _normalize_hosts(self, hosts): + # pylint: disable=import-outside-toplevel + from urllib.parse import unquote, urlparse + + string_types = str, bytes + # if hosts are empty, just defer to defaults down the line + if hosts is None: + return [{}] + + # passed in just one string + if isinstance(hosts, string_types): + hosts = [hosts] + + out = [] + # normalize hosts to dicts + for host in hosts: + if isinstance(host, string_types): + if "://" not in host: + host = "//%s" % host + + parsed_url = urlparse(host) + h = {"host": parsed_url.hostname} + + if parsed_url.port: + h["port"] = parsed_url.port + + if parsed_url.scheme == "https": + h["port"] = parsed_url.port or 443 + h["use_ssl"] = True + + if parsed_url.username or parsed_url.password: + h["http_auth"] = "%s:%s" % ( + unquote(parsed_url.username), + unquote(parsed_url.password), + ) + + if parsed_url.path and parsed_url.path != "/": + h["url_prefix"] = parsed_url.path + + out.append(h) + else: + out.append(host) + return out + def parse_options(self): def normalize_to_dict(arg): """ @@ -223,9 +222,14 @@ def normalize_to_dict(arg): This is needed to support backwards compatible 
--target-hosts for single clusters that are not defined as a json string or file. """ - return {TargetHosts.DEFAULT: _normalize_hosts(arg)} + return {TargetHosts.DEFAULT: self._normalize_hosts(arg)} + + parsed_options = to_dict(self.argvalue, default_parser=normalize_to_dict) + p_opts_copy = parsed_options.copy() + for cluster_name, nodes in p_opts_copy.items(): + parsed_options[cluster_name] = self._normalize_hosts(nodes) - self.parsed_options = to_dict(self.argvalue, default_parser=normalize_to_dict) + self.parsed_options = parsed_options @property def all_hosts(self): diff --git a/tests/utils/opts_test.py b/tests/utils/opts_test.py index ed9dfbe4a..e3c6a057c 100644 --- a/tests/utils/opts_test.py +++ b/tests/utils/opts_test.py @@ -182,8 +182,6 @@ def test_csv_hosts_parses(self): assert opts.TargetHosts(target_hosts).default == [{"host": "127.0.0.1", "port": 9200}, {"host": "10.17.0.5", "port": 19200}] - assert opts.TargetHosts(target_hosts).default == [{"host": "127.0.0.1", "port": 9200}, {"host": "10.17.0.5", "port": 19200}] - def test_jsonstring_parses_as_dict_of_clusters(self): target_hosts = ( '{"default": ["127.0.0.1:9200","10.17.0.5:19200"],' @@ -192,14 +190,14 @@ def test_jsonstring_parses_as_dict_of_clusters(self): ) assert opts.TargetHosts(target_hosts).all_hosts == { - "default": ["127.0.0.1:9200", "10.17.0.5:19200"], - "remote_1": ["88.33.22.15:19200"], - "remote_2": ["10.18.0.6:19200", "10.18.0.7:19201"], + "default": [{"host": "127.0.0.1", "port": 9200}, {"host": "10.17.0.5", "port": 19200}], + "remote_1": [{"host": "88.33.22.15", "port": 19200}], + "remote_2": [{"host": "10.18.0.6", "port": 19200}, {"host": "10.18.0.7", "port": 19201}], } def test_json_file_parameter_parses(self): assert opts.TargetHosts(os.path.join(os.path.dirname(__file__), "resources", "target_hosts_1.json")).all_hosts == { - "default": ["127.0.0.1:9200", "10.127.0.3:19200"] + "default": [{"host": "127.0.0.1", "port": 9200, "use_ssl": True}, {"host": "10.127.0.3", "port": 
19200}] } assert opts.TargetHosts(os.path.join(os.path.dirname(__file__), "resources", "target_hosts_2.json")).all_hosts == { @@ -277,8 +275,7 @@ def test_no_client_option_parses_to_default(self): def test_no_client_option_parses_to_default_with_multicluster(self): client_options_string = opts.ClientOptions.DEFAULT_CLIENT_OPTIONS - target_hosts = opts.TargetHosts('{"default": ["127.0.0.1:9200,10.17.0.5:19200"], "remote": ["88.33.22.15:19200"]}') - + target_hosts = opts.TargetHosts('{"default": ["127.0.0.1:9200", "10.17.0.5:19200"], "remote": ["88.33.22.15:19200"]}') assert opts.ClientOptions(client_options_string, target_hosts=target_hosts).default == {"timeout": 60} assert opts.ClientOptions(client_options_string, target_hosts=target_hosts).all_client_options == { diff --git a/tests/utils/resources/target_hosts_1.json b/tests/utils/resources/target_hosts_1.json index c65ea169e..2dfe79a77 100644 --- a/tests/utils/resources/target_hosts_1.json +++ b/tests/utils/resources/target_hosts_1.json @@ -1 +1 @@ -{ "default": ["127.0.0.1:9200","10.127.0.3:19200"] } +{ "default": ["https://127.0.0.1:9200","10.127.0.3:19200"] } From 7e117b97327d1946d3176c38e9fe39266458ec8c Mon Sep 17 00:00:00 2001 From: Brad Deam Date: Fri, 10 Feb 2023 16:13:03 +1030 Subject: [PATCH 27/68] Refactor client creation to pop 'distro' arg before calling parent class, and perform verification after the target request --- esrally/client/asynchronous.py | 5 +++-- esrally/client/synchronous.py | 24 ++++++++++++------------ 2 files changed, 15 insertions(+), 14 deletions(-) diff --git a/esrally/client/asynchronous.py b/esrally/client/asynchronous.py index a891be3d2..9e838c494 100644 --- a/esrally/client/asynchronous.py +++ b/esrally/client/asynchronous.py @@ -280,13 +280,14 @@ def __init__(self, *args, **kwargs): class RallyAsyncElasticsearch(elasticsearch.AsyncElasticsearch, RequestContextHolder): - def __init__(self, distro=None, *args, **kwargs): + def __init__(self, *args, **kwargs): + distro = 
kwargs.pop("distro", None) super().__init__(*args, **kwargs) # skip verification at this point; we've already verified this earlier with the synchronous client. # The async client is used in the hot code path and we use customized overrides (such as that we don't # parse response bodies in some cases for performance reasons, e.g. when using the bulk API). self._verified_elasticsearch = True - if distro is not None: + if distro: self.distribution_version = versions.Version.from_string(distro) else: self.distribution_version = None diff --git a/esrally/client/synchronous.py b/esrally/client/synchronous.py index 1c7eb1b44..714c596b2 100644 --- a/esrally/client/synchronous.py +++ b/esrally/client/synchronous.py @@ -121,11 +121,12 @@ def check_product(cls, headers, response): class RallySyncElasticsearch(elasticsearch.Elasticsearch): - def __init__(self, distro=None, *args, **kwargs): + def __init__(self, *args, **kwargs): + distro = kwargs.pop("distro", None) super().__init__(*args, **kwargs) self._verified_elasticsearch = None - if distro is not None: + if distro: self.distribution_version = versions.Version.from_string(distro) else: self.distribution_version = None @@ -156,16 +157,6 @@ def perform_request( else: request_headers = self._headers - if self._verified_elasticsearch is None: - info = self.transport.perform_request(method="GET", target="/", headers=request_headers) - info_meta = info.meta - info_body = info.body - - self._verified_elasticsearch = _ProductChecker.check_product(info_meta.headers, info_body) - - if self._verified_elasticsearch is not True: - _ProductChecker.raise_error(self._verified_elasticsearch, info_meta, info_body) - # Converts all parts of a Accept/Content-Type headers # from application/X -> application/vnd.elasticsearch+X # see https://github.com/elastic/elasticsearch/issues/51816 @@ -211,6 +202,15 @@ def perform_request( raise HTTP_EXCEPTIONS.get(meta.status, ApiError)(message=message, meta=meta, body=resp_body) + if 
self._verified_elasticsearch is None: + info = self.transport.perform_request(method="GET", target="/", headers=request_headers) + info_meta = info.meta + info_body = info.body + self._verified_elasticsearch = _ProductChecker.check_product(info_meta.headers, info_body) + + if self._verified_elasticsearch is not True: + _ProductChecker.raise_error(self._verified_elasticsearch, info_meta, info_body) + # 'Warning' headers should be reraised as 'ElasticsearchWarning' if "warning" in meta.headers: warning_header = (meta.headers.get("warning") or "").strip() From 86aed7818939065d8c58e526cbc6c8db7127139f Mon Sep 17 00:00:00 2001 From: Brad Deam Date: Fri, 10 Feb 2023 16:13:42 +1030 Subject: [PATCH 28/68] Disable pylint on client version test --- tests/client/common_test.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/client/common_test.py b/tests/client/common_test.py index 4b2d003b4..dac449c60 100644 --- a/tests/client/common_test.py +++ b/tests/client/common_test.py @@ -18,6 +18,7 @@ from esrally.client import common +# pylint: disable=protected-access def test_client_major_version_to_str(): version = (8, 2, 0) assert common._client_major_version_to_str(version) == "8" From 4ba96508947197543ea3120171756e82311e36b1 Mon Sep 17 00:00:00 2001 From: Brad Deam Date: Mon, 13 Feb 2023 11:05:48 +1030 Subject: [PATCH 29/68] Convert create snapshot repo runner to kwargs --- esrally/driver/runner.py | 3 +-- tests/driver/runner_test.py | 4 +--- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/esrally/driver/runner.py b/esrally/driver/runner.py index 7a40a7cb9..acede017e 100644 --- a/esrally/driver/runner.py +++ b/esrally/driver/runner.py @@ -1963,7 +1963,7 @@ class CreateSnapshotRepository(Runner): async def __call__(self, es, params): request_params = params.get("request-params", {}) await es.snapshot.create_repository( - repository=mandatory(params, "repository", repr(self)), body=mandatory(params, "body", repr(self)), params=request_params + 
name=mandatory(params, "repository", repr(self)), body=mandatory(params, "body", repr(self)), params=request_params ) def __repr__(self, *args, **kwargs): @@ -2699,7 +2699,6 @@ class Downsample(Runner): """ async def __call__(self, es, params): - request_params, request_headers = self._transport_request_params(params) fixed_interval = mandatory(params, "fixed-interval", self) diff --git a/tests/driver/runner_test.py b/tests/driver/runner_test.py index 6e84f6040..eebc97751 100644 --- a/tests/driver/runner_test.py +++ b/tests/driver/runner_test.py @@ -3765,7 +3765,7 @@ async def test_create_snapshot_repository(self, es): await r(es, params) es.snapshot.create_repository.assert_called_once_with( - repository="backups", body={"type": "fs", "settings": {"location": "/var/backups"}}, params={} + name="backups", body={"type": "fs", "settings": {"location": "/var/backups"}}, params={} ) @@ -5101,7 +5101,6 @@ async def test_transform_stats_with_non_existing_path(self, es): class TestCreateIlmPolicyRunner: - params = { "policy-name": "my-ilm-policy", "request-params": {"master_timeout": "30s", "timeout": "30s"}, @@ -5147,7 +5146,6 @@ async def test_create_ilm_policy_without_request_params(self, es): class TestDeleteIlmPolicyRunner: - params = {"policy-name": "my-ilm-policy", "request-params": {"master_timeout": "30s", "timeout": "30s"}} @mock.patch("elasticsearch.Elasticsearch") From 568f481293c4cd6aee7cb64efa846c72a4600144 Mon Sep 17 00:00:00 2001 From: Brad Deam Date: Mon, 13 Feb 2023 16:18:47 +1030 Subject: [PATCH 30/68] Remove 'ClientResponse' class override because 'RallyAiohttpHttpNode' always calls 'read()' over 'text()' --- esrally/client/asynchronous.py | 18 ++---------------- 1 file changed, 2 insertions(+), 16 deletions(-) diff --git a/esrally/client/asynchronous.py b/esrally/client/asynchronous.py index 9e838c494..43ef9c791 100644 --- a/esrally/client/asynchronous.py +++ b/esrally/client/asynchronous.py @@ -130,19 +130,6 @@ async def read(self): return 
self.static_body -class RawClientResponse(aiohttp.ClientResponse): - """ - Returns the body as bytes object (instead of a str) to avoid decoding overhead. - """ - - async def text(self, encoding=None, errors="strict"): - """Read response payload and decode.""" - if self._body is None: - await self.read() - - return self._body - - class ResponseMatcher: def __init__(self, responses): self.logger = logging.getLogger(__name__) @@ -212,6 +199,8 @@ def __init__(self, config): self._trace_configs = None self._enable_cleanup_closed = None self._static_responses = None + self._request_class = aiohttp.ClientRequest + self._response_class = aiohttp.ClientResponse @property def trace_configs(self): @@ -244,9 +233,6 @@ def static_responses(self, static_responses): self._request_class = StaticRequest self._response_class = StaticResponse - else: - self._request_class = aiohttp.ClientRequest - self._response_class = RawClientResponse def _create_aiohttp_session(self): if self._loop is None: From 71d536be962ac3070d7371555eee6fc18bb2c70a Mon Sep 17 00:00:00 2001 From: Brad Deam Date: Mon, 13 Feb 2023 17:07:11 +1030 Subject: [PATCH 31/68] Refactor 'static_responses' to always return encoded responses --- docs/command_line_reference.rst | 29 ++++++++++++----------------- esrally/client/asynchronous.py | 14 ++------------ 2 files changed, 14 insertions(+), 29 deletions(-) diff --git a/docs/command_line_reference.rst b/docs/command_line_reference.rst index 4e96a1abb..b4595de87 100644 --- a/docs/command_line_reference.rst +++ b/docs/command_line_reference.rst @@ -765,34 +765,31 @@ Define a JSON file containing a list of objects with the following properties: * ``path``: A path or path pattern that should be matched. Only leading and trailing wildcards (``*``) are supported. A path containing only a wildcard acts matches any path. * ``body``: The respective response body. -* ``body-encoding``: Either ``raw`` or ``json``. 
Use ``json`` by default and ``raw`` for the operation-type ``bulk`` and ``search``. Here we define the necessary responses for a track that bulk-indexes data:: [ + { + "path": "/_cluster/settings", + "body": { + "transient": { + "action.destructive_requires_name": "true" + } + } + }, { "path": "*/_bulk", "body": { "errors": false, "took": 1 - }, - "body-encoding": "raw" + } }, { "path": "/_cluster/health*", "body": { "status": "green", "relocating_shards": 0 - }, - "body-encoding": "json" - }, - { - "path": "/_cluster/settings", - "body": { - "persistent": {}, - "transient": {} - }, - "body-encoding": "json" + } }, { "path": "/_all/_stats/_all", @@ -804,13 +801,11 @@ Here we define the necessary responses for a track that bulk-indexes data:: } } } - }, - "body-encoding": "json" + } }, { "path": "*", - "body": {}, - "body-encoding": "json" + "body": {} } ] diff --git a/esrally/client/asynchronous.py b/esrally/client/asynchronous.py index 43ef9c791..02fdc5ebd 100644 --- a/esrally/client/asynchronous.py +++ b/esrally/client/asynchronous.py @@ -123,11 +123,8 @@ async def start(self, connection: "Connection") -> "ClientResponse": self.status = 200 return self - async def text(self, encoding=None, errors="strict"): - return self.static_body - async def read(self): - return self.static_body + return self.static_body.encode("utf-8") class ResponseMatcher: @@ -146,14 +143,7 @@ def __init__(self, responses): else: matcher = ResponseMatcher.equals(path) - body = response["body"] - body_encoding = response.get("body-encoding", "json") - if body_encoding == "raw": - body = json.dumps(body).encode("utf-8") - elif body_encoding == "json": - body = json.dumps(body) - else: - raise ValueError(f"Unknown body encoding [{body_encoding}] for path [{path}]") + body = json.dumps(response["body"]) self.responses.append((path, matcher, body)) From 8a6aab652e86c91e1fc32fb66d5e3c96d054de82 Mon Sep 17 00:00:00 2001 From: Brad Deam Date: Mon, 13 Feb 2023 17:08:59 +1030 Subject: [PATCH 32/68] 
Run isort on common.py --- esrally/client/common.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/esrally/client/common.py b/esrally/client/common.py index 96082afeb..9620c2833 100644 --- a/esrally/client/common.py +++ b/esrally/client/common.py @@ -1,8 +1,9 @@ import re from datetime import date, datetime from typing import Any, Mapping -from elasticsearch import VERSION + from elastic_transport.client_utils import percent_encode +from elasticsearch import VERSION def _client_major_version_to_str(version: tuple) -> str: From b62d79362ee713f587ffee618234ac2475d10806 Mon Sep 17 00:00:00 2001 From: Brad Deam Date: Tue, 14 Feb 2023 14:02:21 +1030 Subject: [PATCH 33/68] Make ILM client BWC --- esrally/client/asynchronous.py | 17 ++++++++++++ esrally/driver/runner.py | 24 +++++++++++++++-- tests/driver/runner_test.py | 49 +++++++++++++++++++++++++++++++--- 3 files changed, 84 insertions(+), 6 deletions(-) diff --git a/esrally/client/asynchronous.py b/esrally/client/asynchronous.py index 02fdc5ebd..66c7a18e3 100644 --- a/esrally/client/asynchronous.py +++ b/esrally/client/asynchronous.py @@ -36,6 +36,7 @@ TextApiResponse, ) from elastic_transport.client_utils import DEFAULT +from elasticsearch._async.client import IlmClient from elasticsearch.compat import warn_stacklevel from elasticsearch.exceptions import HTTP_EXCEPTIONS, ApiError, ElasticsearchWarning from multidict import CIMultiDict, CIMultiDictProxy @@ -255,6 +256,17 @@ def __init__(self, *args, **kwargs): super().__init__(*args, node_class=RallyAiohttpHttpNode, **kwargs) +class RallyIlmClient(IlmClient): + async def put_lifecycle(self, *args, **kwargs): + if args: + kwargs["name"] = args[0] + + if body := kwargs.pop("body", None): + kwargs["policy"] = body.get("policy", {}) + # pylint: disable=missing-kwoa + return await IlmClient.put_lifecycle(self, **kwargs) + + class RallyAsyncElasticsearch(elasticsearch.AsyncElasticsearch, RequestContextHolder): def __init__(self, *args, **kwargs): 
distro = kwargs.pop("distro", None) @@ -268,6 +280,11 @@ def __init__(self, *args, **kwargs): else: self.distribution_version = None + # some ILM method signatures changed in 'elasticsearch-py' 8.x, + # so we override method(s) here to provide BWC for any custom + # runners that aren't using the new kwargs + self.ilm = RallyIlmClient(self) + async def perform_request( self, method: str, diff --git a/esrally/driver/runner.py b/esrally/driver/runner.py index acede017e..14a17aaaa 100644 --- a/esrally/driver/runner.py +++ b/esrally/driver/runner.py @@ -2625,8 +2625,21 @@ class CreateIlmPolicy(Runner): async def __call__(self, es, params): policy_name = mandatory(params, "policy-name", self) body = mandatory(params, "body", self) + policy = mandatory(body, "policy", self) request_params = params.get("request-params", {}) - await es.ilm.put_lifecycle(policy=policy_name, body=body, params=request_params) + error_trace = request_params.get("error_trace", None) + filter_path = request_params.get("filter_path", None) + master_timeout = request_params.get("master_timeout", None) + timeout = request_params.get("timeout", None) + + await es.ilm.put_lifecycle( + name=policy_name, + policy=policy, + error_trace=error_trace, + filter_path=filter_path, + master_timeout=master_timeout, + timeout=timeout, + ) return { "weight": 1, "unit": "ops", @@ -2646,7 +2659,14 @@ class DeleteIlmPolicy(Runner): async def __call__(self, es, params): policy_name = mandatory(params, "policy-name", self) request_params = params.get("request-params", {}) - await es.ilm.delete_lifecycle(policy=policy_name, params=request_params) + error_trace = request_params.get("error_trace", None) + filter_path = request_params.get("filter_path", None) + master_timeout = request_params.get("master_timeout", None) + timeout = request_params.get("timeout", None) + + await es.ilm.delete_lifecycle( + name=policy_name, error_trace=error_trace, filter_path=filter_path, master_timeout=master_timeout, timeout=timeout + ) 
return { "weight": 1, "unit": "ops", diff --git a/tests/driver/runner_test.py b/tests/driver/runner_test.py index eebc97751..5b284dddb 100644 --- a/tests/driver/runner_test.py +++ b/tests/driver/runner_test.py @@ -29,6 +29,7 @@ import pytest from esrally import client, exceptions +from esrally.client.asynchronous import RallyAsyncElasticsearch from esrally.driver import runner @@ -5125,7 +5126,12 @@ async def test_create_ilm_policy_with_request_params(self, es): } es.ilm.put_lifecycle.assert_awaited_once_with( - policy=self.params["policy-name"], body=self.params["body"], params=self.params["request-params"] + name=self.params["policy-name"], + policy=self.params["body"]["policy"], + master_timeout=self.params["request-params"].get("master_timeout"), + timeout=self.params["request-params"].get("timeout"), + error_trace=None, + filter_path=None, ) @mock.patch("elasticsearch.Elasticsearch") @@ -5142,7 +5148,30 @@ async def test_create_ilm_policy_without_request_params(self, es): "success": True, } - es.ilm.put_lifecycle.assert_awaited_once_with(policy=params["policy-name"], body=params["body"], params={}) + es.ilm.put_lifecycle.assert_awaited_once_with( + name=params["policy-name"], + policy=self.params["body"]["policy"], + master_timeout=None, + timeout=None, + error_trace=None, + filter_path=None, + ) + + @mock.patch("esrally.client.asynchronous.IlmClient") + @pytest.mark.asyncio + async def test_asyncRallyIlmClient_rewrites_kwargs(self, es_ilm): + es = RallyAsyncElasticsearch(hosts=["http://localhost:9200"]) + es_ilm.put_lifecycle = mock.AsyncMock(return_value={}) + + # simulating a custom runner that hasn't been refactored + # to suit the new 'elasticsearch-py' 8.x kwarg only method signature + await es.ilm.put_lifecycle("test-name", body=self.params["body"]) + + es_ilm.put_lifecycle.assert_awaited_once_with( + es.ilm, + policy=self.params["body"]["policy"], + name="test-name", + ) class TestDeleteIlmPolicyRunner: @@ -5160,7 +5189,13 @@ async def 
test_delete_ilm_policy_with_request_params(self, es): "success": True, } - es.ilm.delete_lifecycle.assert_awaited_once_with(policy=self.params["policy-name"], params=self.params["request-params"]) + es.ilm.delete_lifecycle.assert_awaited_once_with( + name=self.params["policy-name"], + master_timeout=self.params["request-params"].get("master_timeout"), + timeout=self.params["request-params"].get("timeout"), + error_trace=None, + filter_path=None, + ) @mock.patch("elasticsearch.Elasticsearch") @pytest.mark.asyncio @@ -5176,7 +5211,13 @@ async def test_delete_ilm_policy_without_request_params(self, es): "success": True, } - es.ilm.delete_lifecycle.assert_awaited_once_with(policy=params["policy-name"], params={}) + es.ilm.delete_lifecycle.assert_awaited_once_with( + name=self.params["policy-name"], + master_timeout=None, + timeout=None, + error_trace=None, + filter_path=None, + ) class TestSqlRunner: From 54fe81470db6a8163b77ee0d52406ca573342b3a Mon Sep 17 00:00:00 2001 From: Brad Deam Date: Tue, 14 Feb 2023 15:38:50 +1030 Subject: [PATCH 34/68] Add new 'elastic_transport' logger to configuration file --- docs/migrate.rst | 4 +++ esrally/log.py | 40 +++++++++++++++++++++++ esrally/resources/logging.json | 5 +++ tests/log_test.py | 60 ++++++++++++++++++++++++++++++++++ 4 files changed, 109 insertions(+) create mode 100644 tests/log_test.py diff --git a/docs/migrate.rst b/docs/migrate.rst index c33322a20..24742fbbd 100644 --- a/docs/migrate.rst +++ b/docs/migrate.rst @@ -4,6 +4,10 @@ Migration Guide Migrating to Rally 2.7.1 ------------------------ +Elasticsearch client logs are now captured by the ``elastic_transport`` logger +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +The 8.x version of ``elasticsearch-py`` client version Rally uses has moved the underlying network requests into a new module (and subsequently a new logger) named ``elastic_transport``. 
New installations of Rally will automatically be configured, and if you are upgrading in-place (i.e. there is a pre-existing ``logging.json``), Rally will automatically add this new logger to the configuration file. + Snapshot repository plugins are no longer built from source ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/esrally/log.py b/esrally/log.py index 0bc8409e4..cf08dafc6 100644 --- a/esrally/log.py +++ b/esrally/log.py @@ -15,6 +15,7 @@ # specific language governing permissions and limitations # under the License. +import copy import json import logging import logging.config @@ -43,6 +44,44 @@ def log_config_path(): return os.path.join(paths.rally_confdir(), "logging.json") +def add_missing_loggers_to_config(): + """ + Ensures that any missing top level loggers in resources/logging.json are + appended to an existing log configuration + """ + logger = logging.getLogger(__name__) + + def missing_keys(source, target): + """ + Returns any top-level dicts present in 'source', but not in 'target' + :return: A dict of all keys present in 'source', but not in 'target' + """ + missing_keys = {} + for k in source: + if k in source and k in target: + continue + else: + missing_keys[k] = source[k] + return missing_keys + + source_path = io.normalize_path(os.path.join(os.path.dirname(__file__), "resources", "logging.json")) + with open(log_config_path(), "r+", encoding="UTF-8") as target: + with open(source_path, "r+", encoding="UTF-8") as src: + template = json.load(src) + existing_logging_config = json.load(target) + existing_logging_config_copy = copy.deepcopy(existing_logging_config) + if missing_loggers := missing_keys(source=template["loggers"], target=existing_logging_config_copy["loggers"]): + logger.info( + "Found loggers [%s] in source template that weren't present in the existing configuration, adding them.", + str(missing_loggers), + ) + existing_logging_config["loggers"].update(missing_loggers) + updated_config = 
json.dumps(existing_logging_config, indent=2) + target.seek(0) + target.write(updated_config) + target.truncate() + + def install_default_log_config(): """ Ensures a log configuration file is present on this machine. The default @@ -63,6 +102,7 @@ def install_default_log_config(): log_path = io.escape_path(log_path) contents = src.read().replace("${LOG_PATH}", log_path) target.write(contents) + add_missing_loggers_to_config() io.ensure_dir(paths.logs()) diff --git a/esrally/resources/logging.json b/esrally/resources/logging.json index b58721060..ba559e235 100644 --- a/esrally/resources/logging.json +++ b/esrally/resources/logging.json @@ -47,6 +47,11 @@ "handlers": ["rally_profile_handler"], "level": "INFO", "propagate": false + }, + "elastic_transport": { + "handlers": ["rally_log_handler"], + "level": "WARNING", + "propagate": false } } } diff --git a/tests/log_test.py b/tests/log_test.py new file mode 100644 index 000000000..1b854af11 --- /dev/null +++ b/tests/log_test.py @@ -0,0 +1,60 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +from unittest import mock + +import pytest +import os +import copy + +from esrally import log +import json +from unittest.mock import patch, mock_open + + +class TestLog: + def _load_logging_template(self, p): + with open(p) as f: + return json.load(f) + + @pytest.fixture(autouse=True) + def set_configuration(self): + p = os.path.join(os.path.join(os.path.dirname(__file__), "..", "esrally", "resources", "logging.json")) + self.configuration = self._load_logging_template(p) + + @mock.patch("json.load") + def test_add_missing_loggers_to_config_missing(self, mock_json_load, caplog): + source_template = copy.deepcopy(self.configuration) + existing_configuration = copy.deepcopy(self.configuration) + # change existing to differ from source template, showing that we don't overwrite any existing loggers config + existing_configuration["loggers"]["rally.profile"]["level"] = "DEBUG" + expected_configuration = json.dumps(copy.deepcopy(existing_configuration), indent=2) + # simulate user missing 'elastic_transport' in logging.json + del existing_configuration["loggers"]["elastic_transport"] + + # first loads template, then existing configuration + mock_json_load.side_effect = [source_template, existing_configuration] + + with patch("builtins.open", mock_open()) as mock_file: + log.add_missing_loggers_to_config() + handle = mock_file() + handle.write.assert_called_once_with(expected_configuration) + + assert ( + "Found loggers [{'elastic_transport': {'handlers': ['rally_log_handler'], 'level': 'WARNING', 'propagate': " + "False}}] in source template that weren't present in the existing configuration, adding them." 
in caplog.text + ) From c85a5ace7ddce84ae56def87beafff6361875138 Mon Sep 17 00:00:00 2001 From: Brad Deam Date: Tue, 14 Feb 2023 15:47:29 +1030 Subject: [PATCH 35/68] Remove 'trace_configs' getters and setters --- esrally/client/asynchronous.py | 12 ++---------- esrally/client/factory.py | 2 +- 2 files changed, 3 insertions(+), 11 deletions(-) diff --git a/esrally/client/asynchronous.py b/esrally/client/asynchronous.py index 66c7a18e3..3258e340d 100644 --- a/esrally/client/asynchronous.py +++ b/esrally/client/asynchronous.py @@ -187,20 +187,12 @@ class RallyAiohttpHttpNode(elastic_transport.AiohttpHttpNode): def __init__(self, config): super().__init__(config) self._loop = None - self._trace_configs = None + self.trace_configs = None self._enable_cleanup_closed = None self._static_responses = None self._request_class = aiohttp.ClientRequest self._response_class = aiohttp.ClientResponse - @property - def trace_configs(self): - return self._trace_configs - - @trace_configs.setter - def trace_configs(self, trace_configs): - self._trace_configs = [trace_configs] - @property def enable_cleanup_closed(self): return self._enable_cleanup_closed @@ -247,7 +239,7 @@ def _create_aiohttp_session(self): request_class=self._request_class, response_class=self._response_class, connector=connector, - trace_configs=self._trace_configs, + trace_configs=self.trace_configs, ) diff --git a/esrally/client/factory.py b/esrally/client/factory.py index 326caca87..7fa8cfdd5 100644 --- a/esrally/client/factory.py +++ b/esrally/client/factory.py @@ -242,7 +242,7 @@ async def on_request_end(session, trace_config_ctx, params): # the AsyncElasticsearch constructor automatically creates the corresponding NodeConfig objects, so we set # their instance attributes after they've been instantiated for node_connection in async_client.transport.node_pool.all(): - node_connection.trace_configs = trace_config + node_connection.trace_configs = [trace_config] node_connection.enable_cleanup_closed = 
self.enable_cleanup_closed node_connection.static_responses = self.static_responses From b99b77308894e0d28c9b824bc37ed94d62f041bd Mon Sep 17 00:00:00 2001 From: Brad Deam Date: Tue, 14 Feb 2023 15:52:14 +1030 Subject: [PATCH 36/68] Remove 'enable_cleanup_closed' getters and setters --- esrally/client/asynchronous.py | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) diff --git a/esrally/client/asynchronous.py b/esrally/client/asynchronous.py index 3258e340d..7be2678a9 100644 --- a/esrally/client/asynchronous.py +++ b/esrally/client/asynchronous.py @@ -188,19 +188,11 @@ def __init__(self, config): super().__init__(config) self._loop = None self.trace_configs = None - self._enable_cleanup_closed = None + self.enable_cleanup_closed = None self._static_responses = None self._request_class = aiohttp.ClientRequest self._response_class = aiohttp.ClientResponse - @property - def enable_cleanup_closed(self): - return self._enable_cleanup_closed - - @enable_cleanup_closed.setter - def enable_cleanup_closed(self, enable_cleanup_closed): - self._enable_cleanup_closed = enable_cleanup_closed - @property def static_responses(self): return self._static_responses @@ -222,13 +214,13 @@ def _create_aiohttp_session(self): self._loop = asyncio.get_running_loop() if self._static_responses: - connector = StaticConnector(limit_per_host=self._connections_per_node, enable_cleanup_closed=self._enable_cleanup_closed) + connector = StaticConnector(limit_per_host=self._connections_per_node, enable_cleanup_closed=self.enable_cleanup_closed) else: connector = aiohttp.TCPConnector( limit_per_host=self._connections_per_node, use_dns_cache=True, ssl=self._ssl_context, - enable_cleanup_closed=self._enable_cleanup_closed, + enable_cleanup_closed=self.enable_cleanup_closed, ) self.session = aiohttp.ClientSession( From d1a911fec02dd4b6d48dce8ccb4bb076630f0e2f Mon Sep 17 00:00:00 2001 From: Brad Deam Date: Tue, 14 Feb 2023 15:54:45 +1030 Subject: [PATCH 37/68] Simplify 
'_client_major_version_to_str' --- esrally/client/common.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/esrally/client/common.py b/esrally/client/common.py index 9620c2833..7702fb4de 100644 --- a/esrally/client/common.py +++ b/esrally/client/common.py @@ -7,7 +7,7 @@ def _client_major_version_to_str(version: tuple) -> str: - return ".".join(map(str, version)).partition(".")[0] + return str(version[0]) _WARNING_RE = re.compile(r"\"([^\"]*)\"") From 2d3270ca898e0d0101e4020535c84b86941f5207 Mon Sep 17 00:00:00 2001 From: Brad Deam Date: Tue, 14 Feb 2023 15:58:05 +1030 Subject: [PATCH 38/68] Remove 'host_string' 'elasticsearch-py' comment --- esrally/client/factory.py | 1 - 1 file changed, 1 deletion(-) diff --git a/esrally/client/factory.py b/esrally/client/factory.py index 7fa8cfdd5..03548ebb7 100644 --- a/esrally/client/factory.py +++ b/esrally/client/factory.py @@ -33,7 +33,6 @@ class EsClientFactory: """ def __init__(self, hosts, client_options, distribution_version=None): - # We need to pass a list of connection strings to the client as of elasticsearch-py 8.0 def host_string(host): # protocol can be set at either host or client opts level protocol = "https" if client_options.get("use_ssl") or host.get("use_ssl") else "http" From 32b5f9a33e2edca399f45dd3363ddf51f4c8fb23 Mon Sep 17 00:00:00 2001 From: Brad Deam Date: Wed, 15 Feb 2023 17:30:40 +1030 Subject: [PATCH 39/68] Make EQL custom runner BWC --- esrally/client/asynchronous.py | 36 +++++++++++++++++++++++++++++++++- tests/driver/runner_test.py | 32 ++++++++++++++++++++++++++++++ 2 files changed, 67 insertions(+), 1 deletion(-) diff --git a/esrally/client/asynchronous.py b/esrally/client/asynchronous.py index 7be2678a9..9917a1c1a 100644 --- a/esrally/client/asynchronous.py +++ b/esrally/client/asynchronous.py @@ -36,7 +36,7 @@ TextApiResponse, ) from elastic_transport.client_utils import DEFAULT -from elasticsearch._async.client import IlmClient +from elasticsearch._async.client import 
EqlClient, IlmClient from elasticsearch.compat import warn_stacklevel from elasticsearch.exceptions import HTTP_EXCEPTIONS, ApiError, ElasticsearchWarning from multidict import CIMultiDict, CIMultiDictProxy @@ -251,6 +251,39 @@ async def put_lifecycle(self, *args, **kwargs): return await IlmClient.put_lifecycle(self, **kwargs) +class RallyEqlClient(EqlClient): + async def search(self, *args, **kwargs): + if args: + kwargs["index"] = args[0] + + if body := kwargs.pop("body", None): + params = [ + "query", + "allow_no_indices", + "case_sensitive", + "event_category_field", + "expand_wildcards", + "fetch_size", + "fields", + "filter", + "ignore_unavailable", + "keep_alive", + "keep_on_completion", + "result_position", + "runtime_mappings", + "size", + "tiebreaker_field", + "timestamp_field", + "wait_for_completion_timeout", + ] + + for p in params: + if param := body.get(p): + kwargs[p] = param + + return await EqlClient.search(self, **kwargs) + + class RallyAsyncElasticsearch(elasticsearch.AsyncElasticsearch, RequestContextHolder): def __init__(self, *args, **kwargs): distro = kwargs.pop("distro", None) @@ -268,6 +301,7 @@ def __init__(self, *args, **kwargs): # so we override method(s) here to provide BWC for any custom # runners that aren't using the new kwargs self.ilm = RallyIlmClient(self) + self.eql = RallyEqlClient(self) async def perform_request( self, diff --git a/tests/driver/runner_test.py b/tests/driver/runner_test.py index 5b284dddb..ac9a8d1f4 100644 --- a/tests/driver/runner_test.py +++ b/tests/driver/runner_test.py @@ -7030,3 +7030,35 @@ async def test_field_caps_with_index_filter(self, es): expected_body = {"index_filter": index_filter} es.field_caps.assert_awaited_once_with(index="_all", fields="time-*", body=expected_body, params=None) + + +class TestEqlRunnerOverride: + params = { + "cluster": "my-cluster", + "index": "my-index", + "request-timeout": 3600, + "body": { + "query": "sequence by source.ip, destination.ip with maxspan=5m [process where 
true] [process where true] [process where true] [network where true] |head 100 | tail 50", + "fetch_size": 1000, + "size": 100, + }, + } + + @mock.patch("esrally.client.asynchronous.EqlClient") + @pytest.mark.asyncio + async def test_RallyEqlClient_rewrites_kwargs(self, es_eql): + es = RallyAsyncElasticsearch(hosts=["http://localhost:9200"]) + es_eql.search = mock.AsyncMock(return_value={}) + index = f"{self.params['cluster']}:{self.params['index']}" + # simulating a custom runner that hasn't been refactored + # to suit the new 'elasticsearch-py' 8.x kwarg only method signature + await es.eql.search(index=index, body=self.params["body"], request_timeout=self.params["request-timeout"]) + + es_eql.search.assert_awaited_once_with( + es.eql, + index=index, + query=self.params["body"]["query"], + fetch_size=self.params["body"]["fetch_size"], + size=self.params["body"]["size"], + request_timeout=self.params["request-timeout"], + ) From e72a13bd2199da1f5306cd5d85822efa2c46bafa Mon Sep 17 00:00:00 2001 From: Brad Deam Date: Wed, 15 Feb 2023 17:32:45 +1030 Subject: [PATCH 40/68] Linting and spelling --- esrally/client/asynchronous.py | 2 +- tests/driver/runner_test.py | 5 +++-- tests/log_test.py | 8 ++++---- 3 files changed, 8 insertions(+), 7 deletions(-) diff --git a/esrally/client/asynchronous.py b/esrally/client/asynchronous.py index 9917a1c1a..a01c3c044 100644 --- a/esrally/client/asynchronous.py +++ b/esrally/client/asynchronous.py @@ -280,7 +280,7 @@ async def search(self, *args, **kwargs): for p in params: if param := body.get(p): kwargs[p] = param - + # pylint: disable=missing-kwoa return await EqlClient.search(self, **kwargs) diff --git a/tests/driver/runner_test.py b/tests/driver/runner_test.py index ac9a8d1f4..198e30880 100644 --- a/tests/driver/runner_test.py +++ b/tests/driver/runner_test.py @@ -5159,7 +5159,7 @@ async def test_create_ilm_policy_without_request_params(self, es): @mock.patch("esrally.client.asynchronous.IlmClient") @pytest.mark.asyncio - async 
def test_asyncRallyIlmClient_rewrites_kwargs(self, es_ilm): + async def test_RallyIlmClient_rewrites_kwargs(self, es_ilm): es = RallyAsyncElasticsearch(hosts=["http://localhost:9200"]) es_ilm.put_lifecycle = mock.AsyncMock(return_value={}) @@ -7038,7 +7038,8 @@ class TestEqlRunnerOverride: "index": "my-index", "request-timeout": 3600, "body": { - "query": "sequence by source.ip, destination.ip with maxspan=5m [process where true] [process where true] [process where true] [network where true] |head 100 | tail 50", + "query": "sequence by source.ip, destination.ip with maxspan=5m [process where true] [process where true] " + "[process where true] [network where true] |head 100 | tail 50", "fetch_size": 1000, "size": 100, }, diff --git a/tests/log_test.py b/tests/log_test.py index 1b854af11..f70d0712f 100644 --- a/tests/log_test.py +++ b/tests/log_test.py @@ -15,15 +15,15 @@ # specific language governing permissions and limitations # under the License. +import copy +import json +import os from unittest import mock +from unittest.mock import mock_open, patch import pytest -import os -import copy from esrally import log -import json -from unittest.mock import patch, mock_open class TestLog: From 071d8869cfa76b75bcd30b1856342e1656f2424a Mon Sep 17 00:00:00 2001 From: Brad Deam Date: Thu, 16 Feb 2023 11:34:11 +1030 Subject: [PATCH 41/68] Bump 'elasticsearch[async]' to 8.6.1 and 'elastic-transport' to 8.4.0 --- pyproject.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 9006ca1fd..cae50b529 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -43,8 +43,8 @@ dependencies = [ # urllib3: MIT # aiohttp: Apache 2.0 - "elasticsearch[async]==8.2.0", - "elastic-transport==8.1.2", + "elasticsearch[async]==8.6.1", + "elastic-transport==8.4.0", "urllib3==1.26.9", "docker==6.0.0", # License: BSD From 3c665a9dd900798a7277624a1d5c4eeb8dc16886 Mon Sep 17 00:00:00 2001 From: Brad Deam Date: Thu, 16 Feb 2023 11:35:32 +1030 
Subject: [PATCH 42/68] Make searchable snapshots client backwards compatible --- esrally/client/asynchronous.py | 40 +++++++++++++-- tests/driver/runner_test.py | 91 ++++++++++++++++++++++++++++++++++ 2 files changed, 128 insertions(+), 3 deletions(-) diff --git a/esrally/client/asynchronous.py b/esrally/client/asynchronous.py index a01c3c044..0fc09a4e5 100644 --- a/esrally/client/asynchronous.py +++ b/esrally/client/asynchronous.py @@ -36,7 +36,7 @@ TextApiResponse, ) from elastic_transport.client_utils import DEFAULT -from elasticsearch._async.client import EqlClient, IlmClient +from elasticsearch._async.client import EqlClient, IlmClient, SearchableSnapshotsClient from elasticsearch.compat import warn_stacklevel from elasticsearch.exceptions import HTTP_EXCEPTIONS, ApiError, ElasticsearchWarning from multidict import CIMultiDict, CIMultiDictProxy @@ -284,6 +284,39 @@ async def search(self, *args, **kwargs): return await EqlClient.search(self, **kwargs) +class RallySearchableSnapshotsClient(SearchableSnapshotsClient): + async def mount(self, *args, **kwargs): + old_params = ["repository", "snapshot", "body", "master_timeout", "storage", "wait_for_completion"] + + if args: + for i, arg in enumerate(args): + kwargs[old_params[i]] = arg + + if body := kwargs.pop("body", None): + params = [ + "repository", + "snapshot", + "index", + "error_trace", + "filter_path", + "human", + "ignore_index_settings", + "index_settings", + "master_timeout", + "pretty", + "renamed_index", + "storage", + "wait_for_completion", + ] + + for p in params: + if param := body.get(p): + kwargs[p] = param + + # pylint: disable=missing-kwoa + return await SearchableSnapshotsClient.mount(self, **kwargs) + + class RallyAsyncElasticsearch(elasticsearch.AsyncElasticsearch, RequestContextHolder): def __init__(self, *args, **kwargs): distro = kwargs.pop("distro", None) @@ -297,11 +330,12 @@ def __init__(self, *args, **kwargs): else: self.distribution_version = None - # some ILM method signatures 
changed in 'elasticsearch-py' 8.x, + # method signatures changed in 'elasticsearch-py' 8.x, # so we override method(s) here to provide BWC for any custom - # runners that aren't using the new kwargs + # runners (i.e. in rally-tracks) that aren't using the new kwargs self.ilm = RallyIlmClient(self) self.eql = RallyEqlClient(self) + self.searchable_snapshots = RallySearchableSnapshotsClient(self) async def perform_request( self, diff --git a/tests/driver/runner_test.py b/tests/driver/runner_test.py index 198e30880..a8833cf69 100644 --- a/tests/driver/runner_test.py +++ b/tests/driver/runner_test.py @@ -7063,3 +7063,94 @@ async def test_RallyEqlClient_rewrites_kwargs(self, es_eql): size=self.params["body"]["size"], request_timeout=self.params["request-timeout"], ) + + +class TestSearchableSnapshotsOverride: + params = { + "repository": "my-repository", + "snapshot": "my-snapshot", + "master_timeout": 1, + "body": {"index": "my-index", "renamed_index": "my-renamed-index", "ignore_index_settings": True}, + "storage": "full_copy", + "wait_for_completion": True, + } + + @mock.patch("esrally.client.asynchronous.SearchableSnapshotsClient") + @pytest.mark.asyncio + async def test_RallySearchableSnapshotsClient_rewrites_kwargs(self, es_searchable_snapshots): + es = RallyAsyncElasticsearch(hosts=["http://localhost:9200"]) + es_searchable_snapshots.mount = mock.AsyncMock(return_value={}) + # simulating a custom runner that hasn't been refactored + # to suit the new 'elasticsearch-py' 8.x kwarg only method signature + await es.searchable_snapshots.mount( + repository=self.params["repository"], + snapshot=self.params["snapshot"], + body=self.params["body"], + storage=self.params["storage"], + wait_for_completion=self.params["wait_for_completion"], + ) + + es_searchable_snapshots.mount.assert_awaited_once_with( + es.searchable_snapshots, + repository=self.params["repository"], + snapshot=self.params["snapshot"], + index=self.params["body"]["index"], + 
renamed_index=self.params["body"]["renamed_index"], + ignore_index_settings=self.params["body"]["ignore_index_settings"], + storage=self.params["storage"], + wait_for_completion=self.params["wait_for_completion"], + ) + + @mock.patch("esrally.client.asynchronous.SearchableSnapshotsClient") + @pytest.mark.asyncio + async def test_RallySearchableSnapshotsClient_rewrites_args(self, es_searchable_snapshots): + es = RallyAsyncElasticsearch(hosts=["http://localhost:9200"]) + es_searchable_snapshots.mount = mock.AsyncMock(return_value={}) + # simulating a custom runner that hasn't been refactored + # to suit the new 'elasticsearch-py' 8.x kwarg only method signature + await es.searchable_snapshots.mount( + self.params["repository"], + self.params["snapshot"], + self.params["body"], + self.params["master_timeout"], + self.params["storage"], + self.params["wait_for_completion"], + ) + + es_searchable_snapshots.mount.assert_awaited_once_with( + es.searchable_snapshots, + repository=self.params["repository"], + snapshot=self.params["snapshot"], + index=self.params["body"]["index"], + renamed_index=self.params["body"]["renamed_index"], + ignore_index_settings=self.params["body"]["ignore_index_settings"], + storage=self.params["storage"], + wait_for_completion=self.params["wait_for_completion"], + master_timeout=self.params["master_timeout"], + ) + + @mock.patch("esrally.client.asynchronous.SearchableSnapshotsClient") + @pytest.mark.asyncio + async def test_RallySearchableSnapshotsClient_rewrites_missing_args(self, es_searchable_snapshots): + es = RallyAsyncElasticsearch(hosts=["http://localhost:9200"]) + es_searchable_snapshots.mount = mock.AsyncMock(return_value={}) + # simulating a custom runner that hasn't been refactored + # to suit the new 'elasticsearch-py' 8.x kwarg only method signature + await es.searchable_snapshots.mount( + self.params["repository"], + self.params["snapshot"], + self.params["body"], + storage=self.params["storage"], + 
wait_for_completion=self.params["wait_for_completion"], + ) + + es_searchable_snapshots.mount.assert_awaited_once_with( + es.searchable_snapshots, + repository=self.params["repository"], + snapshot=self.params["snapshot"], + index=self.params["body"]["index"], + renamed_index=self.params["body"]["renamed_index"], + ignore_index_settings=self.params["body"]["ignore_index_settings"], + storage=self.params["storage"], + wait_for_completion=self.params["wait_for_completion"], + ) From 5057f00869280aac6af0db2d1c1316a9ad900cd7 Mon Sep 17 00:00:00 2001 From: Brad Deam Date: Thu, 16 Feb 2023 14:59:15 +1030 Subject: [PATCH 43/68] Refactor 'DiskUsageStats' to use 'disk_usage' client method --- esrally/telemetry.py | 6 +- tests/telemetry_test.py | 331 ++++++++++++++++++---------------------- 2 files changed, 150 insertions(+), 187 deletions(-) diff --git a/esrally/telemetry.py b/esrally/telemetry.py index 7eecdc626..5d09bd412 100644 --- a/esrally/telemetry.py +++ b/esrally/telemetry.py @@ -2284,7 +2284,7 @@ def on_benchmark_stop(self): for index in self.indices.split(","): self.logger.debug("Gathering disk usage for [%s]", index) try: - response = self.client.perform_request(method="POST", path=f"/{index}/_disk_usage", params={"run_expensive_tasks": "true"}) + response = self.client.indices.disk_usage(index=index, run_expensive_tasks=True) except elasticsearch.RequestError: msg = f"A transport error occurred while collecting disk usage for {index}" self.logger.exception(msg) @@ -2307,8 +2307,10 @@ def handle_telemetry_usage(self, response): self.logger.exception(msg) raise exceptions.RallyError(msg) - del response["_shards"] for index, idx_fields in response.items(): + if index == "_shards": + continue + for field, field_info in idx_fields["fields"].items(): meta = {"index": index, "field": field} self.metrics_store.put_value_cluster_level("disk_usage_total", field_info["total_in_bytes"], meta_data=meta, unit="byte") diff --git a/tests/telemetry_test.py 
b/tests/telemetry_test.py index f74fc7bd9..5d2498b4d 100644 --- a/tests/telemetry_test.py +++ b/tests/telemetry_test.py @@ -4357,168 +4357,152 @@ class TestDiskUsageStats: def test_uses_indices_by_default(self, es): cfg = create_config() metrics_store = metrics.EsMetricsStore(cfg) - tc = TransportClient(response={"_shards": {"failed": 0}}) - c = Client(transport_client=tc) - device = telemetry.DiskUsageStats({}, c, metrics_store, index_names=["foo", "bar"], data_stream_names=[]) + es.indices.disk_usage.return_value = {"_shards": {"failed": 0}} + device = telemetry.DiskUsageStats({}, es, metrics_store, index_names=["foo", "bar"], data_stream_names=[]) t = telemetry.Telemetry(enabled_devices=[device.command], devices=[device]) t.on_benchmark_start() t.on_benchmark_stop() - assert tc.kwargs == [ - {"method": "POST", "path": "/foo/_disk_usage", "params": {"run_expensive_tasks": "true"}}, - {"method": "POST", "path": "/bar/_disk_usage", "params": {"run_expensive_tasks": "true"}}, - ] + es.indices.disk_usage.assert_has_calls( + [ + call(index="foo", run_expensive_tasks=True), + call(index="bar", run_expensive_tasks=True), + ] + ) @mock.patch("elasticsearch.Elasticsearch") def test_uses_data_streams_by_default(self, es): cfg = create_config() metrics_store = metrics.EsMetricsStore(cfg) - tc = TransportClient(response={"_shards": {"failed": 0}}) - es = Client(transport_client=tc) + es.indices.disk_usage.return_value = {"_shards": {"failed": 0}} device = telemetry.DiskUsageStats({}, es, metrics_store, index_names=[], data_stream_names=["foo", "bar"]) t = telemetry.Telemetry(enabled_devices=[device.command], devices=[device]) t.on_benchmark_start() t.on_benchmark_stop() - assert tc.kwargs == [ - {"method": "POST", "path": "/foo/_disk_usage", "params": {"run_expensive_tasks": "true"}}, - {"method": "POST", "path": "/bar/_disk_usage", "params": {"run_expensive_tasks": "true"}}, - ] + es.indices.disk_usage.assert_has_calls( + [ + call(index="foo", run_expensive_tasks=True), + 
call(index="bar", run_expensive_tasks=True), + ] + ) @mock.patch("elasticsearch.Elasticsearch") def test_uses_indices_param_if_specified_instead_of_index_names(self, es): cfg = create_config() metrics_store = metrics.EsMetricsStore(cfg) - tc = TransportClient(response={"_shards": {"failed": 0}}) - es = Client(transport_client=tc) + es.indices.disk_usage.return_value = {"_shards": {"failed": 0}} device = telemetry.DiskUsageStats( {"disk-usage-stats-indices": "foo,bar"}, es, metrics_store, index_names=["baz"], data_stream_names=[] ) t = telemetry.Telemetry(enabled_devices=[device.command], devices=[device]) t.on_benchmark_start() t.on_benchmark_stop() - assert tc.kwargs == [ - {"method": "POST", "path": "/foo/_disk_usage", "params": {"run_expensive_tasks": "true"}}, - {"method": "POST", "path": "/bar/_disk_usage", "params": {"run_expensive_tasks": "true"}}, - ] + es.indices.disk_usage.assert_has_calls( + [ + call(index="foo", run_expensive_tasks=True), + call(index="bar", run_expensive_tasks=True), + ] + ) @mock.patch("elasticsearch.Elasticsearch") def test_uses_indices_param_if_specified_instead_of_data_stream_names(self, es): cfg = create_config() metrics_store = metrics.EsMetricsStore(cfg) - tc = TransportClient(response={"_shards": {"failed": 0}}) - es = Client(transport_client=tc) + es.indices.disk_usage.return_value = {"_shards": {"failed": 0}} device = telemetry.DiskUsageStats( {"disk-usage-stats-indices": "foo,bar"}, es, metrics_store, index_names=[], data_stream_names=["baz"] ) t = telemetry.Telemetry(enabled_devices=[device.command], devices=[device]) t.on_benchmark_start() t.on_benchmark_stop() - assert tc.kwargs == [ - {"method": "POST", "path": "/foo/_disk_usage", "params": {"run_expensive_tasks": "true"}}, - {"method": "POST", "path": "/bar/_disk_usage", "params": {"run_expensive_tasks": "true"}}, - ] + es.indices.disk_usage.assert_has_calls( + [ + call(index="foo", run_expensive_tasks=True), + call(index="bar", run_expensive_tasks=True), + ] + ) 
@mock.patch("esrally.metrics.EsMetricsStore.put_value_cluster_level") - def test_error_on_retrieval_does_not_store_metrics(self, metrics_store_cluster_level): + @mock.patch("elasticsearch.Elasticsearch") + def test_error_on_retrieval_does_not_store_metrics(self, es, metrics_store_cluster_level, caplog): cfg = create_config() metrics_store = metrics.EsMetricsStore(cfg) - es = Client( - transport_client=TransportClient( - force_error=True, - error=elasticsearch.RequestError(message="400", meta=None, body=None), - ) - ) + es.indices.disk_usage.side_effect = elasticsearch.RequestError(message="error", meta=None, body=None) device = telemetry.DiskUsageStats({}, es, metrics_store, index_names=["foo"], data_stream_names=[]) t = telemetry.Telemetry(enabled_devices=[device.command], devices=[device]) t.on_benchmark_start() with pytest.raises(exceptions.RallyError): t.on_benchmark_stop() + assert "A transport error occurred while collecting disk usage for foo" in caplog.text assert metrics_store_cluster_level.call_count == 0 @mock.patch("esrally.metrics.EsMetricsStore.put_value_cluster_level") - def test_no_indices_fails(self, metrics_store_cluster_level): + @mock.patch("elasticsearch.Elasticsearch") + def test_no_indices_fails(self, es, metrics_store_cluster_level, caplog): cfg = create_config() metrics_store = metrics.EsMetricsStore(cfg) - es = Client( - transport_client=TransportClient( - force_error=True, - error=elasticsearch.RequestError(message="400", meta=None, body=None), - ) - ) + es.indices.disk_usage.return_value = {"_shards": {"failed": 0}} device = telemetry.DiskUsageStats({}, es, metrics_store, index_names=[], data_stream_names=[]) t = telemetry.Telemetry(enabled_devices=[device.command], devices=[device]) with pytest.raises(exceptions.RallyError): t.on_benchmark_start() + msg = ( + "No indices defined for disk-usage-stats. Set disk-usage-stats-indices " + "telemetry param or add indices or data streams to the track config." 
+ ) + assert msg in caplog.text assert metrics_store_cluster_level.call_count == 0 @mock.patch("esrally.metrics.EsMetricsStore.put_value_cluster_level") - def test_missing_all_fails(self, metrics_store_cluster_level): + @mock.patch("elasticsearch.Elasticsearch") + def test_missing_all_fails(self, es, metrics_store_cluster_level, caplog): cfg = create_config() metrics_store = metrics.EsMetricsStore(cfg) - es = Client( - transport_client=TransportClient(force_error=True, error=elasticsearch.RequestError(message="400", meta=None, body=None)) - ) + es.indices.disk_usage.side_effect = elasticsearch.NotFoundError(message="error", meta=None, body=None) device = telemetry.DiskUsageStats({}, es, metrics_store, index_names=["foo", "bar"], data_stream_names=[]) t = telemetry.Telemetry(enabled_devices=[device.command], devices=[device]) t.on_benchmark_start() with pytest.raises(exceptions.RallyError): t.on_benchmark_stop() + + assert "Requested disk usage for missing index foo" in caplog.text + assert "Requested disk usage for missing index bar" in caplog.text + assert "Couldn't find any indices for disk usage foo,bar" in caplog.text assert metrics_store_cluster_level.call_count == 0 @mock.patch("esrally.metrics.EsMetricsStore.put_value_cluster_level") - def test_some_mising_succeeds(self, metrics_store_cluster_level): + @mock.patch("elasticsearch.Elasticsearch") + def test_some_mising_succeeds(self, es, metrics_store_cluster_level, caplog): cfg = create_config() metrics_store = metrics.EsMetricsStore(cfg) - - class TwoTransportClients: - def __init__(self, first, rest): - self.first = first - self.rest = rest - - def perform_request(self, *args, **kwargs): - if self.first: - first = self.first - self.first = None - return first.perform_request(args, kwargs) - else: - return self.rest.perform_request(args, kwargs) - - not_found_transport_client = TransportClient( - force_error=True, - error=elasticsearch.NotFoundError(message="404", meta=None, body=None), - ) - 
successful_client = TransportClient( - response={ - "_shards": {"failed": 0}, - "foo": { - "fields": { - "_id": { - "total_in_bytes": 21079, - "inverted_index": {"total_in_bytes": 17110}, - "stored_fields_in_bytes": 3969, - } + not_found_response = elasticsearch.NotFoundError(message="error", meta=None, body=None) + successful_response = { + "_shards": {"failed": 0}, + "foo": { + "fields": { + "_id": { + "total_in_bytes": 21079, + "inverted_index": {"total_in_bytes": 17110}, + "stored_fields_in_bytes": 3969, } - }, - } - ) - - es = Client(transport_client=TwoTransportClients(not_found_transport_client, successful_client)) + } + }, + } + es.indices.disk_usage.side_effect = [not_found_response, successful_response] device = telemetry.DiskUsageStats({}, es, metrics_store, index_names=["foo", "bar"], data_stream_names=[]) t = telemetry.Telemetry(enabled_devices=[device.command], devices=[device]) t.on_benchmark_start() t.on_benchmark_stop() + assert "Requested disk usage for missing index foo" in caplog.text assert metrics_store_cluster_level.call_count == 3 @mock.patch("esrally.metrics.EsMetricsStore.put_value_cluster_level") - def test_successful_shards(self, metrics_store_cluster_level): + @mock.patch("elasticsearch.Elasticsearch") + def test_successful_shards(self, es, metrics_store_cluster_level): cfg = create_config() metrics_store = metrics.EsMetricsStore(cfg) - es = Client( - transport_client=TransportClient( - response={ - "_shards": {"total": 1, "successful": 1, "failed": 0}, - } - ) - ) + es.indices.disk_usage.return_value = { + "_shards": {"total": 1, "successful": 1, "failed": 0}, + } device = telemetry.DiskUsageStats({}, es, metrics_store, index_names=["foo"], data_stream_names=[]) t = telemetry.Telemetry(enabled_devices=[device.command], devices=[device]) t.on_benchmark_start() @@ -4526,16 +4510,13 @@ def test_successful_shards(self, metrics_store_cluster_level): assert metrics_store_cluster_level.call_count == 0 
@mock.patch("esrally.metrics.EsMetricsStore.put_value_cluster_level") - def test_unsuccessful_shards(self, metrics_store_cluster_level): + @mock.patch("elasticsearch.Elasticsearch") + def test_unsuccessful_shards(self, es, metrics_store_cluster_level): cfg = create_config() metrics_store = metrics.EsMetricsStore(cfg) - es = Client( - transport_client=TransportClient( - response={ - "_shards": {"total": 1, "successful": 0, "failed": 1, "failures": "hello there!"}, - } - ) - ) + es.indices.disk_usage.return_value = { + "_shards": {"total": 1, "successful": 0, "failed": 1, "failures": "hello there!"}, + } device = telemetry.DiskUsageStats({}, es, metrics_store, index_names=["foo"], data_stream_names=[]) t = telemetry.Telemetry(enabled_devices=[device.command], devices=[device]) t.on_benchmark_start() @@ -4544,24 +4525,21 @@ def test_unsuccessful_shards(self, metrics_store_cluster_level): assert metrics_store_cluster_level.call_count == 0 @mock.patch("esrally.metrics.EsMetricsStore.put_value_cluster_level") - def test_source(self, metrics_store_cluster_level): + @mock.patch("elasticsearch.Elasticsearch") + def test_source(self, es, metrics_store_cluster_level): cfg = create_config() metrics_store = metrics.EsMetricsStore(cfg) - es = Client( - transport_client=TransportClient( - response={ - "_shards": {"failed": 0}, - "foo": { - "fields": { - "_source": { - "total_in_bytes": 40676, - "stored_fields_in_bytes": 40676, - } - } - }, + es.indices.disk_usage.return_value = { + "_shards": {"failed": 0}, + "foo": { + "fields": { + "_source": { + "total_in_bytes": 40676, + "stored_fields_in_bytes": 40676, + } } - ) - ) + }, + } device = telemetry.DiskUsageStats({}, es, metrics_store, index_names=["foo"], data_stream_names=[]) t = telemetry.Telemetry(enabled_devices=[device.command], devices=[device]) t.on_benchmark_start() @@ -4572,25 +4550,22 @@ def test_source(self, metrics_store_cluster_level): ] @mock.patch("esrally.metrics.EsMetricsStore.put_value_cluster_level") - def 
test_id(self, metrics_store_cluster_level): + @mock.patch("elasticsearch.Elasticsearch") + def test_id(self, es, metrics_store_cluster_level): cfg = create_config() metrics_store = metrics.EsMetricsStore(cfg) - es = Client( - transport_client=TransportClient( - response={ - "_shards": {"failed": 0}, - "foo": { - "fields": { - "_id": { - "total_in_bytes": 21079, - "inverted_index": {"total_in_bytes": 17110}, - "stored_fields_in_bytes": 3969, - } - } - }, + es.indices.disk_usage.return_value = { + "_shards": {"failed": 0}, + "foo": { + "fields": { + "_id": { + "total_in_bytes": 21079, + "inverted_index": {"total_in_bytes": 17110}, + "stored_fields_in_bytes": 3969, + } } - ) - ) + }, + } device = telemetry.DiskUsageStats({}, es, metrics_store, index_names=["foo"], data_stream_names=[]) t = telemetry.Telemetry(enabled_devices=[device.command], devices=[device]) t.on_benchmark_start() @@ -4602,7 +4577,8 @@ def test_id(self, metrics_store_cluster_level): ] @mock.patch("esrally.metrics.EsMetricsStore.put_value_cluster_level") - def test_empty_field(self, metrics_store_cluster_level): + @mock.patch("elasticsearch.Elasticsearch") + def test_empty_field(self, es, metrics_store_cluster_level): """ Fields like _primary_term commonly have take 0 bytes at all. 
But they are declared fields so we return their total size just so no one @@ -4610,24 +4586,20 @@ def test_empty_field(self, metrics_store_cluster_level): """ cfg = create_config() metrics_store = metrics.EsMetricsStore(cfg) - es = Client( - transport_client=TransportClient( - response={ - "_shards": {"failed": 0}, - "foo": { - "fields": { - "_primary_term": { - "total_in_bytes": 0, - "inverted_index": {"total_in_bytes": 0}, - "stored_fields_in_bytes": 0, - "doc_values_in_bytes": 0, - "points_in_bytes": 0, - } - } - }, + es.indices.disk_usage.return_value = { + "_shards": {"failed": 0}, + "foo": { + "fields": { + "_primary_term": { + "total_in_bytes": 0, + "inverted_index": {"total_in_bytes": 0}, + "stored_fields_in_bytes": 0, + "doc_values_in_bytes": 0, + "points_in_bytes": 0, + } } - ) - ) + }, + } device = telemetry.DiskUsageStats({"disk-usage-stats-indices": "foo"}, es, metrics_store, index_names=["foo"], data_stream_names=[]) t = telemetry.Telemetry(enabled_devices=[device.command], devices=[device]) t.on_benchmark_start() @@ -4637,25 +4609,22 @@ def test_empty_field(self, metrics_store_cluster_level): ] @mock.patch("esrally.metrics.EsMetricsStore.put_value_cluster_level") - def test_number(self, metrics_store_cluster_level): + @mock.patch("elasticsearch.Elasticsearch") + def test_number(self, es, metrics_store_cluster_level): cfg = create_config() metrics_store = metrics.EsMetricsStore(cfg) - es = Client( - transport_client=TransportClient( - response={ - "_shards": {"failed": 0}, - "foo": { - "fields": { - "prcp": { - "total_in_bytes": 1498, - "doc_values_in_bytes": 748, - "points_in_bytes": 750, - } - } - }, + es.indices.disk_usage.return_value = { + "_shards": {"failed": 0}, + "foo": { + "fields": { + "prcp": { + "total_in_bytes": 1498, + "doc_values_in_bytes": 748, + "points_in_bytes": 750, + } } - ) - ) + }, + } device = telemetry.DiskUsageStats({}, es, metrics_store, index_names=["foo"], data_stream_names=[]) t = 
telemetry.Telemetry(enabled_devices=[device.command], devices=[device]) t.on_benchmark_start() @@ -4667,25 +4636,22 @@ def test_number(self, metrics_store_cluster_level): ] @mock.patch("esrally.metrics.EsMetricsStore.put_value_cluster_level") - def test_keyword(self, metrics_store_cluster_level): + @mock.patch("elasticsearch.Elasticsearch") + def test_keyword(self, es, metrics_store_cluster_level): cfg = create_config() metrics_store = metrics.EsMetricsStore(cfg) - es = Client( - transport_client=TransportClient( - response={ - "_shards": {"failed": 0}, - "foo": { - "fields": { - "station.country_code": { - "total_in_bytes": 346, - "doc_values_in_bytes": 328, - "points_in_bytes": 18, - } - } - }, + es.indices.disk_usage.return_value = { + "_shards": {"failed": 0}, + "foo": { + "fields": { + "station.country_code": { + "total_in_bytes": 346, + "doc_values_in_bytes": 328, + "points_in_bytes": 18, + } } - ) - ) + }, + } device = telemetry.DiskUsageStats({}, es, metrics_store, index_names=["foo"], data_stream_names=[]) t = telemetry.Telemetry(enabled_devices=[device.command], devices=[device]) t.on_benchmark_start() @@ -4697,19 +4663,14 @@ def test_keyword(self, metrics_store_cluster_level): ] @mock.patch("esrally.metrics.EsMetricsStore.put_value_cluster_level") - def test_indexed_vector(self, metrics_store_cluster_level): + @mock.patch("elasticsearch.Elasticsearch") + def test_indexed_vector(self, es, metrics_store_cluster_level): cfg = create_config() metrics_store = metrics.EsMetricsStore(cfg) - es = Client( - transport_client=TransportClient( - response={ - "_shards": {"failed": 0}, - "foo": { - "fields": {"title_vector": {"total_in_bytes": 64179820, "doc_values_in_bytes": 0, "knn_vectors_in_bytes": 64179820}} - }, - } - ) - ) + es.indices.disk_usage.return_value = { + "_shards": {"failed": 0}, + "foo": {"fields": {"title_vector": {"total_in_bytes": 64179820, "doc_values_in_bytes": 0, "knn_vectors_in_bytes": 64179820}}}, + } device = telemetry.DiskUsageStats({}, 
es, metrics_store, index_names=["foo"], data_stream_names=[]) t = telemetry.Telemetry(enabled_devices=[device.command], devices=[device]) t.on_benchmark_start() From 92ce7ba9f4acacac963516d54d3c89556fef1aba Mon Sep 17 00:00:00 2001 From: Brad Deam Date: Thu, 16 Feb 2023 14:59:29 +1030 Subject: [PATCH 44/68] Revert "Make searchable snapshots client backwards compatible" This reverts commit 3c665a9dd900798a7277624a1d5c4eeb8dc16886. --- esrally/client/asynchronous.py | 40 ++------------- tests/driver/runner_test.py | 91 ---------------------------------- 2 files changed, 3 insertions(+), 128 deletions(-) diff --git a/esrally/client/asynchronous.py b/esrally/client/asynchronous.py index 0fc09a4e5..a01c3c044 100644 --- a/esrally/client/asynchronous.py +++ b/esrally/client/asynchronous.py @@ -36,7 +36,7 @@ TextApiResponse, ) from elastic_transport.client_utils import DEFAULT -from elasticsearch._async.client import EqlClient, IlmClient, SearchableSnapshotsClient +from elasticsearch._async.client import EqlClient, IlmClient from elasticsearch.compat import warn_stacklevel from elasticsearch.exceptions import HTTP_EXCEPTIONS, ApiError, ElasticsearchWarning from multidict import CIMultiDict, CIMultiDictProxy @@ -284,39 +284,6 @@ async def search(self, *args, **kwargs): return await EqlClient.search(self, **kwargs) -class RallySearchableSnapshotsClient(SearchableSnapshotsClient): - async def mount(self, *args, **kwargs): - old_params = ["repository", "snapshot", "body", "master_timeout", "storage", "wait_for_completion"] - - if args: - for i, arg in enumerate(args): - kwargs[old_params[i]] = arg - - if body := kwargs.pop("body", None): - params = [ - "repository", - "snapshot", - "index", - "error_trace", - "filter_path", - "human", - "ignore_index_settings", - "index_settings", - "master_timeout", - "pretty", - "renamed_index", - "storage", - "wait_for_completion", - ] - - for p in params: - if param := body.get(p): - kwargs[p] = param - - # pylint: disable=missing-kwoa - 
return await SearchableSnapshotsClient.mount(self, **kwargs) - - class RallyAsyncElasticsearch(elasticsearch.AsyncElasticsearch, RequestContextHolder): def __init__(self, *args, **kwargs): distro = kwargs.pop("distro", None) @@ -330,12 +297,11 @@ def __init__(self, *args, **kwargs): else: self.distribution_version = None - # method signatures changed in 'elasticsearch-py' 8.x, + # some ILM method signatures changed in 'elasticsearch-py' 8.x, # so we override method(s) here to provide BWC for any custom - # runners (i.e. in rally-tracks) that aren't using the new kwargs + # runners that aren't using the new kwargs self.ilm = RallyIlmClient(self) self.eql = RallyEqlClient(self) - self.searchable_snapshots = RallySearchableSnapshotsClient(self) async def perform_request( self, diff --git a/tests/driver/runner_test.py b/tests/driver/runner_test.py index a8833cf69..198e30880 100644 --- a/tests/driver/runner_test.py +++ b/tests/driver/runner_test.py @@ -7063,94 +7063,3 @@ async def test_RallyEqlClient_rewrites_kwargs(self, es_eql): size=self.params["body"]["size"], request_timeout=self.params["request-timeout"], ) - - -class TestSearchableSnapshotsOverride: - params = { - "repository": "my-repository", - "snapshot": "my-snapshot", - "master_timeout": 1, - "body": {"index": "my-index", "renamed_index": "my-renamed-index", "ignore_index_settings": True}, - "storage": "full_copy", - "wait_for_completion": True, - } - - @mock.patch("esrally.client.asynchronous.SearchableSnapshotsClient") - @pytest.mark.asyncio - async def test_RallySearchableSnapshotsClient_rewrites_kwargs(self, es_searchable_snapshots): - es = RallyAsyncElasticsearch(hosts=["http://localhost:9200"]) - es_searchable_snapshots.mount = mock.AsyncMock(return_value={}) - # simulating a custom runner that hasn't been refactored - # to suit the new 'elasticsearch-py' 8.x kwarg only method signature - await es.searchable_snapshots.mount( - repository=self.params["repository"], - snapshot=self.params["snapshot"], - 
body=self.params["body"], - storage=self.params["storage"], - wait_for_completion=self.params["wait_for_completion"], - ) - - es_searchable_snapshots.mount.assert_awaited_once_with( - es.searchable_snapshots, - repository=self.params["repository"], - snapshot=self.params["snapshot"], - index=self.params["body"]["index"], - renamed_index=self.params["body"]["renamed_index"], - ignore_index_settings=self.params["body"]["ignore_index_settings"], - storage=self.params["storage"], - wait_for_completion=self.params["wait_for_completion"], - ) - - @mock.patch("esrally.client.asynchronous.SearchableSnapshotsClient") - @pytest.mark.asyncio - async def test_RallySearchableSnapshotsClient_rewrites_args(self, es_searchable_snapshots): - es = RallyAsyncElasticsearch(hosts=["http://localhost:9200"]) - es_searchable_snapshots.mount = mock.AsyncMock(return_value={}) - # simulating a custom runner that hasn't been refactored - # to suit the new 'elasticsearch-py' 8.x kwarg only method signature - await es.searchable_snapshots.mount( - self.params["repository"], - self.params["snapshot"], - self.params["body"], - self.params["master_timeout"], - self.params["storage"], - self.params["wait_for_completion"], - ) - - es_searchable_snapshots.mount.assert_awaited_once_with( - es.searchable_snapshots, - repository=self.params["repository"], - snapshot=self.params["snapshot"], - index=self.params["body"]["index"], - renamed_index=self.params["body"]["renamed_index"], - ignore_index_settings=self.params["body"]["ignore_index_settings"], - storage=self.params["storage"], - wait_for_completion=self.params["wait_for_completion"], - master_timeout=self.params["master_timeout"], - ) - - @mock.patch("esrally.client.asynchronous.SearchableSnapshotsClient") - @pytest.mark.asyncio - async def test_RallySearchableSnapshotsClient_rewrites_missing_args(self, es_searchable_snapshots): - es = RallyAsyncElasticsearch(hosts=["http://localhost:9200"]) - es_searchable_snapshots.mount = 
mock.AsyncMock(return_value={}) - # simulating a custom runner that hasn't been refactored - # to suit the new 'elasticsearch-py' 8.x kwarg only method signature - await es.searchable_snapshots.mount( - self.params["repository"], - self.params["snapshot"], - self.params["body"], - storage=self.params["storage"], - wait_for_completion=self.params["wait_for_completion"], - ) - - es_searchable_snapshots.mount.assert_awaited_once_with( - es.searchable_snapshots, - repository=self.params["repository"], - snapshot=self.params["snapshot"], - index=self.params["body"]["index"], - renamed_index=self.params["body"]["renamed_index"], - ignore_index_settings=self.params["body"]["ignore_index_settings"], - storage=self.params["storage"], - wait_for_completion=self.params["wait_for_completion"], - ) From 99f77319390653605a043a3036e16071acf9a595 Mon Sep 17 00:00:00 2001 From: Brad Deam Date: Thu, 16 Feb 2023 15:01:20 +1030 Subject: [PATCH 45/68] Revert "Make EQL custom runner BWC" This reverts commit 32b5f9a33e2edca399f45dd3363ddf51f4c8fb23. 
--- esrally/client/asynchronous.py | 36 +--------------------------------- tests/driver/runner_test.py | 33 ------------------------------- 2 files changed, 1 insertion(+), 68 deletions(-) diff --git a/esrally/client/asynchronous.py b/esrally/client/asynchronous.py index a01c3c044..7be2678a9 100644 --- a/esrally/client/asynchronous.py +++ b/esrally/client/asynchronous.py @@ -36,7 +36,7 @@ TextApiResponse, ) from elastic_transport.client_utils import DEFAULT -from elasticsearch._async.client import EqlClient, IlmClient +from elasticsearch._async.client import IlmClient from elasticsearch.compat import warn_stacklevel from elasticsearch.exceptions import HTTP_EXCEPTIONS, ApiError, ElasticsearchWarning from multidict import CIMultiDict, CIMultiDictProxy @@ -251,39 +251,6 @@ async def put_lifecycle(self, *args, **kwargs): return await IlmClient.put_lifecycle(self, **kwargs) -class RallyEqlClient(EqlClient): - async def search(self, *args, **kwargs): - if args: - kwargs["index"] = args[0] - - if body := kwargs.pop("body", None): - params = [ - "query", - "allow_no_indices", - "case_sensitive", - "event_category_field", - "expand_wildcards", - "fetch_size", - "fields", - "filter", - "ignore_unavailable", - "keep_alive", - "keep_on_completion", - "result_position", - "runtime_mappings", - "size", - "tiebreaker_field", - "timestamp_field", - "wait_for_completion_timeout", - ] - - for p in params: - if param := body.get(p): - kwargs[p] = param - # pylint: disable=missing-kwoa - return await EqlClient.search(self, **kwargs) - - class RallyAsyncElasticsearch(elasticsearch.AsyncElasticsearch, RequestContextHolder): def __init__(self, *args, **kwargs): distro = kwargs.pop("distro", None) @@ -301,7 +268,6 @@ def __init__(self, *args, **kwargs): # so we override method(s) here to provide BWC for any custom # runners that aren't using the new kwargs self.ilm = RallyIlmClient(self) - self.eql = RallyEqlClient(self) async def perform_request( self, diff --git 
a/tests/driver/runner_test.py b/tests/driver/runner_test.py index 198e30880..4c97edd84 100644 --- a/tests/driver/runner_test.py +++ b/tests/driver/runner_test.py @@ -7030,36 +7030,3 @@ async def test_field_caps_with_index_filter(self, es): expected_body = {"index_filter": index_filter} es.field_caps.assert_awaited_once_with(index="_all", fields="time-*", body=expected_body, params=None) - - -class TestEqlRunnerOverride: - params = { - "cluster": "my-cluster", - "index": "my-index", - "request-timeout": 3600, - "body": { - "query": "sequence by source.ip, destination.ip with maxspan=5m [process where true] [process where true] " - "[process where true] [network where true] |head 100 | tail 50", - "fetch_size": 1000, - "size": 100, - }, - } - - @mock.patch("esrally.client.asynchronous.EqlClient") - @pytest.mark.asyncio - async def test_RallyEqlClient_rewrites_kwargs(self, es_eql): - es = RallyAsyncElasticsearch(hosts=["http://localhost:9200"]) - es_eql.search = mock.AsyncMock(return_value={}) - index = f"{self.params['cluster']}:{self.params['index']}" - # simulating a custom runner that hasn't been refactored - # to suit the new 'elasticsearch-py' 8.x kwarg only method signature - await es.eql.search(index=index, body=self.params["body"], request_timeout=self.params["request-timeout"]) - - es_eql.search.assert_awaited_once_with( - es.eql, - index=index, - query=self.params["body"]["query"], - fetch_size=self.params["body"]["fetch_size"], - size=self.params["body"]["size"], - request_timeout=self.params["request-timeout"], - ) From 34936a55aebe53ba470c47bec79f4000e366551e Mon Sep 17 00:00:00 2001 From: Brad Deam Date: Tue, 21 Feb 2023 17:04:10 +1030 Subject: [PATCH 46/68] Fix 'migrate.rst' wording on logger change --- docs/migrate.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/migrate.rst b/docs/migrate.rst index 24742fbbd..f6fdf25f2 100644 --- a/docs/migrate.rst +++ b/docs/migrate.rst @@ -6,7 +6,7 @@ Migrating to Rally 2.7.1 Elasticsearch 
client logs are now captured by the ``elastic_transport`` logger ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -The 8.x version of ``elasticsearch-py`` client version Rally uses has moved the underlying network requests into a new module (and subsequently a new logger) named ``elastic_transport``. New installations of Rally will automatically be configured, and if you are upgrading in-place (i.e. there is a pre-existing ``logging.json``), Rally will automatically add this new logger to the configuration file. +The 8.x version of the ``elasticsearch-py`` client Rally uses has moved the underlying network requests into a new module (and subsequently a new logger) named ``elastic_transport``. Both new and existing installations of Rally will automatically be configured to use this logger. Snapshot repository plugins are no longer built from source ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ From 80623af64fce687b94513f1c309d73cdafa1db2b Mon Sep 17 00:00:00 2001 From: Brad Deam Date: Tue, 21 Feb 2023 17:07:20 +1030 Subject: [PATCH 47/68] Add trailing comma to 'test_create_async_client_with_api_key_auth_override' --- tests/client/factory_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/client/factory_test.py b/tests/client/factory_test.py index f3431a206..8cc7bd39d 100644 --- a/tests/client/factory_test.py +++ b/tests/client/factory_test.py @@ -322,7 +322,7 @@ def test_check_hostname_false_when_host_is_ip(self): @mock.patch("esrally.client.asynchronous.RallyAsyncElasticsearch") def test_create_async_client_with_api_key_auth_override(self, es): hosts = [{"host": "localhost", "port": 9200}] - client_options = {"use_ssl": True, "verify_certs": True, "http_auth": ("user", "password"), "max_connections": 600} + client_options = {"use_ssl": True, "verify_certs": True, "http_auth": ("user", "password"), "max_connections": 600,} # make a copy so we can verify later that the factory did not modify it 
original_client_options = deepcopy(client_options) api_key = ("id", "secret") From f2040840bb009776d12c8ad717680af5acab20e4 Mon Sep 17 00:00:00 2001 From: Brad Deam Date: Wed, 22 Feb 2023 16:47:32 +1030 Subject: [PATCH 48/68] Linting --- tests/client/factory_test.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/tests/client/factory_test.py b/tests/client/factory_test.py index 8cc7bd39d..d9134e242 100644 --- a/tests/client/factory_test.py +++ b/tests/client/factory_test.py @@ -322,7 +322,12 @@ def test_check_hostname_false_when_host_is_ip(self): @mock.patch("esrally.client.asynchronous.RallyAsyncElasticsearch") def test_create_async_client_with_api_key_auth_override(self, es): hosts = [{"host": "localhost", "port": 9200}] - client_options = {"use_ssl": True, "verify_certs": True, "http_auth": ("user", "password"), "max_connections": 600,} + client_options = { + "use_ssl": True, + "verify_certs": True, + "http_auth": ("user", "password"), + "max_connections": 600, + } # make a copy so we can verify later that the factory did not modify it original_client_options = deepcopy(client_options) api_key = ("id", "secret") From 98ac18830728756033635633aeecfb2a54290001 Mon Sep 17 00:00:00 2001 From: Brad Deam Date: Thu, 23 Feb 2023 16:20:37 +1030 Subject: [PATCH 49/68] Silence aiohttp SSL cert verify warnings via es client constructor --- esrally/client/factory.py | 1 + 1 file changed, 1 insertion(+) diff --git a/esrally/client/factory.py b/esrally/client/factory.py index 03548ebb7..ab3e71ba1 100644 --- a/esrally/client/factory.py +++ b/esrally/client/factory.py @@ -72,6 +72,7 @@ def host_string(host): # order matters to avoid ValueError: check_hostname needs a SSL context with either CERT_OPTIONAL or CERT_REQUIRED self.ssl_context.check_hostname = False self.ssl_context.verify_mode = ssl.CERT_NONE + self.client_options["ssl_show_warn"] = False self.logger.warning( "User has enabled SSL but disabled certificate verification. 
This is dangerous but may be ok for a " From 199e5572607fc87f505f622505cdc4c0821fa9c4 Mon Sep 17 00:00:00 2001 From: Brad Deam Date: Fri, 24 Feb 2023 11:33:50 +1030 Subject: [PATCH 50/68] Remove 'urllib3.disable_warnings()' from client factory as it's now surpressed via the client constructor --- esrally/client/factory.py | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/esrally/client/factory.py b/esrally/client/factory.py index ab3e71ba1..5582eac56 100644 --- a/esrally/client/factory.py +++ b/esrally/client/factory.py @@ -19,7 +19,6 @@ import time import certifi -import urllib3 from urllib3.connection import is_ipaddress from esrally import doc_link, exceptions @@ -75,13 +74,8 @@ def host_string(host): self.client_options["ssl_show_warn"] = False self.logger.warning( - "User has enabled SSL but disabled certificate verification. This is dangerous but may be ok for a " - "benchmark. Disabling urllib warnings now to avoid a logging storm. " - "See https://urllib3.readthedocs.io/en/latest/advanced-usage.html#ssl-warnings for details." + "User has enabled SSL but disabled certificate verification. This is dangerous but may be ok for a benchmark." ) - # disable: "InsecureRequestWarning: Unverified HTTPS request is being made. Adding certificate verification is strongly \ - # advised. 
See: https://urllib3.readthedocs.io/en/latest/advanced-usage.html#ssl-warnings" - urllib3.disable_warnings() else: # check_hostname should not be set when host is an IP address self.ssl_context.check_hostname = self._only_hostnames(hosts) From 56506610ae37633843ed8064f0c9e376f6d1a4d5 Mon Sep 17 00:00:00 2001 From: Brad Deam Date: Fri, 24 Feb 2023 14:28:52 +1030 Subject: [PATCH 51/68] Tighten up import of 'elastic_transport' to avoid Thespian's double forking overriding the respective logger configuration --- esrally/metrics.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/esrally/metrics.py b/esrally/metrics.py index 9c98fad16..849ac88fa 100644 --- a/esrally/metrics.py +++ b/esrally/metrics.py @@ -32,7 +32,6 @@ from enum import Enum, IntEnum import tabulate -from elasticsearch import ApiError, TransportError from esrally import client, config, exceptions, paths, time, version from esrally.utils import console, convert, io, versions @@ -104,6 +103,7 @@ def search(self, index, body): def guarded(self, target, *args, **kwargs): # pylint: disable=import-outside-toplevel import elasticsearch + from elastic_transport import ApiError, TransportError max_execution_count = 10 execution_count = 0 From d7e4c6162a889ac256482b4a7c74a0624c22251e Mon Sep 17 00:00:00 2001 From: Brad Deam Date: Mon, 27 Feb 2023 10:48:43 +1030 Subject: [PATCH 52/68] Further tightening of 'elasticsearch' and 'elastic_transport' imports --- esrally/client/asynchronous.py | 11 ++++++----- esrally/client/factory.py | 18 ++++++++++++------ esrally/client/synchronous.py | 5 ++--- esrally/tracker/tracker.py | 2 +- 4 files changed, 21 insertions(+), 15 deletions(-) diff --git a/esrally/client/asynchronous.py b/esrally/client/asynchronous.py index 7be2678a9..9144ca022 100644 --- a/esrally/client/asynchronous.py +++ b/esrally/client/asynchronous.py @@ -22,13 +22,13 @@ from typing import Any, Iterable, List, Mapping, Optional import aiohttp -import elastic_transport -import elasticsearch 
from aiohttp import BaseConnector, RequestInfo from aiohttp.client_proto import ResponseHandler from aiohttp.helpers import BaseTimerContext from elastic_transport import ( + AiohttpHttpNode, ApiResponse, + AsyncTransport, BinaryApiResponse, HeadApiResponse, ListApiResponse, @@ -36,6 +36,7 @@ TextApiResponse, ) from elastic_transport.client_utils import DEFAULT +from elasticsearch import AsyncElasticsearch from elasticsearch._async.client import IlmClient from elasticsearch.compat import warn_stacklevel from elasticsearch.exceptions import HTTP_EXCEPTIONS, ApiError, ElasticsearchWarning @@ -183,7 +184,7 @@ def response(self, path): return body -class RallyAiohttpHttpNode(elastic_transport.AiohttpHttpNode): +class RallyAiohttpHttpNode(AiohttpHttpNode): def __init__(self, config): super().__init__(config) self._loop = None @@ -235,7 +236,7 @@ def _create_aiohttp_session(self): ) -class RallyAsyncTransport(elastic_transport.AsyncTransport): +class RallyAsyncTransport(AsyncTransport): def __init__(self, *args, **kwargs): super().__init__(*args, node_class=RallyAiohttpHttpNode, **kwargs) @@ -251,7 +252,7 @@ async def put_lifecycle(self, *args, **kwargs): return await IlmClient.put_lifecycle(self, **kwargs) -class RallyAsyncElasticsearch(elasticsearch.AsyncElasticsearch, RequestContextHolder): +class RallyAsyncElasticsearch(AsyncElasticsearch, RequestContextHolder): def __init__(self, *args, **kwargs): distro = kwargs.pop("distro", None) super().__init__(*args, **kwargs) diff --git a/esrally/client/factory.py b/esrally/client/factory.py index 5582eac56..7b5fcfec7 100644 --- a/esrally/client/factory.py +++ b/esrally/client/factory.py @@ -258,7 +258,13 @@ def wait_for_rest_layer(es, max_attempts=40): logger = logging.getLogger(__name__) for attempt in range(max_attempts): # pylint: disable=import-outside-toplevel - import elasticsearch + from elastic_transport import ( + ApiError, + ConnectionError, + SerializationError, + TlsError, + TransportError, + ) try: # see also 
WaitForHttpResource in Elasticsearch tests. Contrary to the ES tests we consider the API also @@ -266,7 +272,7 @@ def wait_for_rest_layer(es, max_attempts=40): es.cluster.health(wait_for_nodes=f">={expected_node_count}") logger.debug("REST API is available for >= [%s] nodes after [%s] attempts.", expected_node_count, attempt) return True - except elasticsearch.SerializationError as e: + except SerializationError as e: if "Client sent an HTTP request to an HTTPS server" in str(e): raise exceptions.SystemSetupError( "Rally sent an HTTP request to an HTTPS server. Are you sure this is an HTTP endpoint?", e @@ -274,9 +280,9 @@ def wait_for_rest_layer(es, max_attempts=40): logger.debug("Got serialization error [%s] on attempt [%s]. Sleeping...", e, attempt) time.sleep(3) - except elasticsearch.SSLError as e: + except TlsError as e: raise exceptions.SystemSetupError("Could not connect to cluster via HTTPS. Are you sure this is an HTTPS endpoint?", e) - except elasticsearch.exceptions.ConnectionError as e: + except ConnectionError as e: if "ProtocolError" in str(e): raise exceptions.SystemSetupError( "Received a protocol error. Are you sure you're using the correct scheme (HTTP or HTTPS)?", e @@ -284,10 +290,10 @@ def wait_for_rest_layer(es, max_attempts=40): logger.debug("Got connection error on attempt [%s]. Sleeping...", attempt) time.sleep(3) - except elasticsearch.TransportError as e: + except TransportError as e: logger.debug("Got transport error on attempt [%s]. Sleeping...", attempt) time.sleep(3) - except elasticsearch.ApiError as e: + except ApiError as e: # cluster block, x-pack not initialized yet, our wait condition is not reached if e.status_code in (503, 401, 408): logger.debug("Got status code [%s] on attempt [%s]. 
Sleeping...", e.message, attempt) diff --git a/esrally/client/synchronous.py b/esrally/client/synchronous.py index 714c596b2..2618a9b5d 100644 --- a/esrally/client/synchronous.py +++ b/esrally/client/synchronous.py @@ -19,7 +19,6 @@ import warnings from typing import Any, Iterable, Mapping, Optional -import elasticsearch from elastic_transport import ( ApiResponse, BinaryApiResponse, @@ -29,6 +28,7 @@ TextApiResponse, ) from elastic_transport.client_utils import DEFAULT +from elasticsearch import Elasticsearch from elasticsearch.compat import warn_stacklevel from elasticsearch.exceptions import ( HTTP_EXCEPTIONS, @@ -120,7 +120,7 @@ def check_product(cls, headers, response): return True -class RallySyncElasticsearch(elasticsearch.Elasticsearch): +class RallySyncElasticsearch(Elasticsearch): def __init__(self, *args, **kwargs): distro = kwargs.pop("distro", None) super().__init__(*args, **kwargs) @@ -140,7 +140,6 @@ def perform_request( headers: Optional[Mapping[str, str]] = None, body: Optional[Any] = None, ) -> ApiResponse[Any]: - # We need to ensure that we provide content-type and accept headers if body is not None: if headers is None: diff --git a/esrally/tracker/tracker.py b/esrally/tracker/tracker.py index 93250f001..cc0a30db5 100644 --- a/esrally/tracker/tracker.py +++ b/esrally/tracker/tracker.py @@ -18,7 +18,7 @@ import logging import os -from elasticsearch import ApiError, TransportError +from elastic_transport import ApiError, TransportError from jinja2 import Environment, FileSystemLoader from esrally import PROGRAM_NAME From 2a15ab5a1c95560e05632c9d007b71f8af4dd45a Mon Sep 17 00:00:00 2001 From: Brad Deam Date: Mon, 27 Feb 2023 12:07:58 +1030 Subject: [PATCH 53/68] Add 'elastic_transport' logger IT test --- it/__init__.py | 26 ++++++++++++++++++++++ it/esrallyd_test.py | 54 +++++++++++++++++++++++++++++++++++++++++++++ it/proxy_test.py | 31 ++++---------------------- 3 files changed, 84 insertions(+), 27 deletions(-) create mode 100644 
it/esrallyd_test.py diff --git a/it/__init__.py b/it/__init__.py index f3d77b619..9e6d5b4c8 100644 --- a/it/__init__.py +++ b/it/__init__.py @@ -21,8 +21,10 @@ import os import platform import random +import shutil import socket import subprocess +import tempfile import time import pytest @@ -265,3 +267,27 @@ def setup_module(): def teardown_module(): ES_METRICS_STORE.stop() remove_integration_test_config() + + +# ensures that a fresh log file is available +@pytest.fixture(scope="function") +def fresh_log_file(): + cfg = ConfigFile(config_name=None) + log_file = os.path.join(cfg.rally_home, "logs", "rally.log") + + if os.path.exists(log_file): + bak = os.path.join(tempfile.mkdtemp(), "rally.log") + shutil.move(log_file, bak) + yield log_file + # append log lines to the original file and move it back to its original + with open(log_file) as src: + with open(bak, "a") as dst: + dst.write(src.read()) + shutil.move(bak, log_file) + else: + yield log_file + + +def check_log_line_present(log_file, text): + with open(log_file) as f: + return any(text in line for line in f) diff --git a/it/esrallyd_test.py b/it/esrallyd_test.py new file mode 100644 index 000000000..2afda1da8 --- /dev/null +++ b/it/esrallyd_test.py @@ -0,0 +1,54 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. 
See the License for the
+# specific language governing permissions and limitations
+# under the License.
+import it
+from it import fresh_log_file
+
+import pytest
+
+
+@pytest.fixture(autouse=True)
+def setup_esrallyd():
+    it.wait_until_port_is_free(1900)
+    it.shell_cmd("esrallyd start --node-ip 127.0.0.1 --coordinator-ip 127.0.0.1")
+    yield
+    it.shell_cmd("esrallyd stop")
+
+
+@it.rally_in_mem
+def test_elastic_transport_module_does_not_log_at_info_level(cfg, fresh_log_file):
+    """
+    The 'elastic_transport' module logs at 'INFO' by default and is _very_ noisy, so we explicitly set the threshold to
+    'WARNING' to avoid perturbing benchmarking results due to the high volume of logging calls by the client itself.
+
+    Unfortunately, due to the underlying double-fork behaviour of the ActorSystem, it's possible for this module's logger
+    threshold to be overridden and reset to the default 'INFO' level via eager top level imports (i.e at the top of a module).
+
+    Therefore we try to tightly control the imports of `elastic_transport` and `elasticsearch` throughout the codebase, but
+    it is very easy to reintroduce this 'bug' by simply putting the import statement in the 'wrong' spot, thus this IT
+    attempts to ensure this doesn't happen.
+
+    See https://github.com/elastic/rally/pull/1669#issuecomment-1442783985 for more details.
+ """ + port = 19200 + it.wait_until_port_is_free(port_number=port) + dist = it.DISTRIBUTIONS[-1] + it.race( + cfg, + f'--distribution-version={dist} --track="geonames" --include-tasks=delete-index ' + f"--test-mode --car=4gheap,trial-license --target-hosts=127.0.0.1:{port} ", + ) + assert not it.check_log_line_present(fresh_log_file, "elastic_transport.transport INFO") diff --git a/it/proxy_test.py b/it/proxy_test.py index 7c73fb502..e38750221 100644 --- a/it/proxy_test.py +++ b/it/proxy_test.py @@ -17,14 +17,15 @@ import collections import os -import shutil -import tempfile import pytest import it from esrally.utils import process +# pylint: disable=unused-import +from it import fresh_log_file + HttpProxy = collections.namedtuple("HttpProxy", ["authenticated_url", "anonymous_url"]) @@ -44,34 +45,10 @@ def http_proxy(): process.run_subprocess(f"docker stop {proxy_container_id}") -# ensures that a fresh log file is available -@pytest.fixture(scope="function") -def fresh_log_file(): - cfg = it.ConfigFile(config_name=None) - log_file = os.path.join(cfg.rally_home, "logs", "rally.log") - - if os.path.exists(log_file): - bak = os.path.join(tempfile.mkdtemp(), "rally.log") - shutil.move(log_file, bak) - yield log_file - # append log lines to the original file and move it back to its original - with open(log_file) as src: - with open(bak, "a") as dst: - dst.write(src.read()) - shutil.move(bak, log_file) - else: - yield log_file - - -def assert_log_line_present(log_file, text): - with open(log_file) as f: - assert any(text in line for line in f), f"Could not find [{text}] in [{log_file}]." 
- - @it.rally_in_mem def test_run_with_direct_internet_connection(cfg, http_proxy, fresh_log_file): assert it.esrally(cfg, "list tracks") == 0 - assert_log_line_present(fresh_log_file, "Connecting directly to the Internet") + assert it.check_log_line_present(fresh_log_file, "Connecting directly to the Internet") @it.rally_in_mem From 09206b42bb6fa678d239c6372b2a1585513fdf23 Mon Sep 17 00:00:00 2001 From: Brad Deam Date: Mon, 27 Feb 2023 12:41:22 +1030 Subject: [PATCH 54/68] New test linting --- it/esrallyd_test.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/it/esrallyd_test.py b/it/esrallyd_test.py index 2afda1da8..b523fafa1 100644 --- a/it/esrallyd_test.py +++ b/it/esrallyd_test.py @@ -14,10 +14,12 @@ # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. +import pytest + import it -from it import fresh_log_file -import pytest +# pylint: disable=unused-import +from it import fresh_log_file @pytest.fixture(autouse=True) From e4144bf14672656961a527df5bd91c942cb6f66d Mon Sep 17 00:00:00 2001 From: Brad Deam <54515790+b-deam@users.noreply.github.com> Date: Tue, 28 Feb 2023 14:49:38 +1030 Subject: [PATCH 55/68] Update docs/migrate.rst Co-authored-by: Quentin Pradet --- docs/migrate.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/migrate.rst b/docs/migrate.rst index f6fdf25f2..c09488f0d 100644 --- a/docs/migrate.rst +++ b/docs/migrate.rst @@ -6,7 +6,7 @@ Migrating to Rally 2.7.1 Elasticsearch client logs are now captured by the ``elastic_transport`` logger ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -The 8.x version of the ``elasticsearch-py`` client Rally uses has moved the underlying network requests into a new module (and subsequently a new logger) named ``elastic_transport``. Both new and existing installations of Rally will automatically be configured to use this logger. 
+Rally migrated to the 8.x version of the ``elasticsearch-py`` library which uses a new logger named ``elastic_transport``. Rally will automatically configure this logger to only emit logs of level ``WARNING`` and above, even if a past Rally version configured logging using the ``~./rally/logging.json`` file without that logger. Snapshot repository plugins are no longer built from source ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ From 14d209f769fbf5e2b5b70a9467ce6375f80b369c Mon Sep 17 00:00:00 2001 From: Brad Deam Date: Tue, 28 Feb 2023 12:50:39 +1030 Subject: [PATCH 56/68] Make '_normalize_hosts' class method --- esrally/utils/opts.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/esrally/utils/opts.py b/esrally/utils/opts.py index e81213278..a51a0f883 100644 --- a/esrally/utils/opts.py +++ b/esrally/utils/opts.py @@ -171,7 +171,8 @@ def __init__(self, argvalue): self.parse_options() - def _normalize_hosts(self, hosts): + @classmethod + def _normalize_hosts(cls, hosts): # pylint: disable=import-outside-toplevel from urllib.parse import unquote, urlparse @@ -264,7 +265,6 @@ def parse_options(self): @staticmethod def normalize_to_dict(arg): - """ When --client-options is a non-json csv string (single cluster mode), return parsed client options as dict with "default" key From 55a2e1fabd646ad8f234a8b459cad34f1b64a796 Mon Sep 17 00:00:00 2001 From: Brad Deam Date: Tue, 28 Feb 2023 13:20:17 +1030 Subject: [PATCH 57/68] Rename 'distro' to 'distribution_version' --- esrally/client/asynchronous.py | 6 +++--- esrally/client/factory.py | 4 ++-- esrally/client/synchronous.py | 6 +++--- tests/client/factory_test.py | 2 +- 4 files changed, 9 insertions(+), 9 deletions(-) diff --git a/esrally/client/asynchronous.py b/esrally/client/asynchronous.py index 9144ca022..7aa0a5d6e 100644 --- a/esrally/client/asynchronous.py +++ b/esrally/client/asynchronous.py @@ -254,14 +254,14 @@ async def put_lifecycle(self, *args, **kwargs): class 
RallyAsyncElasticsearch(AsyncElasticsearch, RequestContextHolder): def __init__(self, *args, **kwargs): - distro = kwargs.pop("distro", None) + distribution_version = kwargs.pop("distribution_version", None) super().__init__(*args, **kwargs) # skip verification at this point; we've already verified this earlier with the synchronous client. # The async client is used in the hot code path and we use customized overrides (such as that we don't # parse response bodies in some cases for performance reasons, e.g. when using the bulk API). self._verified_elasticsearch = True - if distro: - self.distribution_version = versions.Version.from_string(distro) + if distribution_version: + self.distribution_version = versions.Version.from_string(distribution_version) else: self.distribution_version = None diff --git a/esrally/client/factory.py b/esrally/client/factory.py index 7b5fcfec7..769c3b7b4 100644 --- a/esrally/client/factory.py +++ b/esrally/client/factory.py @@ -181,7 +181,7 @@ def create(self): from esrally.client.synchronous import RallySyncElasticsearch return RallySyncElasticsearch( - distro=self.distribution_version, hosts=self.hosts, ssl_context=self.ssl_context, **self.client_options + distribution_version=self.distribution_version, hosts=self.hosts, ssl_context=self.ssl_context, **self.client_options ) def create_async(self, api_key=None): @@ -225,7 +225,7 @@ async def on_request_end(session, trace_config_ctx, params): self.client_options["api_key"] = api_key async_client = RallyAsyncElasticsearch( - distro=self.distribution_version, + distribution_version=self.distribution_version, hosts=self.hosts, transport_class=RallyAsyncTransport, ssl_context=self.ssl_context, diff --git a/esrally/client/synchronous.py b/esrally/client/synchronous.py index 2618a9b5d..bfd331297 100644 --- a/esrally/client/synchronous.py +++ b/esrally/client/synchronous.py @@ -122,12 +122,12 @@ def check_product(cls, headers, response): class RallySyncElasticsearch(Elasticsearch): def 
__init__(self, *args, **kwargs): - distro = kwargs.pop("distro", None) + distribution_version = kwargs.pop("distribution_version", None) super().__init__(*args, **kwargs) self._verified_elasticsearch = None - if distro: - self.distribution_version = versions.Version.from_string(distro) + if distribution_version: + self.distribution_version = versions.Version.from_string(distribution_version) else: self.distribution_version = None diff --git a/tests/client/factory_test.py b/tests/client/factory_test.py index d9134e242..a4c9d42ed 100644 --- a/tests/client/factory_test.py +++ b/tests/client/factory_test.py @@ -341,7 +341,7 @@ def test_create_async_client_with_api_key_auth_override(self, es): assert client_options == original_client_options es.assert_called_once_with( - distro=None, + distribution_version=None, hosts=["https://localhost:9200"], transport_class=RallyAsyncTransport, ssl_context=f.ssl_context, From f0a56933c0e01db522b9626cfb976a29b962434a Mon Sep 17 00:00:00 2001 From: Brad Deam Date: Tue, 28 Feb 2023 14:40:33 +1030 Subject: [PATCH 58/68] Move product verification prior to first request --- esrally/client/synchronous.py | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/esrally/client/synchronous.py b/esrally/client/synchronous.py index bfd331297..486ccb7a2 100644 --- a/esrally/client/synchronous.py +++ b/esrally/client/synchronous.py @@ -156,6 +156,19 @@ def perform_request( else: request_headers = self._headers + if self._verified_elasticsearch is None: + info = self.transport.perform_request(method="GET", target="/", headers=request_headers) + info_meta = info.meta + info_body = info.body + + if not 200 <= info_meta.status < 299: + raise HTTP_EXCEPTIONS.get(info_meta.status, ApiError)(message=str(info_body), meta=info_meta, body=info_body) + + self._verified_elasticsearch = _ProductChecker.check_product(info_meta.headers, info_body) + + if self._verified_elasticsearch is not True: + 
_ProductChecker.raise_error(self._verified_elasticsearch, info_meta, info_body) + # Converts all parts of a Accept/Content-Type headers # from application/X -> application/vnd.elasticsearch+X # see https://github.com/elastic/elasticsearch/issues/51816 @@ -201,15 +214,6 @@ def perform_request( raise HTTP_EXCEPTIONS.get(meta.status, ApiError)(message=message, meta=meta, body=resp_body) - if self._verified_elasticsearch is None: - info = self.transport.perform_request(method="GET", target="/", headers=request_headers) - info_meta = info.meta - info_body = info.body - self._verified_elasticsearch = _ProductChecker.check_product(info_meta.headers, info_body) - - if self._verified_elasticsearch is not True: - _ProductChecker.raise_error(self._verified_elasticsearch, info_meta, info_body) - # 'Warning' headers should be reraised as 'ElasticsearchWarning' if "warning" in meta.headers: warning_header = (meta.headers.get("warning") or "").strip() From f59421876b32aa6ae56b0da16f68e4610e3f022d Mon Sep 17 00:00:00 2001 From: Brad Deam Date: Tue, 28 Feb 2023 14:46:58 +1030 Subject: [PATCH 59/68] Comment 'RallyIlmClient' method signature changes --- esrally/client/asynchronous.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/esrally/client/asynchronous.py b/esrally/client/asynchronous.py index 7aa0a5d6e..7ca1c3f9b 100644 --- a/esrally/client/asynchronous.py +++ b/esrally/client/asynchronous.py @@ -243,6 +243,10 @@ def __init__(self, *args, **kwargs): class RallyIlmClient(IlmClient): async def put_lifecycle(self, *args, **kwargs): + """ + The 'elasticsearch-py' 8.x method signature renames the 'policy' param to 'name', and the previously so-called + 'body' param becomes 'policy' + """ if args: kwargs["name"] = args[0] From a16450dcc7751db09aa5118fc7b8941808d8821c Mon Sep 17 00:00:00 2001 From: Brad Deam <54515790+b-deam@users.noreply.github.com> Date: Tue, 28 Feb 2023 14:59:08 +1030 Subject: [PATCH 60/68] Update esrally/log.py Co-authored-by: Quentin Pradet --- 
esrally/log.py | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) diff --git a/esrally/log.py b/esrally/log.py index cf08dafc6..42f5ee4af 100644 --- a/esrally/log.py +++ b/esrally/log.py @@ -52,17 +52,8 @@ def add_missing_loggers_to_config(): logger = logging.getLogger(__name__) def missing_keys(source, target): - """ - Returns any top-level dicts present in 'source', but not in 'target' - :return: A dict of all keys present in 'source', but not in 'target' - """ - missing_keys = {} - for k in source: - if k in source and k in target: - continue - else: - missing_keys[k] = source[k] - return missing_keys + """Returns any top-level dicts present in 'source', but not in 'target'""" + return set(source.keys()).difference(target.keys()) source_path = io.normalize_path(os.path.join(os.path.dirname(__file__), "resources", "logging.json")) with open(log_config_path(), "r+", encoding="UTF-8") as target: From c3936a1f432f73f838b16c8050b964ea60873d0c Mon Sep 17 00:00:00 2001 From: Brad Deam <54515790+b-deam@users.noreply.github.com> Date: Tue, 28 Feb 2023 14:59:59 +1030 Subject: [PATCH 61/68] Update esrally/log.py Co-authored-by: Quentin Pradet --- esrally/log.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/esrally/log.py b/esrally/log.py index 42f5ee4af..0382ca53a 100644 --- a/esrally/log.py +++ b/esrally/log.py @@ -57,7 +57,7 @@ def missing_keys(source, target): source_path = io.normalize_path(os.path.join(os.path.dirname(__file__), "resources", "logging.json")) with open(log_config_path(), "r+", encoding="UTF-8") as target: - with open(source_path, "r+", encoding="UTF-8") as src: + with open(source_path, "r", encoding="UTF-8") as src: template = json.load(src) existing_logging_config = json.load(target) existing_logging_config_copy = copy.deepcopy(existing_logging_config) From 5da718a44d18ab22d3c9a1f9cf9c6ac400ff6767 Mon Sep 17 00:00:00 2001 From: Brad Deam Date: Tue, 28 Feb 2023 15:09:09 +1030 Subject: [PATCH 62/68] Revert "Update 
esrally/log.py" This reverts commit a16450dcc7751db09aa5118fc7b8941808d8821c. --- esrally/log.py | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/esrally/log.py b/esrally/log.py index 0382ca53a..e51690898 100644 --- a/esrally/log.py +++ b/esrally/log.py @@ -52,8 +52,17 @@ def add_missing_loggers_to_config(): logger = logging.getLogger(__name__) def missing_keys(source, target): - """Returns any top-level dicts present in 'source', but not in 'target'""" - return set(source.keys()).difference(target.keys()) + """ + Returns any top-level dicts present in 'source', but not in 'target' + :return: A dict of all keys present in 'source', but not in 'target' + """ + missing_keys = {} + for k in source: + if k in source and k in target: + continue + else: + missing_keys[k] = source[k] + return missing_keys source_path = io.normalize_path(os.path.join(os.path.dirname(__file__), "resources", "logging.json")) with open(log_config_path(), "r+", encoding="UTF-8") as target: From 527b44ea0036a04789cd989cfd6f775e190e57b0 Mon Sep 17 00:00:00 2001 From: Brad Deam Date: Tue, 28 Feb 2023 15:13:47 +1030 Subject: [PATCH 63/68] Rename 'missing_keys' to '_missing_loggers' --- esrally/log.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/esrally/log.py b/esrally/log.py index e51690898..6b87da136 100644 --- a/esrally/log.py +++ b/esrally/log.py @@ -51,18 +51,18 @@ def add_missing_loggers_to_config(): """ logger = logging.getLogger(__name__) - def missing_keys(source, target): + def _missing_loggers(source, target): """ - Returns any top-level dicts present in 'source', but not in 'target' - :return: A dict of all keys present in 'source', but not in 'target' + Returns any top-level loggers present in 'source', but not in 'target' + :return: A dict of all loggers present in 'source', but not in 'target' """ - missing_keys = {} - for k in source: - if k in source and k in target: + missing_loggers = {} + for logger in 
source: + if logger in source and logger in target: continue else: - missing_keys[k] = source[k] - return missing_keys + missing_loggers[logger] = source[logger] + return missing_loggers source_path = io.normalize_path(os.path.join(os.path.dirname(__file__), "resources", "logging.json")) with open(log_config_path(), "r+", encoding="UTF-8") as target: @@ -70,7 +70,7 @@ def missing_keys(source, target): template = json.load(src) existing_logging_config = json.load(target) existing_logging_config_copy = copy.deepcopy(existing_logging_config) - if missing_loggers := missing_keys(source=template["loggers"], target=existing_logging_config_copy["loggers"]): + if missing_loggers := _missing_loggers(source=template["loggers"], target=existing_logging_config_copy["loggers"]): logger.info( "Found loggers [%s] in source template that weren't present in the existing configuration, adding them.", str(missing_loggers), From cb52f45a85855619ff7f290a8f8a71ffd5d3559d Mon Sep 17 00:00:00 2001 From: Brad Deam Date: Tue, 28 Feb 2023 15:58:48 +1030 Subject: [PATCH 64/68] Re-open 'target' in write mode to avoid tricky code --- esrally/log.py | 21 ++++++++------------- tests/log_test.py | 5 ----- 2 files changed, 8 insertions(+), 18 deletions(-) diff --git a/esrally/log.py b/esrally/log.py index 6b87da136..189a1b8c5 100644 --- a/esrally/log.py +++ b/esrally/log.py @@ -15,7 +15,6 @@ # specific language governing permissions and limitations # under the License. 
-import copy import json import logging import logging.config @@ -49,7 +48,6 @@ def add_missing_loggers_to_config(): Ensures that any missing top level loggers in resources/logging.json are appended to an existing log configuration """ - logger = logging.getLogger(__name__) def _missing_loggers(source, target): """ @@ -65,21 +63,18 @@ def _missing_loggers(source, target): return missing_loggers source_path = io.normalize_path(os.path.join(os.path.dirname(__file__), "resources", "logging.json")) - with open(log_config_path(), "r+", encoding="UTF-8") as target: - with open(source_path, "r", encoding="UTF-8") as src: + + with open(log_config_path(), encoding="UTF-8") as target: + with open(source_path, encoding="UTF-8") as src: template = json.load(src) existing_logging_config = json.load(target) - existing_logging_config_copy = copy.deepcopy(existing_logging_config) - if missing_loggers := _missing_loggers(source=template["loggers"], target=existing_logging_config_copy["loggers"]): - logger.info( - "Found loggers [%s] in source template that weren't present in the existing configuration, adding them.", - str(missing_loggers), - ) + if missing_loggers := _missing_loggers(source=template["loggers"], target=existing_logging_config["loggers"]): existing_logging_config["loggers"].update(missing_loggers) updated_config = json.dumps(existing_logging_config, indent=2) - target.seek(0) - target.write(updated_config) - target.truncate() + + if missing_loggers: + with open(log_config_path(), "w", encoding="UTF-8") as target: + target.write(updated_config) def install_default_log_config(): diff --git a/tests/log_test.py b/tests/log_test.py index f70d0712f..f78579512 100644 --- a/tests/log_test.py +++ b/tests/log_test.py @@ -53,8 +53,3 @@ def test_add_missing_loggers_to_config_missing(self, mock_json_load, caplog): log.add_missing_loggers_to_config() handle = mock_file() handle.write.assert_called_once_with(expected_configuration) - - assert ( - "Found loggers 
[{'elastic_transport': {'handlers': ['rally_log_handler'], 'level': 'WARNING', 'propagate': " - "False}}] in source template that weren't present in the existing configuration, adding them." in caplog.text - ) From 57c8bfbaa5d2c5e925153f019c0fc5890ea51f8f Mon Sep 17 00:00:00 2001 From: Brad Deam Date: Wed, 1 Mar 2023 10:18:03 +1030 Subject: [PATCH 65/68] Add '6.8' TODOs for all runner fallbacks --- esrally/driver/runner.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/esrally/driver/runner.py b/esrally/driver/runner.py index 14a17aaaa..61d22da0c 100644 --- a/esrally/driver/runner.py +++ b/esrally/driver/runner.py @@ -204,7 +204,6 @@ def _default_kw_params(self, params): # filter Nones return dict(filter(lambda kv: kv[1] is not None, full_result.items())) - # TODO: have this function call options() on the es instance? def _transport_request_params(self, params): request_params = params.get("request-params", {}) request_timeout = params.get("request-timeout") @@ -1703,8 +1702,7 @@ async def __call__(self, es, params): try: await es.ml.put_datafeed(datafeed_id=datafeed_id, body=body) except elasticsearch.BadRequestError: - # TODO: when we drop support for Elasticsearch 6.8, all Datafeed classes - # will be able to stop using this _xpack path + # TODO: remove the fallback to '_xpack' path when we drop support for Elasticsearch 6.8 await es.perform_request( method="PUT", path=f"/_xpack/ml/datafeeds/{datafeed_id}", @@ -1730,6 +1728,7 @@ async def __call__(self, es, params): # we don't want to fail if a datafeed does not exist, thus we ignore 404s. 
await es.ml.delete_datafeed(datafeed_id=datafeed_id, force=force, ignore=[404]) except elasticsearch.BadRequestError: + # TODO: remove the fallback to '_xpack' path when we drop support for Elasticsearch 6.8 await es.perform_request( method="DELETE", path=f"/_xpack/ml/datafeeds/{datafeed_id}", @@ -1757,6 +1756,7 @@ async def __call__(self, es, params): try: await es.ml.start_datafeed(datafeed_id=datafeed_id, body=body, start=start, end=end, timeout=timeout) except elasticsearch.BadRequestError: + # TODO: remove the fallback to '_xpack' path when we drop support for Elasticsearch 6.8 await es.perform_request( method="POST", path=f"/_xpack/ml/datafeeds/{datafeed_id}/_start", @@ -1782,7 +1782,7 @@ async def __call__(self, es, params): try: await es.ml.stop_datafeed(datafeed_id=datafeed_id, force=force, timeout=timeout) except elasticsearch.BadRequestError: - # fallback to old path (ES < 7) + # TODO: remove the fallback to '_xpack' path when we drop support for Elasticsearch 6.8 request_params = { "force": escape(force), } @@ -1812,7 +1812,7 @@ async def __call__(self, es, params): try: await es.ml.put_job(job_id=job_id, body=body) except elasticsearch.BadRequestError: - # fallback to old path (ES < 7) + # TODO: remove the fallback to '_xpack' path when we drop support for Elasticsearch 6.8 await es.perform_request( method="PUT", path=f"/_xpack/ml/anomaly_detectors/{job_id}", @@ -1838,7 +1838,7 @@ async def __call__(self, es, params): try: await es.ml.delete_job(job_id=job_id, force=force, ignore=[404]) except elasticsearch.BadRequestError: - # fallback to old path (ES < 7) + # TODO: remove the fallback to '_xpack' path when we drop support for Elasticsearch 6.8 await es.perform_request( method="DELETE", path=f"/_xpack/ml/anomaly_detectors/{job_id}", @@ -1862,7 +1862,7 @@ async def __call__(self, es, params): try: await es.ml.open_job(job_id=job_id) except elasticsearch.BadRequestError: - # fallback to old path (ES < 7) + # TODO: remove the fallback to '_xpack' path 
when we drop support for Elasticsearch 6.8 await es.perform_request( method="POST", path=f"/_xpack/ml/anomaly_detectors/{job_id}/_open", @@ -1887,7 +1887,7 @@ async def __call__(self, es, params): try: await es.ml.close_job(job_id=job_id, force=force, timeout=timeout) except elasticsearch.BadRequestError: - # fallback to old path (ES < 7) + # TODO: remove the fallback to '_xpack' path when we drop support for Elasticsearch 6.8 request_params = { "force": escape(force), } From 758c95ca892a00dabe25b69bffa4eb64be79fba1 Mon Sep 17 00:00:00 2001 From: Brad Deam Date: Wed, 1 Mar 2023 11:49:04 +1030 Subject: [PATCH 66/68] Refactor exception handling in client factory's 'wait_for_rest_layer' method --- esrally/client/factory.py | 29 ++++++++++++++++++++--------- tests/client/factory_test.py | 9 +++++++-- 2 files changed, 27 insertions(+), 11 deletions(-) diff --git a/esrally/client/factory.py b/esrally/client/factory.py index 769c3b7b4..201fc0e53 100644 --- a/esrally/client/factory.py +++ b/esrally/client/factory.py @@ -256,7 +256,9 @@ def wait_for_rest_layer(es, max_attempts=40): # but this is still better than just checking for any random node's REST API being reachable. expected_node_count = len(es.transport.node_pool) logger = logging.getLogger(__name__) - for attempt in range(max_attempts): + attempt = 0 + while attempt <= max_attempts: + attempt += 1 # pylint: disable=import-outside-toplevel from elastic_transport import ( ApiError, @@ -278,8 +280,11 @@ def wait_for_rest_layer(es, max_attempts=40): "Rally sent an HTTP request to an HTTPS server. Are you sure this is an HTTP endpoint?", e ) - logger.debug("Got serialization error [%s] on attempt [%s]. Sleeping...", e, attempt) - time.sleep(3) + if attempt <= max_attempts: + logger.debug("Got serialization error [%s] on attempt [%s]. Sleeping...", e, attempt) + time.sleep(3) + else: + raise except TlsError as e: raise exceptions.SystemSetupError("Could not connect to cluster via HTTPS. 
Are you sure this is an HTTPS endpoint?", e) except ConnectionError as e: @@ -288,19 +293,25 @@ def wait_for_rest_layer(es, max_attempts=40): "Received a protocol error. Are you sure you're using the correct scheme (HTTP or HTTPS)?", e ) - logger.debug("Got connection error on attempt [%s]. Sleeping...", attempt) - time.sleep(3) + if attempt <= max_attempts: + logger.debug("Got connection error on attempt [%s]. Sleeping...", attempt) + time.sleep(3) + else: + raise except TransportError as e: - logger.debug("Got transport error on attempt [%s]. Sleeping...", attempt) - time.sleep(3) + if attempt <= max_attempts: + logger.debug("Got transport error on attempt [%s]. Sleeping...", attempt) + time.sleep(3) + else: + raise except ApiError as e: # cluster block, x-pack not initialized yet, our wait condition is not reached - if e.status_code in (503, 401, 408): + if e.status_code in (503, 401, 408) and attempt <= max_attempts: logger.debug("Got status code [%s] on attempt [%s]. Sleeping...", e.message, attempt) time.sleep(3) else: logger.warning("Got unexpected status code [%s] on attempt [%s].", e.message, attempt) - raise e + raise return False diff --git a/tests/client/factory_test.py b/tests/client/factory_test.py index a4c9d42ed..5cb9d0f56 100644 --- a/tests/client/factory_test.py +++ b/tests/client/factory_test.py @@ -471,9 +471,14 @@ def test_retries_on_transport_errors(self, es, sleep): # don't sleep in realtime @mock.patch("time.sleep") @mock.patch("elasticsearch.Elasticsearch") - def test_dont_retry_eternally_on_transport_errors(self, es, sleep): + def test_dont_retry_eternally_on_api_errors(self, es, sleep): es.cluster.health.side_effect = _api_error(401, "Unauthorized") - assert not client.wait_for_rest_layer(es, max_attempts=3) + es.transport.node_pool = ["node_1"] + with pytest.raises(elasticsearch.ApiError, match=r"Unauthorized"): + client.wait_for_rest_layer(es, max_attempts=3) + es.cluster.health.assert_has_calls( + [mock.call(wait_for_nodes=">=1"), 
mock.call(wait_for_nodes=">=1"), mock.call(wait_for_nodes=">=1")] + ) @mock.patch("elasticsearch.Elasticsearch") def test_ssl_serialization_error(self, es): From b9808fc7950e1f2fd5e514a29455ef4bf1e1b0e5 Mon Sep 17 00:00:00 2001 From: Brad Deam Date: Thu, 2 Mar 2023 12:14:14 +1030 Subject: [PATCH 67/68] Use 'es'snapshot.get' for 'WaitForCurrentSnapshotsCreate' runner --- esrally/driver/runner.py | 21 ++------------------- tests/driver/runner_test.py | 11 +++-------- 2 files changed, 5 insertions(+), 27 deletions(-) diff --git a/esrally/driver/runner.py b/esrally/driver/runner.py index 61d22da0c..bd8167453 100644 --- a/esrally/driver/runner.py +++ b/esrally/driver/runner.py @@ -2053,32 +2053,15 @@ async def __call__(self, es, params): wait_period = params.get("completion-recheck-wait-period", 1) es_info = await es.info() es_version = Version.from_string(es_info["version"]["number"]) - api = es.snapshot.get request_args = {"repository": repository, "snapshot": "_current", "verbose": False} # significantly reduce response size when lots of snapshots have been taken # only available since ES 8.3.0 (https://github.com/elastic/elasticsearch/pull/86269) if (es_version.major, es_version.minor) >= (8, 3): - request_params, headers = self._transport_request_params(params) - headers["Content-Type"] = "application/json" - - request_params["index_names"] = "false" - request_params["verbose"] = "false" - - request_args = { - "method": "GET", - "path": f"_snapshot/{repository}/_current", - "headers": headers, - "params": request_params, - } - - # TODO: Switch to native es.snapshot.get once `index_names` becomes supported in - # `es.snapshot.get` of the elasticsearch-py client and we've upgraded the client in Rally, see: - # https://elasticsearch-py.readthedocs.io/en/latest/api.html#elasticsearch.client.SnapshotClient.get - api = es.perform_request + request_args["index_names"] = False while True: - response = await api(**request_args) + response = await 
es.snapshot.get(**request_args) if int(response.get("total")) == 0: break diff --git a/tests/driver/runner_test.py b/tests/driver/runner_test.py index 4c97edd84..c34878764 100644 --- a/tests/driver/runner_test.py +++ b/tests/driver/runner_test.py @@ -4159,7 +4159,7 @@ async def test_wait_for_current_snapshots_create_after_8_3_0(self, es): "completion-recheck-wait-period": 0, } - es.perform_request = mock.AsyncMock( + es.snapshot.get = mock.AsyncMock( side_effect=[ { "snapshots": [ @@ -4202,14 +4202,9 @@ async def test_wait_for_current_snapshots_create_after_8_3_0(self, es): r = runner.WaitForCurrentSnapshotsCreate() result = await r(es, task_params) - es.perform_request.assert_awaited_with( - method="GET", - path=f"_snapshot/{repository}/_current", - headers={"Content-Type": "application/json"}, - params={"index_names": "false", "verbose": "false"}, - ) + es.snapshot.get.assert_awaited_with(repository=repository, snapshot="_current", verbose=False, index_names=False) - assert es.perform_request.await_count == 2 + assert es.snapshot.get.await_count == 2 assert result is None From 407b5f5b7a5c54e2c53eea9be48671ae689cca71 Mon Sep 17 00:00:00 2001 From: Brad Deam Date: Thu, 2 Mar 2023 16:04:17 +1030 Subject: [PATCH 68/68] Link to 'elastic_transport' repo in docs --- docs/migrate.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/migrate.rst b/docs/migrate.rst index c09488f0d..5401bfadd 100644 --- a/docs/migrate.rst +++ b/docs/migrate.rst @@ -4,8 +4,8 @@ Migration Guide Migrating to Rally 2.7.1 ------------------------ -Elasticsearch client logs are now captured by the ``elastic_transport`` logger -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +Elasticsearch client logs are now captured by the `elastic_transport `_ logger +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Rally migrated to the 8.x version of the 
``elasticsearch-py`` library which uses a new logger named ``elastic_transport``. Rally will automatically configure this logger to only emit logs of level ``WARNING`` and above, even if a past Rally version configured logging using the ``~./rally/logging.json`` file without that logger. Snapshot repository plugins are no longer built from source