From eb930bfdbc5cc0db10d85fc0453b663188ccf1df Mon Sep 17 00:00:00 2001
From: Mark Kurtz
Date: Wed, 21 Aug 2024 13:02:33 -0400
Subject: [PATCH 1/4] Refactor and Optimize Backend, Scheduler, and Test Suites for Improved Performance and Maintainability (#27)

## Summary

Introduces significant refactors and optimizations across the backend, scheduler, and test suites to enhance performance, maintainability, and code clarity. Key changes include modularizing code, simplifying complex components, and expanding unit tests to ensure robust functionality.

## Details

- **Backend Enhancements:**
  - Refactored `base.py` and `openai.py` for better separation of concerns and modularity.
  - Introduced comprehensive logging and error-handling improvements.
  - Simplified configuration handling by merging `config/base.py` into `config.py`.
- **Scheduler Improvements:**
  - Added `base.py` and optimized `load_generator.py` to handle complex scheduling logic more efficiently.
  - Removed deprecated methods in `scheduler.py` and moved the necessary logic into the new base classes.
- **Test Suite Overhaul:**
  - Expanded and reorganized unit tests, covering new and existing functionality.
  - Migrated and refactored integration tests to align with the updated codebase.
  - Introduced more thorough test coverage for backend and scheduler components, ensuring reliability.

## Test Plan

- **Automated Testing:**
  - All existing unit tests have been updated to reflect the changes.
  - New tests have been added to cover additional edge cases and new functionality.
- **Manual Testing:**
  - Verified that core functionalities of the backend and scheduler work as expected.
  - Ensured no regressions were introduced by the refactor.

## Unittest Coverage Report

```
Name                                         Stmts   Miss  Cover   Missing
--------------------------------------------------------------------------
src/guidellm/__init__.py                         3      0   100%
src/guidellm/backend/__init__.py                 3      0   100%
src/guidellm/backend/base.py                    76      8    89%   150-155, 183, 197, 219-221
src/guidellm/backend/openai.py                  59      7    88%   103, 162-164, 178-180
src/guidellm/config.py                          55      0   100%
src/guidellm/core/__init__.py                    6      0   100%
src/guidellm/core/distribution.py               92      0   100%
src/guidellm/core/report.py                      7      0   100%
src/guidellm/core/request.py                    10      0   100%
src/guidellm/core/result.py                    125      2    98%   104, 259
src/guidellm/core/serializable.py               64      1    98%   105
src/guidellm/executor/__init__.py                3      0   100%
src/guidellm/executor/base.py                   55      0   100%
src/guidellm/executor/profile_generator.py     113     16    86%   98, 149-151, 173-175, 207-209, 287-289, 295-297
src/guidellm/logger.py                          17      0   100%
src/guidellm/main.py                            55     55     0%   1-172
src/guidellm/request/__init__.py                 5      0   100%
src/guidellm/request/base.py                    67      0   100%
src/guidellm/request/emulated.py               130      3    98%   64, 79, 90
src/guidellm/request/file.py                    26      0   100%
src/guidellm/request/transformers.py            31      0   100%
src/guidellm/scheduler/__init__.py               3      0   100%
src/guidellm/scheduler/base.py                 138     24    83%   294-328, 334, 352-355
src/guidellm/scheduler/load_generator.py        74      5    93%   104-105, 139, 165, 175
src/guidellm/utils/__init__.py                   4      0   100%
src/guidellm/utils/injector.py                  20      0   100%
src/guidellm/utils/text.py                     201     22    89%   79-81, 86-88, 155-156, 166, 186, 190-191, 212-213, 245, 249, 304-305, 324, 344, 385, 443
src/guidellm/utils/transformers.py              53      0   100%
--------------------------------------------------------------------------
TOTAL                                         1495    143    90%
```
---
 .pre-commit-config.yaml | 2 +
 pyproject.toml | 30 +-
 src/guidellm/__init__.py | 5 +-
 src/guidellm/backend/base.py | 169 +-
 src/guidellm/backend/openai.py | 193 +-
 src/guidellm/{config/base.py => config.py} | 83 +-
 src/guidellm/config/__init__.py | 3 -
 src/guidellm/core/__init__.py | 15 +-
 src/guidellm/core/distribution.py | 9 +-
 src/guidellm/core/request.py | 2 +-
 src/guidellm/core/result.py | 112 +-
 src/guidellm/core/serializable.py | 56 +-
 src/guidellm/executor/__init__.py | 18 +-
 src/guidellm/executor/base.py | 189 ++
 src/guidellm/executor/executor.py | 64 -
 src/guidellm/executor/profile_generator.py | 426 ++--
 src/guidellm/logger.py | 3 +-
 src/guidellm/main.py | 60 +-
 src/guidellm/request/__init__.py | 2 +-
 src/guidellm/request/base.py | 36 +-
 src/guidellm/request/emulated.py | 454 ++--
 src/guidellm/request/file.py | 112 +-
 src/guidellm/request/transformers.py | 114 +-
 src/guidellm/scheduler/__init__.py | 4 +-
 src/guidellm/scheduler/base.py | 355 +++
 src/guidellm/scheduler/load_generator.py | 193 +-
 src/guidellm/scheduler/scheduler.py | 236 --
 src/guidellm/utils/__init__.py | 43 +-
 src/guidellm/utils/constants.py | 26 -
 src/guidellm/utils/injector.py | 38 +-
 src/guidellm/utils/text.py | 455 ++++
 src/guidellm/utils/transformers.py | 151 ++
 tests/conftest.py | 31 -
 tests/dummy/data/pride_and_prejudice.txt | 2015 +++++++++++++++++
 tests/dummy/data/transformers.py | 50 +
 tests/e2e/{core => cli}/__init__.py | 0
 tests/{unit => e2e}/cli/conftest.py | 0
 .../cli/test_application_entrypoint.py | 7 +-
 .../{unit => e2e}/cli/test_main_validation.py | 1 +
 tests/integration/backend/__init__.py | 0
 .../backend/test_openai_backend_submit.py | 71 -
 tests/integration/executor/__init__.py | 0
 tests/integration/executor/conftest.py | 36 -
 .../executor/test_report_generation.py | 179 --
 tests/integration/request/__init__.py | 0
 tests/integration/request/test_base.py | 23 -
 tests/integration/test_guidellm.py | 8 +
 tests/unit/backend/test_base.py | 199 +-
 tests/unit/backend/test_openai_backend.py | 328 ++-
 tests/unit/cli/__init__.py | 0
 tests/unit/config/__init__.py | 0
 tests/unit/conftest.py | 51 +-
 tests/unit/core/test_distribution.py | 48 +-
 tests/unit/core/test_report.py | 5 +-
 tests/unit/core/test_request.py | 21 +-
 tests/unit/core/test_result.py | 187 +-
 tests/unit/core/test_serializable.py | 87 +-
 tests/unit/executor/test_base.py | 278 +++
 tests/unit/executor/test_executor.py | 86 -
 ...test_fixed_rate_profile_generation_mode.py | 49 -
 tests/unit/executor/test_profile_generator.py | 340 +--
 .../test_sweep_profile_generation_mode.py | 45 -
 tests/unit/request/test_base.py | 114 +-
 tests/unit/request/test_emulated.py | 365 +++
 tests/unit/request/test_file.py | 98 +
 tests/unit/request/test_transformers.py | 95 +
 tests/unit/scheduler/conftest.py | 15 -
 tests/unit/scheduler/test_base.py | 249 ++
 tests/unit/scheduler/test_basics.py | 50 -
 tests/unit/scheduler/test_constant_mode.py | 60 -
 tests/unit/scheduler/test_load_generator.py | 153 ++
 tests/unit/scheduler/test_poission_mode.py | 63 -
 tests/unit/scheduler/test_sync_mode.py | 62 -
 .../{config/test_base.py => test_config.py} | 3 +-
 tests/unit/test_logger.py | 9 +-
 tests/unit/utils/test_injector.py | 58 +-
 tests/unit/utils/test_text.py | 394 ++++
 tests/unit/utils/test_transformers.py | 236 ++
 tox.ini | 2 +-
 79 files changed, 7429 insertions(+), 2400 deletions(-)
 rename src/guidellm/{config/base.py => config.py} (55%)
 delete mode 100644 src/guidellm/config/__init__.py
 create mode 100644 src/guidellm/executor/base.py
 delete mode 100644 src/guidellm/executor/executor.py
 create mode 100644 src/guidellm/scheduler/base.py
 delete mode 100644 src/guidellm/scheduler/scheduler.py
 delete mode 100644 src/guidellm/utils/constants.py
 create mode 100644
src/guidellm/utils/text.py create mode 100644 src/guidellm/utils/transformers.py delete mode 100644 tests/conftest.py create mode 100644 tests/dummy/data/pride_and_prejudice.txt create mode 100644 tests/dummy/data/transformers.py rename tests/e2e/{core => cli}/__init__.py (100%) rename tests/{unit => e2e}/cli/conftest.py (100%) rename tests/{unit => e2e}/cli/test_application_entrypoint.py (93%) rename tests/{unit => e2e}/cli/test_main_validation.py (99%) delete mode 100644 tests/integration/backend/__init__.py delete mode 100644 tests/integration/backend/test_openai_backend_submit.py delete mode 100644 tests/integration/executor/__init__.py delete mode 100644 tests/integration/executor/conftest.py delete mode 100644 tests/integration/executor/test_report_generation.py delete mode 100644 tests/integration/request/__init__.py delete mode 100644 tests/integration/request/test_base.py create mode 100644 tests/integration/test_guidellm.py delete mode 100644 tests/unit/cli/__init__.py delete mode 100644 tests/unit/config/__init__.py create mode 100644 tests/unit/executor/test_base.py delete mode 100644 tests/unit/executor/test_executor.py delete mode 100644 tests/unit/executor/test_fixed_rate_profile_generation_mode.py delete mode 100644 tests/unit/executor/test_sweep_profile_generation_mode.py create mode 100644 tests/unit/request/test_emulated.py create mode 100644 tests/unit/request/test_file.py create mode 100644 tests/unit/request/test_transformers.py delete mode 100644 tests/unit/scheduler/conftest.py create mode 100644 tests/unit/scheduler/test_base.py delete mode 100644 tests/unit/scheduler/test_basics.py delete mode 100644 tests/unit/scheduler/test_constant_mode.py create mode 100644 tests/unit/scheduler/test_load_generator.py delete mode 100644 tests/unit/scheduler/test_poission_mode.py delete mode 100644 tests/unit/scheduler/test_sync_mode.py rename tests/unit/{config/test_base.py => test_config.py} (98%) create mode 100644 tests/unit/utils/test_text.py create mode 100644 tests/unit/utils/test_transformers.py diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index a7be4d9..4bd9dc5 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -18,6 +18,7 @@ repos: # main dependencies click, datasets, + ftfy, loguru, numpy, openai, @@ -30,6 +31,7 @@ repos: # dev dependencies pytest, pydantic_settings, + requests-mock, # types types-click, diff --git a/pyproject.toml b/pyproject.toml index 6d07637..4d54edc 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -27,6 +27,7 @@ urls = { homepage = "https://github.com/neuralmagic/guidellm" } dependencies = [ "click", "datasets", + "ftfy>=6.0.0", "loguru", "numpy", "openai", @@ -41,13 +42,16 @@ dependencies = [ dev = [ # general and configurations "pre-commit~=3.5.0", + "scipy~=1.10", "sphinx~=7.1.2", "tox~=4.16.0", # testing "pytest~=8.2.2", + "pytest-asyncio~=0.23.8", "pytest-cov~=5.0.0", "pytest-mock~=3.14.0", + "pytest-rerunfailures~=14.0", "requests-mock~=1.12.1", # code quality @@ -83,7 +87,7 @@ profile = "black" files = ["src/guidellm", "tests"] python_version = '3.8' warn_redundant_casts = true -warn_unused_ignores = true +warn_unused_ignores = false show_error_codes = true namespace_packages = true exclude = ["venv", ".tox"] @@ -92,22 +96,27 @@ exclude = ["venv", ".tox"] # Check: https://mypy.readthedocs.io/en/latest/config_file.html#import-discovery follow_imports = 'silent' -[[tool.mypy.overrides]] -module = ["transformers.*", "datasets.*"] -ignore_missing_imports=true - [tool.ruff] line-length = 88 +indent-width 
= 4 exclude = ["build", "dist", "env", ".venv"] -lint.ignore = [ + +[tool.ruff.format] +quote-style = "double" +indent-style = "space" + +[tool.ruff.lint] +ignore = [ "PLR0913", "TCH001", "COM812", "ISC001", "TCH002", + "PLW1514", # allow Path.open without encoding + ] -lint.select = [ +select = [ # Rules reference: https://docs.astral.sh/ruff/rules/ # Code Style / Formatting @@ -127,11 +136,11 @@ lint.select = [ "Q", # flake8-quotes: enforces consistent use of single or double quotes "TCH", # flake8-type-checking: enforces type checking practices and standards "TID", # flake8-tidy-imports: enforces tidy and well-organized imports + "RUF022", # flake8-ruff: enforce sorting of __all__ in modules # Code Structure / Complexity "C4", # flake8-comprehensions: improves readability and performance of list, set, and dict comprehensions "C90", # mccabe: checks for overly complex code using cyclomatic complexity - "FBT", # flake8-boolean-trap: prevents the use of boolean traps in function arguments and calls "ISC", # flake8-implicit-str-concat: prevents implicit string concatenation "PIE", # flake8-pie: identifies and corrects common code inefficiencies and mistakes "R", # Refactor: suggests improvements to code structure and readability @@ -164,7 +173,6 @@ lint.select = [ "tests/**/*.py" = [ "S101", # asserts allowed in tests "ARG", # Unused function args allowed in tests - "FBT", # Booleans as positional arguments in tests, e.g. via @pytest.mark.parametrize() "PLR2004", # Magic value used in comparison "TCH002", # No import only type checking in tests "SLF001", # enable private member access in tests @@ -173,8 +181,12 @@ lint.select = [ "PT011", # allow generic exceptions in tests "N806", # allow uppercase variable names in tests "PGH003", # allow general ignores in tests + "S106", # allow hardcoded passwords in tests ] +[tool.ruff.lint.isort] +known-first-party = ["guidellm", "tests"] + [tool.pytest.ini_options] addopts = '-s -vvv --cache-clear' diff --git a/src/guidellm/__init__.py b/src/guidellm/__init__.py index 25747ff..de2a220 100644 --- a/src/guidellm/__init__.py +++ b/src/guidellm/__init__.py @@ -3,4 +3,7 @@ evaluating and benchmarking large language models (LLMs). """ -from .logger import configure_logger, logger # noqa: F401 +from .config import settings +from .logger import configure_logger, logger + +__all__ = ["configure_logger", "logger", "settings"] diff --git a/src/guidellm/backend/base.py b/src/guidellm/backend/base.py index 0c11e5b..a5fc35e 100644 --- a/src/guidellm/backend/base.py +++ b/src/guidellm/backend/base.py @@ -1,35 +1,39 @@ import functools from abc import ABC, abstractmethod -from dataclasses import dataclass -from enum import Enum -from typing import Dict, Iterator, List, Optional, Type +from typing import AsyncGenerator, Dict, List, Literal, Optional, Type from loguru import logger +from pydantic import BaseModel from guidellm.core import TextGenerationRequest, TextGenerationResult __all__ = ["Backend", "BackendEngine", "GenerativeResponse"] -class BackendEngine(str, Enum): - """ - Determines the Engine of the LLM Backend. - All the implemented backends in the project have the engine. - - NOTE: the `TEST` engine has to be used only for testing purposes. - """ +BackendEngine = Literal["test", "openai_server"] - TEST = "test" - OPENAI_SERVER = "openai_server" - -@dataclass -class GenerativeResponse: +class GenerativeResponse(BaseModel): """ - A dataclass to represent a response from a generative AI backend. 
+ A model representing a response from a generative AI backend. + + :param type_: The type of response, either 'token_iter' for intermediate + token output or 'final' for the final result. + :type type_: Literal["token_iter", "final"] + :param add_token: The token to add to the output + (only applicable if type_ is 'token_iter'). + :type add_token: Optional[str] + :param prompt: The original prompt sent to the backend. + :type prompt: Optional[str] + :param output: The final generated output (only applicable if type_ is 'final'). + :type output: Optional[str] + :param prompt_token_count: The number of tokens in the prompt. + :type prompt_token_count: Optional[int] + :param output_token_count: The number of tokens in the output. + :type output_token_count: Optional[int] """ - type_: str # One of 'token_iter', 'final' + type_: Literal["token_iter", "final"] add_token: Optional[str] = None prompt: Optional[str] = None output: Optional[str] = None @@ -39,7 +43,14 @@ class GenerativeResponse: class Backend(ABC): """ - An abstract base class with template methods for generative AI backends. + Abstract base class for generative AI backends. + + This class provides a common interface for creating and interacting with different + generative AI backends. Subclasses should implement the abstract methods to + define specific backend behavior. + + :cvar _registry: A dictionary that maps BackendEngine types to backend classes. + :type _registry: Dict[BackendEngine, Type[Backend]] """ _registry: Dict[BackendEngine, "Type[Backend]"] = {} @@ -50,11 +61,14 @@ def register(cls, backend_type: BackendEngine): A decorator to register a backend class in the backend registry. :param backend_type: The type of backend to register. - :type backend_type: BackendType + :type backend_type: BackendEngine + :return: The decorated backend class. + :rtype: Type[Backend] """ def inner_wrapper(wrapped_class: Type["Backend"]): cls._registry[backend_type] = wrapped_class + logger.info("Registered backend type: {}", backend_type) return wrapped_class return inner_wrapper @@ -62,21 +76,23 @@ def inner_wrapper(wrapped_class: Type["Backend"]): @classmethod def create(cls, backend_type: BackendEngine, **kwargs) -> "Backend": """ - Factory method to create a backend based on the backend type. + Factory method to create a backend instance based on the backend type. :param backend_type: The type of backend to create. - :type backend_type: BackendType + :type backend_type: BackendEngine :param kwargs: Additional arguments for backend initialization. :type kwargs: dict :return: An instance of a subclass of Backend. :rtype: Backend + :raises ValueError: If the backend type is not registered. """ - logger.info(f"Creating backend of type {backend_type}") + logger.info("Creating backend of type {}", backend_type) if backend_type not in cls._registry: - logger.error(f"Unsupported backend type: {backend_type}") - raise ValueError(f"Unsupported backend type: {backend_type}") + err = ValueError(f"Unsupported backend type: {backend_type}") + logger.error("{}", err) + raise err return Backend._registry[backend_type](**kwargs) @@ -87,82 +103,119 @@ def default_model(self) -> str: :return: The default model. :rtype: str + :raises ValueError: If no models are available. """ return _cachable_default_model(self) - def submit(self, request: TextGenerationRequest) -> TextGenerationResult: + async def submit(self, request: TextGenerationRequest) -> TextGenerationResult: """ - Submit a result request and populate the BenchmarkResult. 
+ Submit a text generation request and return the result. - :param request: The result request to submit. + This method handles the request submission to the backend and processes + the response in a streaming fashion if applicable. + + :param request: The request object containing the prompt + and other configurations. :type request: TextGenerationRequest - :return: The populated result result. + :return: The result of the text generation request. :rtype: TextGenerationResult + :raises ValueError: If no response is received from the backend. """ - logger.info(f"Submitting request with prompt: {request.prompt}") + logger.debug("Submitting request with prompt: {}", request.prompt) - result = TextGenerationResult( - request=TextGenerationRequest(prompt=request.prompt), - ) + result = TextGenerationResult(request=request) result.start(request.prompt) + received_final = False - for response in self.make_request(request): # GenerativeResponse - if response.type_ == "token_iter" and response.add_token: - result.output_token(response.add_token) + async for response in self.make_request(request): + logger.debug("Received response: {}", response) + if response.type_ == "token_iter": + result.output_token(response.add_token if response.add_token else "") elif response.type_ == "final": + if received_final: + err = ValueError( + "Received multiple final responses from the backend." + ) + logger.error(err) + raise err + result.end( + output=response.output, prompt_token_count=response.prompt_token_count, output_token_count=response.output_token_count, ) + received_final = True + else: + err = ValueError( + f"Invalid response received from the backend of type: " + f"{response.type_} for {response}" + ) + logger.error(err) + raise err - logger.info(f"Request completed with output: {result.output}") + if not received_final: + err = ValueError("No final response received from the backend.") + logger.error(err) + raise err + + logger.info("Request completed with output: {}", result.output) return result @abstractmethod - def make_request( + async def make_request( self, request: TextGenerationRequest, - ) -> Iterator[GenerativeResponse]: + ) -> AsyncGenerator[GenerativeResponse, None]: """ Abstract method to make a request to the backend. - :param request: The result request to submit. + Subclasses must implement this method to define how requests are handled + by the backend. + + :param request: The request object containing the prompt and + other configurations. :type request: TextGenerationRequest - :return: An iterator over the generative responses. - :rtype: Iterator[GenerativeResponse] + :yield: A generator yielding responses from the backend. + :rtype: AsyncGenerator[GenerativeResponse, None] """ - raise NotImplementedError + yield None # type: ignore # noqa: PGH003 @abstractmethod def available_models(self) -> List[str]: """ Abstract method to get the available models for the backend. + Subclasses must implement this method to provide the list of models + supported by the backend. + :return: A list of available models. :rtype: List[str] - """ - raise NotImplementedError - - @abstractmethod - def model_tokenizer(self, model: str) -> Optional[str]: - """ - Abstract method to get the tokenizer for a model. - - :param model: The model to get the tokenizer for. - :type model: str - :return: The tokenizer for the model, or None if it cannot be created. - :rtype: Optional[str] + :raises NotImplementedError: If the method is not implemented by a subclass. 
""" raise NotImplementedError @functools.lru_cache(maxsize=1) def _cachable_default_model(backend: Backend) -> str: - if models := backend.available_models(): - logger.debug(f"Default model: {models[0]}") + """ + Get the default model for a backend using LRU caching. + + This function caches the default model to optimize repeated lookups. + + :param backend: The backend instance for which to get the default model. + :type backend: Backend + :return: The default model. + :rtype: str + :raises ValueError: If no models are available. + """ + logger.debug("Getting default model for backend: {}", backend) + models = backend.available_models() + if models: + logger.debug("Default model: {}", models[0]) return models[0] - logger.error("No models available.") - raise ValueError("No models available.") + err = ValueError("No models available.") + logger.error(err) + raise err diff --git a/src/guidellm/backend/openai.py b/src/guidellm/backend/openai.py index af91709..3bebb7e 100644 --- a/src/guidellm/backend/openai.py +++ b/src/guidellm/backend/openai.py @@ -1,36 +1,35 @@ -from typing import Any, Dict, Generator, List, Optional +from typing import AsyncGenerator, Dict, List, Optional -import openai from loguru import logger -from openai import OpenAI, Stream -from openai.types import Completion -from transformers import AutoTokenizer +from openai import AsyncOpenAI, OpenAI -from guidellm.backend import Backend, BackendEngine, GenerativeResponse +from guidellm.backend.base import Backend, GenerativeResponse from guidellm.config import settings from guidellm.core import TextGenerationRequest __all__ = ["OpenAIBackend"] -@Backend.register(BackendEngine.OPENAI_SERVER) +@Backend.register("openai_server") class OpenAIBackend(Backend): """ - An OpenAI backend implementation for the generative AI result. + An OpenAI backend implementation for generative AI results. + This class provides an interface to communicate with the + OpenAI server for generating responses based on given prompts. + + :param openai_api_key: The API key for OpenAI. + If not provided, it will default to the key from settings. + :type openai_api_key: Optional[str] :param target: The target URL string for the OpenAI server. - :type target: str + :type target: Optional[str] :param host: Optional host for the OpenAI server. :type host: Optional[str] :param port: Optional port for the OpenAI server. :type port: Optional[int] - :param path: Optional path for the OpenAI server. - :type path: Optional[str] :param model: The OpenAI model to use, defaults to the first available model. :type model: Optional[str] - :param api_key: The OpenAI API key to use. - :type api_key: Optional[str] - :param request_args: Optional arguments for the OpenAI request. + :param request_args: Additional arguments for the OpenAI request. 
:type request_args: Dict[str, Any] """ @@ -43,125 +42,139 @@ def __init__( model: Optional[str] = None, **request_args, ): - """ - Initialize an OpenAI Client - """ - - self.request_args = request_args - - if not (_api_key := (openai_api_key or settings.openai.api_key)): - raise ValueError( - "`GUIDELLM__OPENAI__API_KEY` environment variable " - "or --openai-api-key CLI parameter " - "must be specify for the OpenAI backend", - ) + self._request_args: Dict = request_args + api_key: str = openai_api_key or settings.openai.api_key - if target is not None: + if target: base_url = target elif host and port: base_url = f"{host}:{port}" - elif settings.openai.base_url is not None: + elif settings.openai.base_url: base_url = settings.openai.base_url else: - raise ValueError( - "`GUIDELLM__OPENAI__BASE_URL` environment variable " - "or --target CLI parameter must be specified for the OpenAI backend." + err = ValueError( + "`GUIDELLM__OPENAI__BASE_URL` environment variable or " + "--target CLI parameter must be specified for the OpenAI backend." ) + logger.error("{}", err) + raise err - self.openai_client = OpenAI(api_key=_api_key, base_url=base_url) - self.model = model or self.default_model + self._async_client = AsyncOpenAI(api_key=api_key, base_url=base_url) + self._client = OpenAI(api_key=api_key, base_url=base_url) - logger.info("OpenAI {} Backend listening on {}", self.model, target) + self.validate_connection() + self._model = model or self.default_model - def make_request( + logger.info("OpenAI {} Backend listening on {}", self._model, base_url) + + @property + def model(self) -> str: + """ + Get the model used by this backend. + + :return: The model name. + :rtype: str + """ + return self._model + + async def make_request( self, request: TextGenerationRequest, - ) -> Generator[GenerativeResponse, None, None]: + ) -> AsyncGenerator[GenerativeResponse, None]: """ Make a request to the OpenAI backend. - :param request: The result request to submit. + This method sends a prompt to the OpenAI backend and streams + the response tokens back. + + :param request: The text generation request to submit. :type request: TextGenerationRequest - :return: An iterator over the generative responses. - :rtype: Iterator[GenerativeResponse] + :yield: A stream of GenerativeResponse objects. 
+ :rtype: AsyncGenerator[GenerativeResponse, None] """ - logger.debug(f"Making request to OpenAI backend with prompt: {request.prompt}") + logger.debug("Making request to OpenAI backend with prompt: {}", request.prompt) - # How many completions to generate for each prompt - request_args: Dict = {"n": 1} + request_args: Dict = { + "n": 1, # Number of completions for each prompt + } - num_gen_tokens: int = ( - request.params.get("generated_tokens", None) - or settings.openai.max_gen_tokens - ) - request_args.update({"max_tokens": num_gen_tokens, "stop": None}) + if request.output_token_count is not None: + request_args.update( + { + "max_tokens": request.output_token_count, + "stop": None, + } + ) + elif settings.openai.max_gen_tokens and settings.openai.max_gen_tokens > 0: + request_args.update( + { + "max_tokens": settings.openai.max_gen_tokens, + } + ) - if self.request_args: - request_args.update(self.request_args) + request_args.update(self._request_args) - response: Stream[Completion] = self.openai_client.completions.create( + stream = await self._async_client.chat.completions.create( model=self.model, - prompt=request.prompt, + messages=[ + {"role": "system", "content": request.prompt}, + ], stream=True, **request_args, ) + token_count = 0 + async for chunk in stream: + choice = chunk.choices[0] + token = choice.delta.content or "" - for chunk in response: - chunk_content: str = getattr(chunk, "content", "") - - if getattr(chunk, "stop", True) is True: - logger.debug("Received final response from OpenAI backend") - + if choice.finish_reason is not None: yield GenerativeResponse( type_="final", - prompt=getattr(chunk, "prompt", request.prompt), - prompt_token_count=( - request.prompt_token_count or self._token_count(request.prompt) - ), - output_token_count=(self._token_count(chunk_content)), + prompt=request.prompt, + prompt_token_count=request.prompt_token_count, + output_token_count=token_count, ) - else: - logger.debug("Received token from OpenAI backend") - yield GenerativeResponse(type_="token_iter", add_token=chunk_content) + break + + token_count += 1 + yield GenerativeResponse( + type_="token_iter", + add_token=token, + prompt=request.prompt, + prompt_token_count=request.prompt_token_count, + output_token_count=token_count, + ) def available_models(self) -> List[str]: """ Get the available models for the backend. + This method queries the OpenAI API to retrieve a list of available models. + :return: A list of available models. :rtype: List[str] + :raises openai.OpenAIError: If an error occurs while retrieving models. """ try: - models: List[str] = [ - model.id for model in self.openai_client.models.list().data - ] - except openai.NotFoundError as error: - logger.error("No available models for OpenAI Backend") + return [model.id for model in self._client.models.list().data] + except Exception as error: + logger.error("Failed to retrieve available models: {}", error) raise error - else: - logger.info(f"Available models: {models}") - return models - def model_tokenizer(self, model: str) -> Optional[Any]: + def validate_connection(self): """ - Get the tokenizer for a model. + Validate the connection to the OpenAI backend. + + This method checks that the OpenAI backend is reachable and + the API key is valid. - :param model: The model to get the tokenizer for. - :type model: str - :return: The tokenizer for the model, or None if it cannot be created. - :rtype: Optional[Any] + :raises openai.OpenAIError: If the connection is invalid. 
""" + try: - tokenizer = AutoTokenizer.from_pretrained(model) - logger.info(f"Tokenizer created for model: {model}") - return tokenizer - except Exception as err: # noqa: BLE001 - logger.warning(f"Could not create tokenizer for model {model}: {err}") - return None - - def _token_count(self, text: str) -> int: - token_count = len(text.split()) - logger.debug(f"Token count for text '{text}': {token_count}") - return token_count + self._client.models.list() + except Exception as error: + logger.error("Failed to validate OpenAI connection: {}", error) + raise error diff --git a/src/guidellm/config/base.py b/src/guidellm/config.py similarity index 55% rename from src/guidellm/config/base.py rename to src/guidellm/config.py index 0cfab32..fc15eff 100644 --- a/src/guidellm/config/base.py +++ b/src/guidellm/config.py @@ -1,16 +1,19 @@ from enum import Enum -from typing import Optional +from typing import Dict, List, Optional -from pydantic import BaseModel, model_validator +from pydantic import BaseModel, Field, model_validator from pydantic_settings import BaseSettings, SettingsConfigDict __all__ = [ - "settings", - "Settings", + "DatasetSettings", + "EmulatedDataSettings", "Environment", "LoggingSettings", "OpenAISettings", "ReportGenerationSettings", + "Settings", + "reload_settings", + "settings", ] @@ -45,6 +48,49 @@ class LoggingSettings(BaseModel): log_file_level: Optional[str] = None +class DatasetSettings(BaseModel): + """ + Dataset settings for the application + """ + + preferred_data_columns: List[str] = Field( + default_factory=lambda: [ + "prompt", + "instruction", + "input", + "inputs", + "question", + "context", + "text", + "content", + "body", + "data", + ] + ) + preferred_data_splits: List[str] = Field( + default_factory=lambda: ["test", "tst", "validation", "val", "train"] + ) + default_tokenizer: str = "neuralmagic/Meta-Llama-3.1-8B-FP8" + + +class EmulatedDataSettings(BaseModel): + """ + Emulated data settings for the application to use + """ + + source: str = "https://www.gutenberg.org/files/1342/1342-0.txt" + filter_start: str = "It is a truth universally acknowledged, that a" + filter_end: str = "CHISWICK PRESS:--CHARLES WHITTINGHAM AND CO." + clean_text_args: Dict[str, bool] = Field( + default_factory=lambda: { + "fix_encoding": True, + "clean_whitespace": True, + "remove_empty_lines": True, + "force_new_line_punctuation": True, + } + ) + + class OpenAISettings(BaseModel): """ OpenAI settings for the application to connect to the API @@ -52,7 +98,7 @@ class OpenAISettings(BaseModel): """ # OpenAI API key. 
- api_key: str = "invalid" + api_key: str = "" # OpenAI-compatible server URL # NOTE: The default value is default address of llama.cpp web server @@ -62,7 +108,13 @@ class OpenAISettings(BaseModel): class ReportGenerationSettings(BaseModel): + """ + Report generation settings for the application + """ + source: str = "" + report_html_match: str = "window.report_data = {};" + report_html_placeholder: str = "{}" class Settings(BaseSettings): @@ -76,7 +128,6 @@ class Settings(BaseSettings): export GUIDELLM__LOGGING__DISABLED=true export GUIDELLM__OPENAI__API_KEY=****** ``` - """ model_config = SettingsConfigDict( @@ -87,11 +138,21 @@ class Settings(BaseSettings): env_file=".env", ) + # general settings env: Environment = Environment.PROD request_timeout: int = 30 - + max_concurrency: int = 128 + num_sweep_profiles: int = 10 logging: LoggingSettings = LoggingSettings() + + # Data settings + dataset: DatasetSettings = DatasetSettings() + emulated_data: EmulatedDataSettings = EmulatedDataSettings() + + # Request settings openai: OpenAISettings = OpenAISettings() + + # Report settings report_generation: ReportGenerationSettings = ReportGenerationSettings() @model_validator(mode="after") @@ -104,3 +165,11 @@ def set_default_source(cls, values): settings = Settings() + + +def reload_settings(): + """ + Reload the settings from the environment variables + """ + new_settings = Settings() + settings.__dict__.update(new_settings.__dict__) diff --git a/src/guidellm/config/__init__.py b/src/guidellm/config/__init__.py deleted file mode 100644 index 47544ef..0000000 --- a/src/guidellm/config/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -from .base import settings - -__all__ = ["settings"] diff --git a/src/guidellm/core/__init__.py b/src/guidellm/core/__init__.py index 9ada6c8..e738aa7 100644 --- a/src/guidellm/core/__init__.py +++ b/src/guidellm/core/__init__.py @@ -8,16 +8,17 @@ TextGenerationError, TextGenerationResult, ) -from .serializable import Serializable +from .serializable import Serializable, SerializableFileType __all__ = [ "Distribution", - "TextGenerationRequest", - "TextGenerationResult", - "TextGenerationError", - "TextGenerationBenchmark", - "TextGenerationBenchmarkReport", + "GuidanceReport", "RequestConcurrencyMeasurement", "Serializable", - "GuidanceReport", + "SerializableFileType", + "TextGenerationBenchmark", + "TextGenerationBenchmarkReport", + "TextGenerationError", + "TextGenerationRequest", + "TextGenerationResult", ] diff --git a/src/guidellm/core/distribution.py b/src/guidellm/core/distribution.py index d2fc075..fb9b12c 100644 --- a/src/guidellm/core/distribution.py +++ b/src/guidellm/core/distribution.py @@ -23,6 +23,9 @@ class Distribution(Serializable): def __str__(self): return f"Distribution({self.describe()})" + def __len__(self): + return len(self.data) + @property def mean(self) -> float: """ @@ -103,7 +106,7 @@ def percentiles(self, percentiles: List[float]) -> List[float]: logger.warning("No data points available to calculate percentiles.") return [0.0] * len(percentiles) - percentiles_values = np.percentile(self.data, percentiles).tolist() + percentiles_values: List[float] = np.percentile(self.data, percentiles).tolist() # type: ignore # noqa: PGH003 logger.debug(f"Calculated percentiles {percentiles}: {percentiles_values}") return percentiles_values @@ -117,7 +120,7 @@ def min(self) -> float: logger.warning("No data points available to calculate minimum.") return 0.0 - min_value = np.min(self.data) + min_value: float = np.min(self.data) logger.debug(f"Calculated min: 
{min_value}") return min_value @@ -131,7 +134,7 @@ def max(self) -> float: logger.warning("No data points available to calculate maximum.") return 0.0 - max_value = np.max(self.data) + max_value: float = np.max(self.data) logger.debug(f"Calculated max: {max_value}") return max_value diff --git a/src/guidellm/core/request.py b/src/guidellm/core/request.py index 83cfaca..133d12e 100644 --- a/src/guidellm/core/request.py +++ b/src/guidellm/core/request.py @@ -20,7 +20,7 @@ class TextGenerationRequest(Serializable): default=None, description="The number of tokens in the input prompt.", ) - generate_token_count: Optional[int] = Field( + output_token_count: Optional[int] = Field( default=None, description="The number of tokens to generate.", ) diff --git a/src/guidellm/core/result.py b/src/guidellm/core/result.py index 0743f0a..4b04551 100644 --- a/src/guidellm/core/result.py +++ b/src/guidellm/core/result.py @@ -1,5 +1,5 @@ from time import time -from typing import Any, Dict, List, Optional, Union +from typing import Any, Dict, List, Literal, Optional, Union from loguru import logger from pydantic import Field @@ -9,11 +9,11 @@ from guidellm.core.serializable import Serializable __all__ = [ - "TextGenerationResult", - "TextGenerationError", + "RequestConcurrencyMeasurement", "TextGenerationBenchmark", "TextGenerationBenchmarkReport", - "RequestConcurrencyMeasurement", + "TextGenerationError", + "TextGenerationResult", ] @@ -98,10 +98,15 @@ def output_token(self, token: str): :param token: The decoded token. :type token: str """ - current_counter = time() + self._check_recording_started() + + if self.last_time is None: + raise ValueError( + "last time is not specified. " + "Did you call `text_generation_benchmark.start()`?" + ) - if not self.last_time: - raise ValueError("Last time is not specified to get the output token.") + current_counter = time() if not self.first_token_set: self.first_token_time = current_counter - self.last_time @@ -113,7 +118,7 @@ def output_token(self, token: str): logger.debug(f"Token '{token}' decoded in {decode_time} seconds") self.last_time = current_counter - self.output += f"{token} " + self.output += token logger.debug("Added token {} to output", token) def end( @@ -134,6 +139,7 @@ def end( defaults to word count. :type output_token_count: Optional[int] """ + self._check_recording_started() self.end_time = time() if output: @@ -147,26 +153,13 @@ def end( def _check_recording_started( self, - raise_exception: bool = True, # noqa: FBT001, FBT002 - ) -> bool: - """ - Ensure that the benchmark text generation recording is started. - - We can assume that if the `self._start_time` exist, - then the `start()` has been called. - """ - - if self.start_time is not None: - return True - - if raise_exception is True: + ): + if self.start_time is None: raise ValueError( "start time is not specified. " "Did you make the `text_generation_benchmark.start()`?", ) - return False - class TextGenerationError(Serializable): """ @@ -203,7 +196,9 @@ class TextGenerationBenchmark(Serializable): This is a set of results and errors for a specific mode and rate. """ - mode: str = Field(description="The generation mode, either 'async' or 'sync'.") + mode: Literal["asynchronous", "synchronous", "throughput"] = Field( + description="The generation mode, one of 'async', 'sync', or 'throughput'." 
+ ) rate: Optional[float] = Field( default=None, description="The requested rate of requests per second.", @@ -260,41 +255,43 @@ def completed_request_rate(self) -> float: if not self.results: return 0.0 - if not self.results[0].start_time or not self.results[-1].end_time: + if self.results[0].start_time is None or self.results[-1].end_time is None: raise ValueError("Start time and End time are not defined") - return self.request_count / ( - self.results[-1].end_time - self.results[0].start_time - ) + time_diff = self.results[-1].end_time - self.results[0].start_time + + return len(self.results) / time_diff @property def overloaded(self) -> bool: - if not self.results or not self.concurrencies: - raise ValueError("No results or concurrencies to check for overload.") - - if self.rate is None or len(self.concurrencies) < 2: # noqa: PLR2004 + if ( + self.rate is None + or not self.results + or not self.concurrencies + or len(self.concurrencies) < 2 # noqa: PLR2004 + ): # if rate was not set, sync mode is assumed, # or we have less than 2 data points, # then we cannot be overloaded by definition return False - # if the calculated rate is less than 60% of the requested rate, + # if the calculated rate is less than 75% of the requested rate, # safe to assume the system is overloaded - return self.completed_request_rate < 0.60 * self.rate + return self.completed_request_rate < 0.75 * self.rate def request_started(self): """ Record the start of a generation request. """ if not self.concurrencies: - self.concurrencies.append( + self.concurrencies = [ RequestConcurrencyMeasurement( time=time(), completed=0, errored=0, processing=1, ), - ) + ] else: last = self.concurrencies[-1] self.concurrencies.append( @@ -318,33 +315,34 @@ def request_completed( :param result: The completed result or error. 
:type result: Union[TextGenerationResult, TextGenerationError] """ + if not self.concurrencies: + raise ValueError("Request completed without starting") + if isinstance(result, TextGenerationError): + is_error = True self.errors.append(result) - last = self.concurrencies[-1] - self.concurrencies.append( - RequestConcurrencyMeasurement( - time=time(), - completed=last.completed, - errored=last.errored + 1, - processing=last.processing - 1, - ), - ) logger.warning( - f"Text generation request resulted in error: {result.message}", + "Text generation request resulted in error: {}", + result.message, ) else: + if not result.start_time or not result.end_time: + raise ValueError("Start time and End time are not defined") + + is_error = False self.results.append(result) - last = self.concurrencies[-1] - self.concurrencies.append( - RequestConcurrencyMeasurement( - time=time(), - completed=last.completed + 1, - errored=last.errored, - processing=last.processing - 1, - ), - ) logger.info("Text generation request completed successfully: {}", result) + last = self.concurrencies[-1] + self.concurrencies.append( + RequestConcurrencyMeasurement( + time=time(), + completed=last.completed + (not is_error), + errored=last.errored + is_error, + processing=last.processing - 1, + ) + ) + class TextGenerationBenchmarkReport(Serializable): """ @@ -357,8 +355,8 @@ class TextGenerationBenchmarkReport(Serializable): default_factory=list, description="The benchmarks of text generation requests.", ) - args: List[Dict[str, Any]] = Field( - default_factory=list, + args: Dict[str, Any] = Field( + default_factory=dict, description="The arguments used for the benchmarks.", ) diff --git a/src/guidellm/core/serializable.py b/src/guidellm/core/serializable.py index 2d81ebb..1e6b294 100644 --- a/src/guidellm/core/serializable.py +++ b/src/guidellm/core/serializable.py @@ -1,6 +1,5 @@ -from enum import Enum from pathlib import Path -from typing import Any, Union +from typing import Any, Literal, Union, get_args import yaml from loguru import logger @@ -9,13 +8,7 @@ __all__ = ["Serializable", "SerializableFileType"] -class SerializableFileType(Enum): - """ - Enum class for file types supported by Serializable. - """ - - YAML = "yaml" - JSON = "json" +SerializableFileType = Literal["yaml", "json"] class Serializable(BaseModel): @@ -86,7 +79,7 @@ def from_json(cls, data: str): def save_file( self, path: Union[str, Path], - type_: SerializableFileType = SerializableFileType.YAML, + type_: SerializableFileType = "yaml", ) -> str: """ Save the model to a file in either YAML or JSON format. @@ -107,28 +100,32 @@ def save_file( if path.suffix: # is a file - ext = path.suffix[1:].upper() - if ext not in SerializableFileType.__members__: + ext = path.suffix[1:].lower() + if type_ not in get_args(SerializableFileType): raise ValueError( - f"Unsupported file extension: {ext}. " - f"Expected one of {', '.join(SerializableFileType.__members__)}) " + f"Unsupported file extension: {type_}. 
" + f"Expected one of {SerializableFileType} " f"for {path}" ) - type_ = SerializableFileType[ext] + type_ = ext # type: ignore # noqa: PGH003 else: # is a directory - file_name = f"{self.__class__.__name__.lower()}.{type_.value.lower()}" + file_name = f"{self.__class__.__name__.lower()}.{type_}" path = path / file_name path.parent.mkdir(parents=True, exist_ok=True) with path.open("w") as file: - if type_ == SerializableFileType.YAML: + if type_ == "yaml": file.write(self.to_yaml()) - elif type_ == SerializableFileType.JSON: + elif type_ == "json": file.write(self.to_json()) else: - raise ValueError(f"Unsupported file format: {type_}") + raise ValueError( + f"Unsupported file extension: {type_}" + f"Expected one of {SerializableFileType} " + f"for {path}" + ) logger.info("Successfully saved {} to {}", self.__class__.__name__, path) @@ -153,25 +150,20 @@ def load_file(cls, path: Union[str, Path]): if not path.is_file(): raise ValueError(f"Path is not a file: {path}") - extension = path.suffix[1:].upper() - - if extension not in SerializableFileType.__members__: - raise ValueError( - f"Unsupported file extension: {extension}. " - f"Expected one of {', '.join(SerializableFileType.__members__)}) " - f"for {path}" - ) - - type_ = SerializableFileType[extension] + extension = path.suffix[1:].lower() with path.open() as file: data = file.read() - if type_ == SerializableFileType.YAML: + if extension == "yaml": obj = cls.from_yaml(data) - elif type_ == SerializableFileType.JSON: + elif extension == "json": obj = cls.from_json(data) else: - raise ValueError(f"Unsupported file format: {type_}") + raise ValueError( + f"Unsupported file extension: {extension}" + f"Expected one of {SerializableFileType} " + f"for {path}" + ) return obj diff --git a/src/guidellm/executor/__init__.py b/src/guidellm/executor/__init__.py index 7faa2a8..715363c 100644 --- a/src/guidellm/executor/__init__.py +++ b/src/guidellm/executor/__init__.py @@ -1,21 +1,9 @@ -from .executor import Executor -from .profile_generator import ( - RATE_TYPE_TO_LOAD_GEN_MODE_MAPPER, - RATE_TYPE_TO_PROFILE_MODE_MAPPER, - FixedRateProfileGenerator, - Profile, - ProfileGenerationMode, - ProfileGenerator, - SweepProfileGenerator, -) +from .base import Executor +from .profile_generator import Profile, ProfileGenerationMode, ProfileGenerator __all__ = [ - "RATE_TYPE_TO_LOAD_GEN_MODE_MAPPER", - "RATE_TYPE_TO_PROFILE_MODE_MAPPER", "Executor", - "ProfileGenerationMode", "Profile", + "ProfileGenerationMode", "ProfileGenerator", - "FixedRateProfileGenerator", - "SweepProfileGenerator", ] diff --git a/src/guidellm/executor/base.py b/src/guidellm/executor/base.py new file mode 100644 index 0000000..d6979a0 --- /dev/null +++ b/src/guidellm/executor/base.py @@ -0,0 +1,189 @@ +from dataclasses import dataclass +from typing import AsyncGenerator, List, Optional, Union + +from loguru import logger + +from guidellm.backend import Backend +from guidellm.core import TextGenerationBenchmarkReport +from guidellm.executor.profile_generator import ProfileGenerationMode, ProfileGenerator +from guidellm.request import RequestGenerator +from guidellm.scheduler import Scheduler, SchedulerResult + +__all__ = ["Executor", "ExecutorResult"] + + +@dataclass +class ExecutorResult: + """ + Data class representing the result of executing tasks in the Executor. + + :param completed: Indicates whether all tasks have completed. + :type completed: bool + :param count_total: Total number of profiles. + :type count_total: int + :param count_completed: Number of completed profiles. 
+ :type count_completed: int + :param report: A benchmark report for text generation. + :type report: TextGenerationBenchmarkReport + :param scheduler_result: Optional scheduler result for the last task. + :type scheduler_result: Optional[SchedulerResult] + """ + + completed: bool + count_total: int + count_completed: int + report: TextGenerationBenchmarkReport + scheduler_result: Optional[SchedulerResult] = None + + +class Executor: + """ + The Executor class manages the execution of tasks based on a given profile + generation mode and rate. It orchestrates the interaction between the backend, + request generator, and profile generator, and runs benchmarks accordingly. + + :param backend: The backend to run tasks against. + :type backend: Backend + :param request_generator: The generator that creates requests for execution. + :type request_generator: RequestGenerator + :param mode: The mode for profile generation (e.g., sweep, synchronous). + :type mode: ProfileGenerationMode + :param rate: The list of rates for load generation, or None. + :type rate: Optional[List[float]] + :param max_number: Maximum number of requests to generate for the scheduler + (a single benchmark run), or None. + :type max_number: Optional[int] + :param max_duration: Maximum duration for generating requests for the scheduler, + (a single benchmark run), or None. + :type max_duration: Optional[float] + """ + + def __init__( + self, + backend: Backend, + request_generator: RequestGenerator, + mode: ProfileGenerationMode = "sweep", + rate: Optional[Union[float, List[float]]] = None, + max_number: Optional[int] = None, + max_duration: Optional[float] = None, + ): + self._backend = backend + self._generator = request_generator + self._max_number = max_number + self._max_duration = max_duration + self._profile_generator = ProfileGenerator(mode=mode, rate=rate) + logger.info("Executor initialized with mode: {}, rate: {}", mode, rate) + + @property + def backend(self) -> Backend: + """ + Returns the backend being used by the Executor. + + :return: Backend + :rtype: Backend + """ + return self._backend + + @property + def request_generator(self) -> RequestGenerator: + """ + Returns the request generator used by the Executor. + + :return: RequestGenerator + :rtype: RequestGenerator + """ + return self._generator + + @property + def profile_generator(self) -> ProfileGenerator: + """ + Returns the profile generator for generating profiles during execution. + + :return: ProfileGenerator + :rtype: ProfileGenerator + """ + return self._profile_generator + + @property + def max_number(self) -> Optional[int]: + """ + Returns the maximum number of requests to generate. + + :return: Maximum number of requests or None. + :rtype: Optional[int] + """ + return self._max_number + + @property + def max_duration(self) -> Optional[float]: + """ + Returns the maximum duration for generating requests. + + :return: Maximum duration in seconds or None. + :rtype: Optional[float] + """ + return self._max_duration + + async def run(self) -> AsyncGenerator[ExecutorResult, None]: + """ + Runs the Executor, generating and scheduling tasks based on the profile + generation mode. Yields results incrementally. 
+ + :rtype: AsyncGenerator[ExecutorResult, None] + """ + report = TextGenerationBenchmarkReport() + report.args = { + "mode": self.profile_generator.mode, + "rate": self.profile_generator.rates, + "max_number": self.max_number, + "max_duration": self.max_duration, + } + logger.info("Starting Executor run") + + yield ExecutorResult( + completed=False, + count_total=len(self.profile_generator), + count_completed=0, + report=report, + ) + + while profile := self.profile_generator.next(report): + logger.debug("Generated profile: {}", profile) + scheduler = Scheduler( + generator=self.request_generator, + worker=self.backend, + mode=profile.load_gen_mode, + rate=profile.load_gen_rate, + max_number=self.max_number, + max_duration=self.max_duration, + ) + + logger.info( + "Scheduling tasks with mode: {}, rate: {}", + profile.load_gen_mode, + profile.load_gen_rate, + ) + + async for scheduler_result in scheduler.run(): + if scheduler_result.completed: + report.add_benchmark(scheduler_result.benchmark) + logger.debug( + "Benchmark added for scheduler result: {}", + scheduler_result.benchmark, + ) + + yield ExecutorResult( + completed=False, + count_total=len(self.profile_generator), + count_completed=len(report.benchmarks), + report=report, + scheduler_result=scheduler_result, + ) + + logger.info("Executor run completed") + yield ExecutorResult( + completed=True, + count_total=len(self.profile_generator), + count_completed=len(report.benchmarks), + report=report, + ) diff --git a/src/guidellm/executor/executor.py b/src/guidellm/executor/executor.py deleted file mode 100644 index e2b14c3..0000000 --- a/src/guidellm/executor/executor.py +++ /dev/null @@ -1,64 +0,0 @@ -from typing import Any, Dict, Optional - -from guidellm.backend import Backend -from guidellm.core import TextGenerationBenchmark, TextGenerationBenchmarkReport -from guidellm.request import RequestGenerator -from guidellm.scheduler import Scheduler - -from .profile_generator import ProfileGenerationMode, ProfileGenerator - -__all__ = ["Executor"] - - -class Executor: - """ - The main purpose of the `class Executor` is to dispatch running tasks according - to the Profile Generation mode - """ - - def __init__( - self, - backend: Backend, - request_generator: RequestGenerator, - profile_mode: ProfileGenerationMode = ProfileGenerationMode.SWEEP, - profile_args: Optional[Dict[str, Any]] = None, - max_requests: Optional[int] = None, - max_duration: Optional[float] = None, - ): - self.request_generator = request_generator - self.backend = backend - self.profile_generator: ProfileGenerator = ProfileGenerator.create( - profile_mode, - **(profile_args or {}), - ) - self.max_requests: Optional[int] = max_requests - self.max_duration: Optional[float] = max_duration - self._scheduler: Optional[Scheduler] = None - - @property - def scheduler(self) -> Scheduler: - if self._scheduler is None: - raise ValueError("The scheduler is not set. 
Did you run the execution?") - - return self._scheduler - - def run(self) -> TextGenerationBenchmarkReport: - report = TextGenerationBenchmarkReport() - - while True: - if not (profile := self.profile_generator.next(report)): - break - - scheduler = Scheduler( - request_generator=self.request_generator, - backend=self.backend, - load_gen_mode=profile.load_gen_mode, - load_gen_rate=profile.load_gen_rate, - max_requests=self.max_requests, - max_duration=self.max_duration, - ) - - benchmark: TextGenerationBenchmark = scheduler.run() - report.add_benchmark(benchmark) - - return report diff --git a/src/guidellm/executor/profile_generator.py b/src/guidellm/executor/profile_generator.py index 275237a..8a36db0 100644 --- a/src/guidellm/executor/profile_generator.py +++ b/src/guidellm/executor/profile_generator.py @@ -1,178 +1,314 @@ -from abc import ABC, abstractmethod -from dataclasses import dataclass -from enum import Enum -from typing import Dict, List, Optional, Type, Union +from typing import Dict, List, Literal, Optional, Union, get_args import numpy as np +from loguru import logger -from guidellm.core import TextGenerationBenchmarkReport +from guidellm.config import settings +from guidellm.core import TextGenerationBenchmark, TextGenerationBenchmarkReport +from guidellm.core.serializable import Serializable from guidellm.scheduler import LoadGenerationMode __all__ = [ - "ProfileGenerationMode", "Profile", + "ProfileGenerationMode", "ProfileGenerator", - "FixedRateProfileGenerator", - "SweepProfileGenerator", ] -RATE_TYPE_TO_LOAD_GEN_MODE_MAPPER = { - "synchronous": LoadGenerationMode.SYNCHRONOUS, - "constant": LoadGenerationMode.CONSTANT, - "poisson": LoadGenerationMode.POISSON, -} - - -class ProfileGenerationMode(Enum): - FIXED_RATE = "fixed_rate" - SWEEP = "sweep" +ProfileGenerationMode = Literal[ + "sweep", "synchronous", "throughput", "constant", "poisson" +] -RATE_TYPE_TO_PROFILE_MODE_MAPPER = { - "synchronous": ProfileGenerationMode.FIXED_RATE, - "constant": ProfileGenerationMode.FIXED_RATE, - "poisson": ProfileGenerationMode.FIXED_RATE, - "sweep": ProfileGenerationMode.SWEEP, -} +class Profile(Serializable): + """ + A data class representing a profile for load generation. + :param load_gen_mode: The mode of load generation (e.g., constant, poisson). + :type load_gen_mode: LoadGenerationMode + :param load_gen_rate: The rate of load generation, if applicable. + :type load_gen_rate: Optional[float] + """ -@dataclass -class Profile: load_gen_mode: LoadGenerationMode - load_gen_rate: Optional[float] + load_gen_rate: Optional[float] = None -class ProfileGenerator(ABC): - _registry: Dict[ProfileGenerationMode, "Type[ProfileGenerator]"] = {} +class ProfileGenerator: + """ + Generates profiles based on different load generation modes. - @staticmethod - def register(mode: ProfileGenerationMode): - def inner_wrapper(wrapped_class): - ProfileGenerator._registry[mode] = wrapped_class - return wrapped_class - - return inner_wrapper - - @staticmethod - def create(mode: ProfileGenerationMode, **kwargs) -> "ProfileGenerator": - if mode not in ProfileGenerator._registry: - raise ValueError(f"Invalid profile generation mode: {mode}") + :param mode: The mode for profile generation (e.g., sweep, synchronous). + :type mode: ProfileGenerationMode + :param rate: The rate(s) for load generation; could be a float or list of floats. 
+ :type rate: Optional[Union[float, List[float]]] + """ - return ProfileGenerator._registry[mode](**kwargs) - - def __init__(self, mode: Union[str, ProfileGenerationMode]): - self._mode = ProfileGenerationMode(mode) - - @abstractmethod - def next(self, current_report: TextGenerationBenchmarkReport) -> Optional[Profile]: - """ """ - - -@ProfileGenerator.register(ProfileGenerationMode.FIXED_RATE) -class FixedRateProfileGenerator(ProfileGenerator): def __init__( self, - load_gen_mode: Optional[LoadGenerationMode], - rates: Optional[List[float]] = None, - **kwargs, # noqa: RET505, ARG002 + mode: ProfileGenerationMode, + rate: Optional[Union[float, List[float]]] = None, ): - super().__init__(ProfileGenerationMode.FIXED_RATE) - if load_gen_mode == LoadGenerationMode.SYNCHRONOUS and rates and len(rates) > 0: - raise ValueError("custom rates are not supported in synchronous mode") - - self._rates: Optional[List[float]] = rates - self._load_gen_mode = load_gen_mode - self._generated: bool = False - self._rate_index: int = 0 - - def next(self, _: TextGenerationBenchmarkReport) -> Optional[Profile]: - if self._load_gen_mode == LoadGenerationMode.SYNCHRONOUS: - if self._generated: - return None - self._generated = True - return Profile( - load_gen_mode=LoadGenerationMode.SYNCHRONOUS, - load_gen_rate=None, + if mode not in get_args(ProfileGenerationMode): + err = ValueError( + f"{mode} is not a valid Profile Generation Mode. " + f"Valid options are {get_args(ProfileGenerationMode)}" ) + logger.error(err) + raise err + + self._mode = mode + + if self._mode in ("sweep", "throughput", "synchronous"): + if rate is not None: + err = ValueError(f"Rates are not applicable for {self._mode} mode") + logger.error(err) + raise err + self._rates = None + else: + if not rate: + err = ValueError(f"Rates are required for {self._mode} mode") + logger.error(err) + raise err + self._rates = rate if isinstance(rate, list) else [rate] + + for rt in self._rates: + if rt <= 0: + err = ValueError( + f"Rate must be > 0 for mode: {self._mode}. Given: {rt}" + ) + logger.error(err) + raise err + + self._generated_count = 0 + + def __len__(self) -> int: + """ + Returns the number of profiles to generate based on the mode and rates. + + :return: The number of profiles. + :rtype: int + """ + if self._mode == "sweep": + return settings.num_sweep_profiles + + if self._mode in ("throughput", "synchronous"): + return 1 + + if not self._rates: + raise ValueError(f"Rates are required for {self._mode} mode") + + return len(self._rates) + + @property + def mode(self) -> ProfileGenerationMode: + """ + Returns the current mode of profile generation. + + :return: The profile generation mode. + :rtype: ProfileGenerationMode + """ + return self._mode + + @property + def rates(self) -> Optional[List[float]]: + """ + Returns the list of rates for load generation, if any. + + :return: List of rates or None if not applicable. + :rtype: Optional[List[float]] + """ + return self._rates + + @property + def generated_count(self) -> int: + """ + Returns the current count of generated profiles. + + :return: The current count of generated profiles. 
+ :rtype: int + """ + return self._generated_count - if self._load_gen_mode in { - LoadGenerationMode.CONSTANT, - LoadGenerationMode.POISSON, - }: - if not self._rates: - raise ValueError( - "rates must be provided for constant and poisson modes" - ) - - if self._rate_index >= len(self._rates): - return None - current_rate = self._rates[self._rate_index] - self._rate_index += 1 - return Profile( - load_gen_mode=self._load_gen_mode, - load_gen_rate=current_rate, + def next(self, current_report: TextGenerationBenchmarkReport) -> Optional[Profile]: + """ + Generates the next profile based on the current mode and report. + + :param current_report: The current benchmark report. + :type current_report: TextGenerationBenchmarkReport + :return: The generated profile or None if no more profiles. + :rtype: Optional[Profile] + """ + logger.debug( + "Generating the next profile with mode: {}, current report: {}", + self.mode, + current_report, + ) + + if self.mode in ["constant", "poisson"]: + if not self.rates: + err = ValueError(f"Rates are required for {self.mode} mode") + logger.error(err) + raise err + + profile = self.create_fixed_rate_profile( + self.generated_count, + self.mode, + self.rates, ) + elif self.mode == "synchronous": + profile = self.create_synchronous_profile(self.generated_count) + elif self.mode == "throughput": + profile = self.create_throughput_profile(self.generated_count) + elif self.mode == "sweep": + profile = self.create_sweep_profile( + self.generated_count, + sync_benchmark=current_report.benchmarks[0] + if current_report.benchmarks + else None, + throughput_benchmark=current_report.benchmarks[1] + if len(current_report.benchmarks) > 1 + else None, + ) + else: + err = ValueError(f"Invalid mode: {self.mode}") + logger.error(err) + raise err + + self._generated_count += 1 + logger.info( + "Generated profile: {}, total generated count: {}", + profile, + self._generated_count, + ) + return profile - raise ValueError(f"Invalid rate type: {self._load_gen_mode}") - - -@ProfileGenerator.register(ProfileGenerationMode.SWEEP) -class SweepProfileGenerator(ProfileGenerator): - def __init__( - self, - **kwargs, # noqa: RET505, ARG002 - ): - super().__init__(ProfileGenerationMode.SWEEP) - self._sync_run = False - self._max_found = False - self._pending_rates = None - - def next(self, current_report: TextGenerationBenchmarkReport) -> Optional[Profile]: - if not self._sync_run: - self._sync_run = True + @staticmethod + def create_fixed_rate_profile( + index: int, mode: ProfileGenerationMode, rates: List[float] + ) -> Optional[Profile]: + """ + Creates a profile with a fixed rate. + + :param index: The index of the rate in the list. + :type index: int + :param mode: The mode for profile generation (e.g., constant, poisson). + :type mode: ProfileGenerationMode + :param rates: The list of rates for load generation. + :type rates: List[float] + :return: The generated profile or None if index is out of range. 
+ :rtype: Optional[Profile] + """ + modes_map: Dict[str, LoadGenerationMode] = { + "constant": "constant", + "poisson": "poisson", + } + + if mode not in modes_map: + err = ValueError(f"Invalid mode: {mode}") + logger.error(err) + raise err + + profile = ( + Profile( + load_gen_mode=modes_map[mode], + load_gen_rate=rates[index], + ) + if index < len(rates) + else None + ) + logger.debug("Created fixed rate profile: {}", profile) + return profile - return Profile( - load_gen_mode=LoadGenerationMode.SYNCHRONOUS, + @staticmethod + def create_synchronous_profile(index: int) -> Optional[Profile]: + """ + Creates a profile with synchronous mode. + + :param index: The index of the profile to create. + :type index: int + :return: The generated profile or None if index is out of range. + :rtype: Optional[Profile] + """ + profile = ( + Profile( + load_gen_mode="synchronous", load_gen_rate=None, ) + if index < 1 + else None + ) + logger.debug("Created synchronous profile: {}", profile) + return profile - if not self._max_found: - # check if we've found the maximum rate based on the last result - # if not, double the rate; if so, set the flag to fill in missing data - last_benchmark = current_report.benchmarks[-1] - - if not last_benchmark.overloaded: - last_rate = ( - last_benchmark.rate - if last_benchmark.rate - else last_benchmark.completed_request_rate - ) - return Profile( - load_gen_mode=LoadGenerationMode.CONSTANT, - load_gen_rate=last_rate * 2, - ) - - self._max_found = True - first_benchmark = current_report.benchmarks[0] - - min_rate = ( - first_benchmark.rate - if first_benchmark.rate - else first_benchmark.completed_request_rate - ) - max_rate = ( - last_benchmark.rate - if last_benchmark.rate - else last_benchmark.completed_request_rate + @staticmethod + def create_throughput_profile(index: int) -> Optional[Profile]: + """ + Creates a profile with throughput mode. + + :param index: The index of the profile to create. + :type index: int + :return: The generated profile or None if index is out of range. + :rtype: Optional[Profile] + """ + profile = ( + Profile( + load_gen_mode="throughput", + load_gen_rate=None, ) + if index < 1 + else None + ) + logger.debug("Created throughput profile: {}", profile) + return profile - self._pending_rates = list(np.linspace(min_rate, max_rate, 10)) - - if self._pending_rates: - rate = self._pending_rates.pop(0) - return Profile( - load_gen_mode=LoadGenerationMode.CONSTANT, - load_gen_rate=rate, + @staticmethod + def create_sweep_profile( + index: int, + sync_benchmark: Optional[TextGenerationBenchmark], + throughput_benchmark: Optional[TextGenerationBenchmark], + ) -> Optional[Profile]: + """ + Creates a profile with sweep mode, generating profiles between + synchronous and throughput benchmarks. + + :param index: The index of the profile to create. + :type index: int + :param sync_benchmark: The synchronous benchmark data. + :type sync_benchmark: Optional[TextGenerationBenchmark] + :param throughput_benchmark: The throughput benchmark data. + :type throughput_benchmark: Optional[TextGenerationBenchmark] + :return: The generated profile or None if index is out of range. 
+ :rtype: Optional[Profile] + """ + if index == 0: + return ProfileGenerator.create_synchronous_profile(0) + + if not sync_benchmark: + err = ValueError("Synchronous benchmark is required for sweep mode") + logger.error(err) + raise err + + if index == 1: + return ProfileGenerator.create_throughput_profile(0) + + if not throughput_benchmark: + err = ValueError("Throughput benchmark is required for sweep mode") + logger.error(err) + raise err + + min_rate = sync_benchmark.completed_request_rate + max_rate = throughput_benchmark.completed_request_rate + intermediate_rates = list( + np.linspace(min_rate, max_rate, settings.num_sweep_profiles) + ) + + profile = ( + Profile( + load_gen_mode="constant", + load_gen_rate=intermediate_rates[index - 1], ) - - return None + if index < len(intermediate_rates) + else None + ) + logger.debug("Created sweep profile: {}", profile) + return profile diff --git a/src/guidellm/logger.py b/src/guidellm/logger.py index fb79f9f..f26966c 100644 --- a/src/guidellm/logger.py +++ b/src/guidellm/logger.py @@ -38,8 +38,7 @@ from loguru import logger -from guidellm.config import settings -from guidellm.config.base import LoggingSettings +from guidellm.config import LoggingSettings, settings __all__ = ["configure_logger", "logger"] diff --git a/src/guidellm/main.py b/src/guidellm/main.py index 72085c1..d754d6a 100644 --- a/src/guidellm/main.py +++ b/src/guidellm/main.py @@ -1,13 +1,11 @@ +import asyncio + import click from loguru import logger from guidellm.backend import Backend -from guidellm.core import GuidanceReport -from guidellm.executor import ( - RATE_TYPE_TO_LOAD_GEN_MODE_MAPPER, - RATE_TYPE_TO_PROFILE_MODE_MAPPER, - Executor, -) +from guidellm.core import GuidanceReport, TextGenerationBenchmarkReport +from guidellm.executor import Executor from guidellm.logger import configure_logger from guidellm.request import ( EmulatedRequestGenerator, @@ -48,7 +46,7 @@ ) @click.option( "--rate-type", - type=click.Choice(["sweep", "synchronous", "constant", "poisson"]), + type=click.Choice(["sweep", "synchronous", "throughput", "constant", "poisson"]), default="synchronous", help="Type of rate generation for benchmarking", ) @@ -94,7 +92,7 @@ def main( output_path, ): # Create backend - _backend = Backend.create( + backend = Backend.create( backend_type=backend, target=target, host=host, @@ -113,7 +111,7 @@ def main( config=data, tokenizer=tokenizer ) elif data_type == "file": - request_generator = FileRequestGenerator(file_path=data, tokenizer=tokenizer) + request_generator = FileRequestGenerator(path=data, tokenizer=tokenizer) elif data_type == "transformers": request_generator = TransformersDatasetRequestGenerator( dataset=data, tokenizer=tokenizer @@ -121,24 +119,26 @@ def main( else: raise ValueError(f"Unknown data type: {data_type}") - profile_mode = RATE_TYPE_TO_PROFILE_MODE_MAPPER.get(rate_type) - load_gen_mode = RATE_TYPE_TO_LOAD_GEN_MODE_MAPPER.get(rate_type) - - if not profile_mode or not load_gen_mode: - raise ValueError("Invalid rate type") - - # Create executor executor = Executor( + backend=backend, request_generator=request_generator, - backend=_backend, - profile_mode=profile_mode, - profile_args={"load_gen_mode": load_gen_mode, "rates": rate}, - max_requests=max_requests, + mode=rate_type, + rate=rate, + max_number=max_requests, max_duration=max_seconds, ) - - logger.debug("Running the executor") - report = executor.run() + logger.debug( + "Running executor with args: {}", + { + "backend": backend, + "request_generator": request_generator, + "mode": 
rate_type, + "rate": rate, + "max_number": max_requests, + "max_duration": max_seconds, + }, + ) + report = asyncio.run(_run_executor_for_result(executor)) # Save or print results guidance_report = GuidanceReport() @@ -149,6 +149,20 @@ def main( print(guidance_report) # noqa: T201 +async def _run_executor_for_result(executor: Executor) -> TextGenerationBenchmarkReport: + report = None + + async for result in executor.run(): + if result.completed: + report = result.report + break + + if not report: + raise ValueError("No report generated by executor") + + return report + + if __name__ == "__main__": # invoke logger setup on import with default values # enabling console logging with INFO and disabling file logging diff --git a/src/guidellm/request/__init__.py b/src/guidellm/request/__init__.py index ae46041..95fe230 100644 --- a/src/guidellm/request/__init__.py +++ b/src/guidellm/request/__init__.py @@ -4,9 +4,9 @@ from .transformers import TransformersDatasetRequestGenerator __all__ = [ - "RequestGenerator", "EmulatedConfig", "EmulatedRequestGenerator", "FileRequestGenerator", + "RequestGenerator", "TransformersDatasetRequestGenerator", ] diff --git a/src/guidellm/request/base.py b/src/guidellm/request/base.py index 48e90ff..9a897bb 100644 --- a/src/guidellm/request/base.py +++ b/src/guidellm/request/base.py @@ -3,13 +3,22 @@ import time from abc import ABC, abstractmethod from queue import Empty, Full, Queue -from typing import Iterator, Optional, Union +from typing import Iterator, Literal, Optional, Union from loguru import logger -from transformers import AutoTokenizer, PreTrainedTokenizer +from transformers import ( # type: ignore # noqa: PGH003 + AutoTokenizer, + PreTrainedTokenizer, +) +from guidellm.config import settings from guidellm.core.request import TextGenerationRequest +__all__ = ["GenerationMode", "RequestGenerator"] + + +GenerationMode = Literal["async", "sync"] + class RequestGenerator(ABC): """ @@ -19,7 +28,7 @@ class RequestGenerator(ABC): for tokenizing prompts. :type tokenizer: Union[str, PreTrainedTokenizer] :param mode: The generation mode, either 'async' or 'sync'. - :type mode: str + :type mode: GenerationMode :param async_queue_size: The size of the request queue. 
:type async_queue_size: int """ @@ -27,7 +36,7 @@ class RequestGenerator(ABC): def __init__( self, tokenizer: Optional[Union[str, PreTrainedTokenizer]] = None, - mode: str = "async", + mode: GenerationMode = "async", async_queue_size: int = 50, ): self._async_queue_size: int = async_queue_size @@ -35,16 +44,20 @@ def __init__( self._queue: Queue = Queue(maxsize=async_queue_size) self._stop_event: threading.Event = threading.Event() - if tokenizer is not None: + if not tokenizer: + self._tokenizer = AutoTokenizer.from_pretrained( + settings.dataset.default_tokenizer + ) + logger.info("Initialized fake tokenizer for request generation") + else: self._tokenizer = ( AutoTokenizer.from_pretrained(tokenizer) if isinstance(tokenizer, str) else tokenizer ) - logger.info("Tokenizer initialized: {}", self._tokenizer) - else: - self._tokenizer = None - logger.debug("No tokenizer provided") + logger.info( + "Tokenizer initialized for request generation: {}", self._tokenizer + ) if self._mode == "async": self._thread = threading.Thread(target=self._populate_queue, daemon=True) @@ -82,18 +95,19 @@ def __iter__(self) -> Iterator[TextGenerationRequest]: self._queue.task_done() yield item except Empty: + time.sleep(0.01) continue else: while not self._stop_event.is_set(): yield self.create_item() @property - def tokenizer(self) -> Optional[PreTrainedTokenizer]: + def tokenizer(self) -> PreTrainedTokenizer: """ Get the tokenizer instance. :return: The tokenizer instance. - :rtype: Optional[PreTrainedTokenizer] + :rtype: PreTrainedTokenizer """ return self._tokenizer diff --git a/src/guidellm/request/emulated.py b/src/guidellm/request/emulated.py index 24bb27e..b1d7f42 100644 --- a/src/guidellm/request/emulated.py +++ b/src/guidellm/request/emulated.py @@ -1,28 +1,96 @@ import json -import re -import unicodedata +import math from dataclasses import dataclass from pathlib import Path from typing import Dict, List, Optional, Tuple, Union import numpy as np -import requests from loguru import logger -from transformers import PreTrainedTokenizer +from transformers import PreTrainedTokenizer # type: ignore # noqa: PGH003 from guidellm.config import settings from guidellm.core.request import TextGenerationRequest -from guidellm.request.base import RequestGenerator +from guidellm.request.base import GenerationMode, RequestGenerator +from guidellm.utils import clean_text, filter_text, load_text, split_text -__all__ = ["EmulatedConfig", "EmulatedRequestGenerator"] +__all__ = ["EmulatedConfig", "EmulatedRequestGenerator", "EndlessTokens"] @dataclass class EmulatedConfig: """ - A dataclass to represent the configuration for emulated requests. + Configuration for emulated text generation requests. + + Args: + prompt_tokens (int): Number of prompt tokens. + prompt_tokens_variance (Optional[int]): Variance for prompt tokens. + prompt_tokens_min (Optional[int]): Minimum number of prompt tokens. + prompt_tokens_max (Optional[int]): Maximum number of prompt tokens. + generated_tokens (Optional[int]): Number of generated tokens. + generated_tokens_variance (Optional[int]): Variance for generated tokens. + generated_tokens_min (Optional[int]): Minimum number of generated tokens. + generated_tokens_max (Optional[int]): Maximum number of generated tokens. """ + @staticmethod + def create_config(config: Union[str, Path, Dict]) -> "EmulatedConfig": + """ + Create an EmulatedConfig instance from a configuration source. + + :param config: Configuration source, can be a dictionary, JSON string, + key=value string, or file path. 
+ :type config: Union[str, Path, Dict] + :return: An instance of EmulatedConfig. + :rtype: EmulatedConfig + :raises FileNotFoundError: If the configuration file is not found. + :raises ValueError: If the configuration format is invalid. + """ + if isinstance(config, dict): + logger.debug("Loading configuration from dict: {}", config) + return EmulatedConfig(**config) + + if isinstance(config, Path) or ( + isinstance(config, str) and (config.endswith(".json") or "{" in config) + ): + logger.debug("Loading configuration from json: {}", config) + + if isinstance(config, str) and "{" in config: + json_text = config.strip() + else: + if isinstance(config, str): + config = Path(config) + + if not config.exists(): + raise FileNotFoundError(f"Configuration file not found: {config}") + + json_text = config.read_text(encoding="utf-8") + + json_dict = json.loads(json_text) + + return EmulatedConfig(**json_dict) + + if isinstance(config, str) and "=" in config: + logger.debug("Loading configuration from csv string: {}", config) + items = config.split(",") + config_dict = {} + for item in items: + key_value = item.strip().split("=") + if len(key_value) != 2: # noqa: PLR2004 + raise ValueError(f"Unexpected format for item: {item}") + key = key_value[0].strip() + value = ( + int(key_value[1].strip()) + if key_value[1].isnumeric() + else key_value[1] + ) + config_dict[key] = value + + return EmulatedConfig(**config_dict) # type: ignore # noqa: PGH003 + + raise ValueError( + f"Invalid configuration given for creation of EmulatedConfig: {config}" + ) + prompt_tokens: int prompt_tokens_variance: Optional[int] = None prompt_tokens_min: Optional[int] = None @@ -33,177 +101,281 @@ class EmulatedConfig: generated_tokens_min: Optional[int] = None generated_tokens_max: Optional[int] = None + @property + def prompt_tokens_range(self) -> Tuple[int, int]: + """ + Get the range (min, max) of prompt tokens to generate. + + :return: The range of prompt tokens. + :rtype: Tuple[int, int] + """ + return self._token_range( + self.prompt_tokens, + self.prompt_tokens_variance, + self.prompt_tokens_min, + self.prompt_tokens_max, + ) + + @property + def output_tokens_range(self) -> Tuple[int, int]: + """ + Get the range (min, max) of output tokens to generate. + + :return: The range of generated tokens. + :rtype: Tuple[int, int] + """ + if not self.generated_tokens: + return 0, 0 + + return self._token_range( + self.generated_tokens, + self.generated_tokens_variance, + self.generated_tokens_min, + self.generated_tokens_max, + ) + + def sample_prompt_tokens(self, rng: np.random.Generator) -> int: + """ + Sample the number of prompt tokens to generate. + + :param rng: The random number generator to use. + :type rng: np.random.Generator + :return: The number of prompt tokens to create. + :rtype: int + """ + return self._sample_tokens( + self.prompt_tokens, + self.prompt_tokens_variance, + self.prompt_tokens_min, + self.prompt_tokens_max, + rng, + ) + + def sample_output_tokens(self, rng: np.random.Generator) -> Optional[int]: + """ + Sample the number of output tokens to generate. + + :param rng: The random number generator to use. + :type rng: np.random.Generator + :return: The number of output tokens to generate. 
+ :rtype: Optional[int] + """ + if not self.generated_tokens: + return None + + return self._sample_tokens( + self.generated_tokens, + self.generated_tokens_variance, + self.generated_tokens_min, + self.generated_tokens_max, + rng, + ) + + @staticmethod + def _sample_tokens( + base: int, + variance: Optional[int], + min_tokens: Optional[int], + max_tokens: Optional[int], + rng: np.random.Generator, + ) -> int: + min_tokens, max_tokens = EmulatedConfig._token_range( + base, variance, min_tokens, max_tokens + ) + + if min_tokens == max_tokens: + return min_tokens + + if not variance: + return rng.integers(min_tokens, max_tokens + 1) + + rand = rng.normal(base, math.sqrt(variance)) + + return int(min(max(rand, min_tokens), max_tokens)) + + @staticmethod + def _token_range( + base: int, + variance: Optional[int], + min_tokens: Optional[int], + max_tokens: Optional[int], + ) -> Tuple[int, int]: + if not variance: + return ( + min_tokens or base, + max_tokens or base, + ) + + min_tokens = min_tokens if min_tokens and min_tokens > 0 else 1 + max_tokens = ( + max_tokens if max_tokens and max_tokens > base else base + 5 * variance + ) + + return min_tokens, max_tokens + + +class EndlessTokens(List[str]): + """ + A list subclass that allows for endless data generation. + """ + + def __init__( + self, + data: Union[str, Path], + filter_start: Optional[Union[str, int]] = None, + filter_end: Optional[Union[str, int]] = None, + clean_text_args: Optional[Dict[str, bool]] = None, + ): + """ + Initialize EndlessDataWords with data. + + :param data: Source text data. + :type data: str + """ + logger.debug("Loading data from: {}", data) + data = load_text(data) + data = filter_text(data, filter_start, filter_end) + data = ( + clean_text(data) + if not clean_text_args + else clean_text(data, **clean_text_args) + ) + self._tokens, self._token_separators, self._line_indices = split_text(data) + + super().__init__(self._tokens) + + @property + def line_indices(self) -> List[int]: + """ + Get the list of start indices for lines. + + :return: List of start indices. + :rtype: List[int] + """ + return self._line_indices + + def create_text(self, start: int, length: int) -> str: + """ + Create a text snippet from the specified range. + + :param start: Start index. + :type start: int + :param length: Length of the snippet. + :type length: int + :return: Text snippet. + :rtype: str + """ + start = start % len(self) + text = "" + buff_token_sep = "" + + for counter in range(length): + index = (start + counter) % len(self) + text += buff_token_sep + self[index] + buff_token_sep = self._token_separators[index] + + return text + class EmulatedRequestGenerator(RequestGenerator): """ A request generator that generates emulated requests based on a configuration. - :param config: The configuration string or file. - :type config: Union[str, Dict] + :param config: The configuration string, file path, or dictionary. + :type config: Union[str, Dict, Path] :param random_seed: The random seed to use for generating requests. :type random_seed: Optional[int] :param tokenizer: The tokenizer instance or the name/config to use for tokenizing prompts. - :type tokenizer: Union[str, PreTrainedTokenizer] + :type tokenizer: Optional[Union[str, PreTrainedTokenizer]] :param mode: The generation mode, either 'async' or 'sync'. - :type mode: str + :type mode: GenerationMode :param async_queue_size: The size of the request queue. 
:type async_queue_size: int """ def __init__( self, - config: Union[str, Dict], + config: Union[str, Path, Dict], random_seed: Optional[int] = None, tokenizer: Optional[Union[str, PreTrainedTokenizer]] = None, - mode: str = "async", + mode: GenerationMode = "async", async_queue_size: int = 50, ): - super().__init__(tokenizer, mode, async_queue_size) - self._config = self._load_config(config) - self._data = self._load_emulated_data() + """ + Initialize EmulatedRequestGenerator with configuration and tokenizer. + + :param config: Configuration source, can be a dictionary, + JSON string, or file path. + :type config: Union[str, Path, Dict] + :param random_seed: Optional seed for random number generator. + :type random_seed: Optional[int] + :param tokenizer: Tokenizer instance or configuration for tokenizing prompts. + :type tokenizer: Optional[Union[str, PreTrainedTokenizer]] + :param mode: Mode of request generation, either 'async' or 'sync'. + :type mode: str + :param async_queue_size: Size of the asynchronous queue. + :type async_queue_size: int + """ + self._config = EmulatedConfig.create_config(config) + self._tokens = EndlessTokens( + settings.emulated_data.source, + settings.emulated_data.filter_start, + settings.emulated_data.filter_end, + ) self._rng = np.random.default_rng(random_seed) + # NOTE: Must be after all the parameters since the queue population + # function requires attributes above + super().__init__(tokenizer, mode, async_queue_size) + def create_item(self) -> TextGenerationRequest: """ - Create a new result request item from the data. + Create a new text generation request item from the data. - :return: A new result request. + :return: A new text generation request. :rtype: TextGenerationRequest """ - prompt, prompt_token_count = self._sample_prompt() - generated_token_count = self._sample_generated() - - request = TextGenerationRequest( + logger.debug("Creating new text generation request") + target_prompt_token_count = self._config.sample_prompt_tokens(self._rng) + prompt = self.sample_prompt(target_prompt_token_count) + prompt_token_count = len(self.tokenizer.tokenize(prompt)) + output_token_count = self._config.sample_output_tokens(self._rng) + logger.debug("Generated prompt: {}", prompt) + + return TextGenerationRequest( prompt=prompt, prompt_token_count=prompt_token_count, + output_token_count=output_token_count, ) - if generated_token_count: - request.params["generated_tokens"] = generated_token_count - - return request - - def _load_config(self, config: Union[str, Dict]) -> EmulatedConfig: - # load the config file from a dict, string (json or csv), or file path - if isinstance(config, dict): - config_dict = config - logger.info("Loaded configuration from dict: {}", config) - elif isinstance(config, str) and config.endswith(".json"): - with Path(config).open(encoding="utf-8") as file: - config_dict = json.load(file) - - logger.info("Loaded configuration from file: {}", config) - elif isinstance(config, str) and (config.index("{") > -1): - config_dict = json.loads(config.strip()) - logger.info("Loaded configuration from string: {}", config) - elif isinstance(config, str) and (config.index(",") > -1): - items = config.split(",") - config_dict = {} - for item in items: - key_value = item.split("=") - if len(key_value) != 2: # noqa: PLR2004 - raise ValueError(f"Unexpected format for item: {item}") - key, value = key_value - config_dict[key] = value - logger.info("Loaded configuration from csv string: {}", config) - else: - raise ValueError( - f"Invalid configuration 
given for EmulatedRequestGenerator: {config}" - ) - - # map the config to the EmulatedConfig dataclass - return EmulatedConfig(**config_dict or {}) - - def _load_emulated_data(self) -> List[str]: - url = "https://www.gutenberg.org/files/1342/1342-0.txt" - logger.info(f"Downloading text corpus from {url}") - response = requests.get(url, timeout=settings.request_timeout) - response.raise_for_status() - - content = response.text - start = content.index( - "It is a truth universally acknowledged, that a single man in possession" - ) - end = content.index("CHISWICK PRESS:--CHARLES WHITTINGHAM AND CO.") - content = content[start:end] - - cleaned_content = ( - content.replace("\r\n", " ").replace("\r", " ").replace("\n", " ") - ) - cleaned_content = unicodedata.normalize("NFKD", cleaned_content) - cleaned_content = re.sub(r"\s+", " ", cleaned_content).strip() - - # break lines according to punctuation - lines_text = ( - cleaned_content.replace(". ", ".\n") - .replace("! ", "!\n") - .replace("? ", "?\n") - ) - lines: List[str] = lines_text.split("\n") - - return [line.strip() for line in lines if line.strip()] - - def _token_count(self, text: str) -> int: - return ( - len(self.tokenizer.tokenize(text)) if self.tokenizer else len(text.split()) - ) - - def _sample_prompt(self) -> Tuple[str, int]: - prompt_token_count = self._sample_tokens( - self._config.prompt_tokens, - self._config.prompt_tokens_variance, - self._config.prompt_tokens_min, - self._config.prompt_tokens_max, - ) - - prompt = self._data[self._rng.integers(0, len(self._data))] + def sample_prompt(self, tokens: int) -> str: + """ + Sample a prompt with the specified number of tokens. - while self._token_count(prompt) < prompt_token_count: - prompt += " " + self._data[self._rng.integers(0, len(self._data))] + :param tokens: Number of tokens for the prompt. + :type tokens: int + :return: Sampled prompt text. 
+ :rtype: str + """ + start_line_index = self._rng.integers(0, len(self._tokens.line_indices)) - # truncate the prompt to the desired token count - words = prompt.split() + # binary search to find the proper number of tokens for the prompt + # this is because tokenizers differ in tokenization behavior left = 0 - right = len(words) + right = left + 5 * tokens + while left < right: mid = (left + right) // 2 - if self._token_count(" ".join(words[:mid])) < prompt_token_count: + prompt = self._tokens.create_text(start_line_index, mid) + token_count = len(self.tokenizer.tokenize(prompt)) + + if token_count == tokens: + return prompt + + if token_count < tokens: left = mid + 1 else: right = mid - prompt = " ".join(words[:left]) - - return prompt, prompt_token_count - def _sample_generated(self) -> Optional[int]: - if not self._config.generated_tokens: - return None - - return self._sample_tokens( - self._config.generated_tokens, - self._config.generated_tokens_variance, - self._config.generated_tokens_min, - self._config.generated_tokens_max, - ) - - def _sample_tokens( - self, - base: int, - variance: Optional[int], - min_tokens: Optional[int], - max_tokens: Optional[int], - ) -> int: - variance = variance or 0 - min_tokens = max(1, min_tokens or 1) - max_tokens = max( - min_tokens, max_tokens or base + 5 * variance if variance else 10000 - ) - - return max( - min( - base + self._rng.integers(-variance, variance + 1), - max_tokens, - ), - min_tokens, - ) + return self._tokens.create_text(start_line_index, left) diff --git a/src/guidellm/request/file.py b/src/guidellm/request/file.py index 9d46fbe..c165619 100644 --- a/src/guidellm/request/file.py +++ b/src/guidellm/request/file.py @@ -1,14 +1,13 @@ -import csv -import json from pathlib import Path -from typing import List, Optional, Union +from typing import Optional, Union from loguru import logger -from transformers import PreTrainedTokenizer +from transformers import PreTrainedTokenizer # type: ignore # noqa: PGH003 +from guidellm.config import settings from guidellm.core.request import TextGenerationRequest -from guidellm.request.base import RequestGenerator -from guidellm.utils import PREFERRED_DATA_COLUMNS +from guidellm.request.base import GenerationMode, RequestGenerator +from guidellm.utils import load_text_lines __all__ = ["FileRequestGenerator"] @@ -17,8 +16,8 @@ class FileRequestGenerator(RequestGenerator): """ A request generator implementation for files. - :param file_path: The path to the file containing the data. - :type file_path: str + :param path: The path to the file containing the data. + :type path: Union[str, Path] :param tokenizer: The tokenizer instance or the name/config to use for tokenizing prompts. 
:type tokenizer: Union[str, PreTrainedTokenizer] @@ -30,16 +29,22 @@ class FileRequestGenerator(RequestGenerator): def __init__( self, - file_path: str, + path: Union[str, Path], tokenizer: Optional[Union[str, PreTrainedTokenizer]] = None, - mode: str = "async", + mode: GenerationMode = "async", async_queue_size: int = 50, ): - super().__init__(tokenizer, mode, async_queue_size) - self._file_path = file_path - self._data = self._load_file() + self._path = path + self._data = load_text_lines( + path, + filters=settings.dataset.preferred_data_columns, + ) self._iterator = iter(self._data) + # NOTE: Must be after all the parameters since the queue population + # function requires attributes above + super().__init__(tokenizer, mode, async_queue_size) + def create_item(self) -> TextGenerationRequest: """ Create a new result request item from the data. @@ -47,89 +52,16 @@ def create_item(self) -> TextGenerationRequest: :return: A new result request. :rtype: TextGenerationRequest """ + logger.debug("Creating new request item from file data") + try: data = next(self._iterator) except StopIteration: self._iterator = iter(self._data) data = next(self._iterator) - token_count = ( - self.tokenizer(data)["input_ids"].shape[0] if self.tokenizer else None - ) + token_count = len(self.tokenizer.tokenize(data)) request = TextGenerationRequest(prompt=data, prompt_token_count=token_count) - logger.debug(f"Created new TextGenerationRequest: {request}") + logger.debug("Created new TextGenerationRequest: {}", request) return request - - def _load_file(self) -> List[str]: - if self._file_path.endswith(".txt"): - data = self._load_text_file() - elif self._file_path.endswith(".csv"): - data = self._load_csv_file() - elif self._file_path.endswith(".jsonl"): - data = self._load_jsonl_file() - elif self._file_path.endswith(".json"): - data = self._load_json_file() - else: - raise ValueError("Unsupported file type") - - return [line.strip() for line in data if line and line.strip()] - - def _load_text_file(self) -> List[str]: - with Path(self._file_path).open(encoding="utf-8") as file: - return file.readlines() - - def _load_csv_file(self) -> List[str]: - data = [] - with Path(self._file_path).open(encoding="utf-8") as file: - reader = csv.DictReader(file) - columns = reader.fieldnames - if not columns: - raise ValueError("Invalid empty value for columns") - for row in reader: - # convert the row to a dictionary - obj = {col: row[col] for col in columns} - data.append(obj) - - return self._extract_prompts(data) - - def _load_jsonl_file(self) -> List[str]: - data = [] - with Path(self._file_path).open(encoding="utf-8") as file: - for line in file: - obj = json.loads(line) - data.append(obj) - - return self._extract_prompts(data) - - def _load_json_file(self) -> List[str]: - with Path(self._file_path).open(encoding="utf-8") as file: - obj = json.load(file) - data = None - - if isinstance(obj, list): - data = obj - elif isinstance(obj, dict): - for value in obj.values(): - if isinstance(value, list): - data = value - break - - if data is None: - raise ValueError( - f"Unsupported JSON structure, " - f"expected a list or a dictionary with a list. 
Given: {obj}", - ) - - return self._extract_prompts(data) - - def _extract_prompts(self, objects: List[dict]) -> List[str]: - data = [] - for obj in objects: - for col in PREFERRED_DATA_COLUMNS: - if col in obj: - data.append(obj[col]) - break - else: - data.append(next(iter(obj.values()))) - return data diff --git a/src/guidellm/request/transformers.py b/src/guidellm/request/transformers.py index 814937c..49d8388 100644 --- a/src/guidellm/request/transformers.py +++ b/src/guidellm/request/transformers.py @@ -1,18 +1,21 @@ +from pathlib import Path from typing import Optional, Union -from datasets import ( +from datasets import ( # type: ignore # noqa: PGH003 Dataset, DatasetDict, IterableDataset, IterableDatasetDict, - load_dataset, ) from loguru import logger -from transformers import PreTrainedTokenizer +from transformers import PreTrainedTokenizer # type: ignore # noqa: PGH003 from guidellm.core.request import TextGenerationRequest -from guidellm.request.base import RequestGenerator -from guidellm.utils import PREFERRED_DATA_COLUMNS, PREFERRED_DATA_SPLITS +from guidellm.request.base import GenerationMode, RequestGenerator +from guidellm.utils import ( + load_transformers_dataset, + resolve_transformers_dataset_column, +) __all__ = ["TransformersDatasetRequestGenerator"] @@ -39,11 +42,13 @@ class TransformersDatasetRequestGenerator(RequestGenerator): def __init__( self, - dataset: str, + dataset: Union[ + str, Path, DatasetDict, Dataset, IterableDatasetDict, IterableDataset + ], split: Optional[str] = None, column: Optional[str] = None, tokenizer: Optional[Union[str, PreTrainedTokenizer]] = None, - mode: str = "async", + mode: GenerationMode = "async", async_queue_size: int = 50, **kwargs, ): @@ -51,8 +56,12 @@ def __init__( self._split = split self._column = column self._kwargs = kwargs - self._hf_dataset = self._load_dataset() - self._iterator = iter(self._hf_dataset) + + self._hf_dataset = load_transformers_dataset(dataset, split=split, **kwargs) + self._hf_column = resolve_transformers_dataset_column( + self._hf_dataset, column=column + ) + self._hf_dataset_iterator = iter(self._hf_dataset) # NOTE: Must be after all the parameters since the queue population # function requires attributes above @@ -66,12 +75,16 @@ def create_item(self) -> TextGenerationRequest: :rtype: TextGenerationRequest """ - data = next(self._iterator) + logger.debug("Creating new request item from dataset") - prompt = data[self._column] if self._column in data else str(data) - token_count = ( - self._tokenizer(prompt)["input_ids"].shape[0] if self._tokenizer else None - ) + try: + data = next(self._hf_dataset_iterator) + except StopIteration: + self._hf_dataset_iterator = iter(self._hf_dataset) + data = next(self._hf_dataset_iterator) + + prompt = data[self._hf_column] + token_count = len(self.tokenizer.tokenize(prompt)) request = TextGenerationRequest( prompt=prompt, prompt_token_count=token_count, @@ -79,76 +92,3 @@ def create_item(self) -> TextGenerationRequest: logger.debug(f"Created new TextGenerationRequest: {request}") return request - - def _load_dataset(self) -> Dataset: - dataset = self._load_hf_dataset() - - if isinstance(dataset, (DatasetDict, IterableDatasetDict)): - split = self._load_data_split(dataset) - - if split not in dataset: - raise ValueError(f"Split '{split}' not found in dataset") - - dataset = dataset[split] - else: - self._split = str(dataset.split) if dataset else None - - column = self._load_data_column(dataset) - - if column not in dataset.column_names: - raise ValueError(f"Column 
'{column}' not found in dataset") - - logger.info( - f"Loaded dataset {self._dataset} with split: {self._split} " - f"and column: {self._column}", - ) - - return dataset - - def _load_hf_dataset( - self, - ) -> Union[DatasetDict, Dataset, IterableDatasetDict, IterableDataset]: - if self._dataset.endswith(".csv") or self._dataset.endswith(".json"): - logger.debug(f"Loading dataset from local path: {self._dataset}") - extension = self._dataset.split(".")[-1] - - return load_dataset(extension, data_files=self._dataset, **self._kwargs) - - if self._dataset.endswith(".py"): - logger.debug(f"Loading dataset from local script: {self._dataset}") - - return load_dataset(self._dataset, **self._kwargs) - - logger.debug(f"Loading dataset: {self._dataset}") - - return load_dataset(self._dataset, **self._kwargs) - - def _load_data_split(self, dataset: Union[DatasetDict, IterableDatasetDict]) -> str: - if self._split: - return self._split - - for split in PREFERRED_DATA_SPLITS: - if split in dataset: - self._split = split - break - if self._split is None: - self._split = list(dataset)[0] - - logger.info(f"Inferred split to use: {self._split}") - - return self._split - - def _load_data_column(self, dataset: Union[Dataset, IterableDataset]) -> str: - if self._column: - return self._column - - for col in PREFERRED_DATA_COLUMNS: - if col in dataset.column_names: - self._column = col - break - if self._column is None: - self._column = list(dataset.column_names)[0] - - logger.info(f"Inferred column to use for prompts: {self._column}") - - return self._column diff --git a/src/guidellm/scheduler/__init__.py b/src/guidellm/scheduler/__init__.py index 4b425ab..b3b4ac5 100644 --- a/src/guidellm/scheduler/__init__.py +++ b/src/guidellm/scheduler/__init__.py @@ -1,4 +1,4 @@ +from .base import Scheduler, SchedulerResult from .load_generator import LoadGenerationMode, LoadGenerator -from .scheduler import Scheduler -__all__ = ["LoadGenerationMode", "LoadGenerator", "Scheduler"] +__all__ = ["LoadGenerationMode", "LoadGenerator", "Scheduler", "SchedulerResult"] diff --git a/src/guidellm/scheduler/base.py b/src/guidellm/scheduler/base.py new file mode 100644 index 0000000..1474c12 --- /dev/null +++ b/src/guidellm/scheduler/base.py @@ -0,0 +1,355 @@ +import asyncio +import math +import time +from dataclasses import dataclass +from typing import AsyncGenerator, Literal, Optional, Union, get_args + +from loguru import logger + +from guidellm.backend import Backend +from guidellm.config import settings +from guidellm.core import ( + TextGenerationBenchmark, + TextGenerationError, + TextGenerationRequest, + TextGenerationResult, +) +from guidellm.request import RequestGenerator +from guidellm.scheduler.load_generator import LoadGenerationMode, LoadGenerator + +__all__ = ["Scheduler", "SchedulerResult"] + + +@dataclass +class SchedulerResult: + """ + Represents the result of a single task execution within the Scheduler. + + :param completed: Indicates if the task is completed. + :type completed: bool + :param count_total: Total number of tasks to be executed. + :type count_total: int + :param count_completed: Number of tasks that have been completed so far. + :type count_completed: int + :param benchmark: Benchmark data for the task execution. + :type benchmark: TextGenerationBenchmark + :param current_result: The result of the current request, if any. 
+ :type current_result: Optional[Union[TextGenerationResult, Exception]] + """ + + completed: bool + count_total: int + count_completed: int + benchmark: TextGenerationBenchmark + current_result: Optional[Union[TextGenerationResult, TextGenerationError]] = None + + +class Scheduler: + """ + Schedules and manages the execution of tasks for text generation requests. + + :param generator: The request generator that produces text generation requests. + :type generator: RequestGenerator + :param worker: The backend worker that processes the requests. + :type worker: Backend + :param mode: The mode of load generation (e.g., synchronous, asynchronous). + :type mode: LoadGenerationMode + :param rate: The rate at which requests are generated, if applicable. + :type rate: Optional[float] + :param max_number: Maximum number of requests to be processed. + :type max_number: Optional[int] + :param max_duration: Maximum duration in seconds for which requests + should be processed. + :type max_duration: Optional[float] + + :raises ValueError: If neither max_number nor max_duration is specified or + if they are not positive. + """ + + def __init__( + self, + generator: RequestGenerator, + worker: Backend, + mode: LoadGenerationMode = "synchronous", + rate: Optional[float] = None, + max_number: Optional[int] = None, + max_duration: Optional[float] = None, + ): + logger.info( + "Scheduler initialized with params: generator={}, worker={}, mode={}, " + "rate={}, max_number={}, max_duration={}", + generator, + worker, + mode, + rate, + max_number, + max_duration, + ) + + if mode not in get_args(LoadGenerationMode): + err = ValueError( + f"{mode} is not a valid Load Generation Mode. " + f"Valid options are {get_args(LoadGenerationMode)}" + ) + logger.error(err) + raise err + + if max_number is None and max_duration is None: + err = ValueError("Either max_number or max_duration must be specified") + logger.error(err) + raise err + + if max_number is not None and max_number <= 0: + err = ValueError(f"max_number must be > 0, given: {max_number}") + logger.error(err) + raise err + + if max_duration is not None and max_duration <= 0: + err = ValueError(f"max_duration must be > 0, given: {max_duration}") + logger.error(err) + raise err + + if mode in ["constant", "poisson"] and not rate: + err = ValueError(f"Rate must be > 0 for mode: {mode}. Given: {rate}") + logger.error(err) + raise err + + self._generator = generator + self._worker = worker + self._mode = mode + self._rate = rate + self._max_number = max_number + self._max_duration = max_duration + + self._load_generator = LoadGenerator(mode, rate) + + @property + def generator(self) -> RequestGenerator: + """ + The request generator that produces text generation requests. + + :return: The request generator instance. + :rtype: RequestGenerator + """ + return self._generator + + @property + def worker(self) -> Backend: + """ + The backend worker that processes the requests. + + :return: The backend worker instance. + :rtype: Backend + """ + return self._worker + + @property + def mode(self) -> LoadGenerationMode: + """ + The mode of load generation (e.g., synchronous, asynchronous). + + :return: The load generation mode. + :rtype: LoadGenerationMode + """ + return self._mode + + @property + def rate(self) -> Optional[float]: + """ + The rate at which requests are generated, if applicable. + + :return: The rate of request generation. 
+ :rtype: Optional[float] + """ + return self._rate + + @property + def max_number(self) -> Optional[int]: + """ + Maximum number of requests to be processed. + + :return: The maximum number of requests. + :rtype: Optional[int] + """ + return self._max_number + + @property + def max_duration(self) -> Optional[float]: + """ + Maximum duration in seconds for which requests should be processed. + + :return: The maximum duration in seconds. + :rtype: Optional[float] + """ + return self._max_duration + + @property + def load_generator(self) -> LoadGenerator: + """ + The load generator responsible for generating load based on mode and rate. + + :return: The load generator instance. + :rtype: LoadGenerator + """ + return self._load_generator + + @property + def benchmark_mode(self) -> Literal["asynchronous", "synchronous", "throughput"]: + """ + The benchmark mode for the scheduler. + + :return: The benchmark mode. + :rtype: Literal["asynchronous", "synchronous", "throughput"] + """ + if self._mode == "synchronous": + return "synchronous" + + if self._mode == "throughput": + return "throughput" + + return "asynchronous" + + async def run(self) -> AsyncGenerator[SchedulerResult, None]: + """ + Run the scheduler to process requests based on the configured mode, rate, + maximum number, and maximum duration. + + :yield: The result of each task executed by the scheduler. + :rtype: Generator[SchedulerResult, None, None] + """ + logger.info("Starting Scheduler run") + + benchmark = TextGenerationBenchmark(mode=self.benchmark_mode, rate=self.rate) + start_time = time.time() + end_time = start_time + self.max_duration if self.max_duration else math.inf + max_number = float(self.max_number) if self.max_number else math.inf + runner = self._run_async if self._mode == "asynchronous" else self._run_sync + + def _get_count_total(): + return ( + self.max_number + if self.max_number + else round(self.max_duration) + if self.max_duration + else 0 + ) + + def _get_count_completed(): + return min( + benchmark.request_count + benchmark.error_count + if self.max_number + else round(time.time() - start_time), + _get_count_total(), + ) + + # yield initial result for progress tracking + yield SchedulerResult( + completed=False, + count_total=_get_count_total(), + count_completed=_get_count_completed(), + benchmark=benchmark, + ) + + async for res in runner(benchmark, end_time, max_number): + yield SchedulerResult( + completed=False, + count_total=_get_count_total(), + count_completed=_get_count_completed(), + benchmark=benchmark, + current_result=res, + ) + + logger.info("Scheduler run completed") + + yield SchedulerResult( + completed=True, + count_total=_get_count_total(), + count_completed=_get_count_completed(), + benchmark=benchmark, + ) + + async def _run_sync( + self, benchmark: TextGenerationBenchmark, end_time: float, max_number: float + ) -> AsyncGenerator[Union[TextGenerationResult, TextGenerationError], None]: + for index, (request, submit_at) in enumerate( + zip(self.generator, self.load_generator.times()) + ): + if index >= max_number or time.time() >= end_time: + break + + logger.debug( + "Running synchronous request={} at submit_at={}", + request, + submit_at, + ) + benchmark.request_started() + result = await self._submit_task_coroutine(request, submit_at, end_time) + benchmark.request_completed(result) + logger.debug("Request completed with output: {}", result) + yield result + + async def _run_async( + self, benchmark: TextGenerationBenchmark, end_time: float, max_number: float + ) -> 
AsyncGenerator[Union[TextGenerationResult, TextGenerationError], None]: + tasks = [] + pending = 0 + + for index, (request, submit_at) in enumerate( + zip(self.generator, self.load_generator.times()) + ): + if index >= max_number or time.time() >= end_time or submit_at >= end_time: + break + + while pending >= settings.max_concurrency: + await asyncio.sleep(0.1) + + logger.debug( + "Running asynchronous request={} at submit_at={}", + request, + submit_at, + ) + + def _completed(_task: asyncio.Task) -> None: + nonlocal pending + pending -= 1 + _res = _task.result() + benchmark.request_completed(_res) + logger.debug("Request completed: {}", _res) + + benchmark.request_started() + task = asyncio.create_task( + self._submit_task_coroutine(request, submit_at, end_time) + ) + task.add_done_callback(_completed) + tasks.append(task) + pending += 1 # noqa: SIM113 + + for compl_task in asyncio.as_completed(tasks): + yield await compl_task + + async def _submit_task_coroutine( + self, request: TextGenerationRequest, submit_at: float, end_time: float + ) -> Union[TextGenerationResult, TextGenerationError]: + if submit_at > end_time: + raise asyncio.TimeoutError( + f"Request submission time {submit_at} is " + f"greater than end time {end_time}" + ) + + try: + if submit_at > time.time(): + await asyncio.sleep(submit_at - time.time()) + + timeout = ( + end_time - time.time() if end_time and end_time < math.inf else None + ) + + return await asyncio.wait_for(self._worker.submit(request), timeout=timeout) + except asyncio.TimeoutError as exc: + logger.info("Request {} timed out: {}", request, exc) + + return TextGenerationError(request=request, message=str(exc)) + except Exception as exc: # noqa: BLE001 + logger.warning("Request {} failed: {}", request, exc) + + return TextGenerationError(request=request, message=str(exc)) diff --git a/src/guidellm/scheduler/load_generator.py b/src/guidellm/scheduler/load_generator.py index a363761..f629752 100644 --- a/src/guidellm/scheduler/load_generator.py +++ b/src/guidellm/scheduler/load_generator.py @@ -1,61 +1,196 @@ import time -from enum import Enum -from typing import Generator +from typing import Generator, Literal, Optional, get_args import numpy as np +from loguru import logger __all__ = ["LoadGenerationMode", "LoadGenerator"] +LoadGenerationMode = Literal["synchronous", "constant", "poisson", "throughput"] -class LoadGenerationMode(str, Enum): - """ - Available values: - * SYNCHRONOUS - * CONSTANT (async) - * POISSON (async) +class LoadGenerator: """ + Load Generator class that generates timestamps for load generation. - SYNCHRONOUS = "synchronous" - CONSTANT = "constant" - POISSON = "poisson" + This class supports multiple load generation modes: "constant", "poisson", + "throughput", and "synchronous". Each mode has its own method for generating + timestamps based on the rate provided during initialization. + :param mode: The mode of load generation. Valid options are "constant", + "poisson", "throughput", and "synchronous". + :type mode: LoadGenerationMode + :param rate: The rate at which to generate timestamps. This value is + interpreted differently depending on the mode. + :type rate: float -class LoadGenerator: - def __init__(self, mode: LoadGenerationMode, rate: float): - if mode == LoadGenerationMode.SYNCHRONOUS: - raise ValueError("Synchronous mode not supported by LoadGenerator") + :raises ValueError: If an invalid mode is provided. 
+ """ + + def __init__(self, mode: LoadGenerationMode, rate: Optional[float] = None): + """ + Initialize the Load Generator with the mode and rate. + + :param mode: The mode of load generation ("constant", "poisson", "throughput", + or "synchronous"). + :type mode: LoadGenerationMode + :param rate: The rate at which to generate timestamps. In the "constant" + mode, this represents the frequency of events. In the "poisson" mode, + it represents the average frequency. + :type rate: Optional[float] + """ + if mode not in get_args(LoadGenerationMode): + error = ValueError( + f"{mode} is not a valid Load Generation Mode. " + f"Valid options are {get_args(LoadGenerationMode)}" + ) + logger.error(error) + raise error + + if mode not in ["synchronous", "throughput"] and (rate is None or rate <= 0): + error = ValueError(f"Rate must be > 0 for mode: {mode}. Given: {rate}") + logger.error(error) + raise error self._mode = mode self._rate = rate + logger.debug( + "Initialized LoadGenerator with mode: {mode}, rate: {rate}", + mode=mode, + rate=rate, + ) + + @property + def mode(self) -> LoadGenerationMode: + """ + Get the mode of load generation. + + :return: The mode of load generation. + :rtype: LoadGenerationMode + """ + return self._mode + + @property + def rate(self) -> Optional[float]: + """ + Get the rate of load generation. + + :return: The rate of load generation. + :rtype: Optional[float] + """ + return self._rate def times(self) -> Generator[float, None, None]: - if self._mode == LoadGenerationMode.SYNCHRONOUS: - raise ValueError("Synchronous mode not supported by LoadGenerator") + """ + Generate timestamps for load generation based on the selected mode. + + :return: A generator that yields timestamps at which each load + should be initiated. + :rtype: Generator[float, None, None] + + :raises ValueError: If the mode is invalid. + """ + logger.debug(f"Generating timestamps using mode: {self._mode}") + + if self._mode == "throughput": + yield from self.throughput_times() + elif self._mode == "constant": + yield from self.constant_times() + elif self._mode == "poisson": + yield from self.poisson_times() + elif self._mode == "synchronous": + yield from self.synchronous_times() + else: + logger.error(f"Invalid mode encountered: {self._mode}") + raise ValueError(f"Invalid mode: {self._mode}") - elif self._mode == LoadGenerationMode.CONSTANT: - yield from self._constant_times() + def synchronous_times(self) -> Generator[float, None, None]: + """ + Generate invalid timestamps for the "synchronous" mode. - elif self._mode == LoadGenerationMode.POISSON: - yield from self._poisson_times() - else: - raise NotImplementedError( - f"{self._mode} is not supported Load Generation Mode", + :return: A generator that yields a constant invalid timestamp (-1.0). + :rtype: Generator[float, None, None] + """ + logger.debug("Generating invalid timestamps for synchronous mode") + while True: + yield -1.0 + + def throughput_times(self) -> Generator[float, None, None]: + """ + Generate timestamps at the maximum rate possible, returning the current time. + + :return: A generator that yields the current time in seconds. + :rtype: Generator[float, None, None] + """ + logger.debug("Generating timestamps at throughput rate") + while True: + yield time.time() + + def constant_times(self) -> Generator[float, None, None]: + """ + Generate timestamps at a constant rate based on the specified rate. + + :return: A generator that yields timestamps incremented by 1/rate seconds. 
+ :rtype: Generator[float, None, None] + """ + logger.debug("Generating constant rate timestamps with rate: {}", self._rate) + + if self._rate is None or self._rate == 0: + raise ValueError( + "Rate must be > 0 for constant mode, given: {}", self._rate ) - def _constant_times(self) -> Generator[float, None, None]: start_time = time.time() time_increment = 1.0 / self._rate counter = 0 while True: - yield start_time + time_increment * counter + yield_time = start_time + time_increment * counter + logger.debug(f"Yielding timestamp: {yield_time}") + yield yield_time counter += 1 - def _poisson_times(self) -> Generator[float, None, None]: + def poisson_times(self) -> Generator[float, None, None]: + """ + Generate timestamps based on a Poisson process, where the number + of requests to be sent per second is drawn from a Poisson distribution. + The inter arrival time between requests is exponentially distributed. + + :return: A generator that yields timestamps based on a Poisson distribution. + :rtype: Generator[float, None, None] + """ + logger.debug("Generating Poisson rate timestamps with rate: {}", self._rate) + + if self._rate is None or self._rate == 0: + raise ValueError("Rate must be > 0 for poisson mode, given: {}", self._rate) + time_tracker = time.time() rng = np.random.default_rng() + time_increment = 1.0 while True: - yield time_tracker - time_tracker += rng.poisson(1.0 / self._rate) + num_requests = rng.poisson(self._rate) + + if num_requests == 0: + yield time_tracker + time_increment + else: + inter_arrival_times = rng.exponential(1.0 / self._rate, num_requests) + logger.debug( + "Calculated new inter-arrival times for poisson process: {}", + inter_arrival_times, + ) + arrival_time_tracker = time_tracker + + for arrival_time in inter_arrival_times: + arrival_time_tracker += arrival_time + + if arrival_time_tracker > time_tracker + time_increment: + logger.debug( + "Arrival time tracker: {} is greater than current time", + arrival_time_tracker, + ) + break + + yield arrival_time_tracker + + time_tracker += time_increment # Move on to the next time period diff --git a/src/guidellm/scheduler/scheduler.py b/src/guidellm/scheduler/scheduler.py deleted file mode 100644 index 129dbaa..0000000 --- a/src/guidellm/scheduler/scheduler.py +++ /dev/null @@ -1,236 +0,0 @@ -import asyncio -import functools -import time -from typing import Callable, Generator, Iterable, List, Optional, Tuple - -from loguru import logger - -from guidellm.backend import Backend -from guidellm.core import ( - TextGenerationBenchmark, - TextGenerationError, - TextGenerationResult, -) -from guidellm.core.request import TextGenerationRequest -from guidellm.request import RequestGenerator - -from .load_generator import LoadGenerationMode, LoadGenerator - -__all__ = ["Scheduler"] - - -class Scheduler: - """ - The scheduler class is responsible for handling tasks and running - """ - - def __init__( - self, - request_generator: RequestGenerator, - backend: Backend, - load_gen_mode: LoadGenerationMode = LoadGenerationMode.SYNCHRONOUS, - load_gen_rate: Optional[float] = None, - max_requests: Optional[int] = None, - max_duration: Optional[float] = None, - ): - if max_requests is None and max_duration is None: - raise ValueError("Either num_requests or duration must be specified") - - if (max_requests is not None and max_requests <= 0) or ( - max_duration is not None and max_duration <= 0 - ): - raise ValueError("max_requests anx max_duration must be > 0") - - if load_gen_mode != LoadGenerationMode.SYNCHRONOUS and 
load_gen_rate is None: - raise ValueError( - "Rate must be specified for non-synchronous load generation modes", - ) - - self._request_generator = request_generator - self._backend = backend - self._load_gen_mode = load_gen_mode - self._load_gen_rate = load_gen_rate - self._max_requests = max_requests - self._max_duration = max_duration - - def run(self) -> TextGenerationBenchmark: - if self._load_gen_mode == LoadGenerationMode.SYNCHRONOUS: - report: TextGenerationBenchmark = self._run_sync() - else: - report = self._event_loop.run_until_complete(self._run_async()) - - return report - - @property - def load_generator(self) -> LoadGenerator: - if self._load_gen_mode == LoadGenerationMode.SYNCHRONOUS: - raise ValueError("LoadGenerator can't be used is synchronous mode") - - if not self._load_gen_rate: - raise ValueError("Invalid empty value for self._load_gen_rate") - - return LoadGenerator(self._load_gen_mode, self._load_gen_rate) - - def _cancel_running_tasks( - self, - tasks: Iterable[Tuple[TextGenerationRequest, asyncio.Task]], - benchmark: TextGenerationBenchmark, - ) -> None: - """ - Cancel all the running tasks for the scheduler and augment the - benchmark with error reports. - - :param tasks: The `tasks` iterable batch. Where the batch includes - the asyncio.Task and the signature context of that task. - """ - - for request, task in tasks: - if not task.done(): - logger.debug(f"Cancelling running task {task}") - task.cancel() - benchmark.errors.append( - TextGenerationError( - request=request, - message=str(asyncio.CancelledError()), - ), - ) - - def _run_sync(self) -> TextGenerationBenchmark: - benchmark = TextGenerationBenchmark(mode=self._load_gen_mode.value, rate=None) - start_time = time.time() - - logger.debug("Running scheduler in sync mode") - - for requests_counter, callback in enumerate(self._sync_tasks()): - if ( - self._max_requests is not None - and requests_counter >= self._max_requests - ) or ( - self._max_duration is not None - and time.time() - start_time >= self._max_duration - ): - break - - benchmark.request_started() - res = callback() - benchmark.request_completed(res) - - return benchmark - - async def _run_async(self) -> TextGenerationBenchmark: - """ - Running in async mode determines next steps: - * Iterate through all the tasks with load attached - * Check the execution time does not go over the max duration - * Check the number of requests is not greater than max requests - - If the max duration is not specified for the scheduler - check only - max requests and just break the loop without cancelling tasks. 
- """ - - benchmark: TextGenerationBenchmark = TextGenerationBenchmark( - mode=self._load_gen_mode.value, - rate=self._load_gen_rate, - ) - tasks: List[Tuple[TextGenerationRequest, asyncio.Task]] = [] - start_time: float = time.time() - - for requests_counter, (_task_package, task_start_time) in enumerate( - zip( - self._async_tasks(benchmark), - self.load_generator.times(), - ) - ): - request, task = _task_package - tasks.append((request, task)) - - if ( - self._max_duration is not None - and time.time() - start_time >= self._max_duration - ): - self._cancel_running_tasks(tasks=tasks, benchmark=benchmark) - break - - if ( - self._max_requests is not None - and requests_counter >= self._max_requests - 1 - ): - break - - if (pending_time := task_start_time - time.time()) > 0: - await asyncio.sleep(pending_time) - - if self._max_duration is None: - await asyncio.gather(*(t for _, t in tasks)) - else: - try: - # Set the timeout if the max duration is specified - await asyncio.wait_for( - asyncio.gather(*(t for _, t in tasks), return_exceptions=True), - self._max_duration, - ) - except asyncio.TimeoutError: - self._cancel_running_tasks(tasks=tasks, benchmark=benchmark) - - return benchmark - - def _sync_tasks(self) -> Generator[Callable[..., TextGenerationResult], None, None]: - """ - Iterate through `Backend.submit()` sync callbacks. - """ - - for request in self._request_generator: - yield functools.partial(self._backend.submit, request=request) - - def _async_tasks( - self, - benchmark: TextGenerationBenchmark, - ) -> Generator[Tuple[TextGenerationRequest, asyncio.Task], None, None]: - """ - Iterate through `Backend.submit()` async tasks. - """ - - for request in self._request_generator: - task: asyncio.Task = asyncio.create_task( - self._run_task_async(benchmark=benchmark, request=request), - name=f"Backend.submit({request.prompt})", - ) - - yield request, task - - async def _run_task_async( - self, - benchmark: TextGenerationBenchmark, - request: TextGenerationRequest, - ): - benchmark.request_started() - try: - res = await self._event_loop.run_in_executor( - None, - functools.partial(self._backend.submit, request=request), - ) - except asyncio.CancelledError as err: - benchmark.errors.append( - TextGenerationError( - request=request, - message=str(err), - ), - ) - else: - benchmark.request_completed(res) - - @property - def _event_loop(self) -> asyncio.AbstractEventLoop: - """ - The scheduler internal event loop. - - All the tasks should run in that Event Loop to be managed - from the Scheduler's level. 
- """ - - try: - loop = asyncio.get_running_loop() - except RuntimeError: - return asyncio.get_event_loop() - else: - return loop diff --git a/src/guidellm/utils/__init__.py b/src/guidellm/utils/__init__.py index 549eee5..60668cf 100644 --- a/src/guidellm/utils/__init__.py +++ b/src/guidellm/utils/__init__.py @@ -1,17 +1,38 @@ -from .constants import ( - PREFERRED_DATA_COLUMNS, - PREFERRED_DATA_SPLITS, - REPORT_HTML_MATCH, - REPORT_HTML_PLACEHOLDER, +from .injector import create_report, inject_data +from .text import ( + clean_text, + filter_text, + is_path, + is_path_like, + is_url, + load_text, + load_text_lines, + parse_text_objects, + split_lines_by_punctuation, + split_text, +) +from .transformers import ( + load_transformers_dataset, + resolve_transformers_dataset, + resolve_transformers_dataset_column, + resolve_transformers_dataset_split, ) -from .injector import create_report, inject_data, load_html_file __all__ = [ - "PREFERRED_DATA_COLUMNS", - "PREFERRED_DATA_SPLITS", - "REPORT_HTML_MATCH", - "REPORT_HTML_PLACEHOLDER", + "clean_text", "create_report", + "filter_text", "inject_data", - "load_html_file", + "is_path", + "is_path_like", + "is_url", + "load_text", + "load_text_lines", + "load_transformers_dataset", + "parse_text_objects", + "resolve_transformers_dataset", + "resolve_transformers_dataset_column", + "resolve_transformers_dataset_split", + "split_lines_by_punctuation", + "split_text", ] diff --git a/src/guidellm/utils/constants.py b/src/guidellm/utils/constants.py deleted file mode 100644 index 1ccb02e..0000000 --- a/src/guidellm/utils/constants.py +++ /dev/null @@ -1,26 +0,0 @@ -__all__ = [ - "PREFERRED_DATA_COLUMNS", - "PREFERRED_DATA_SPLITS", - "REPORT_HTML_MATCH", - "REPORT_HTML_PLACEHOLDER", -] - - -PREFERRED_DATA_COLUMNS = [ - "prompt", - "instruction", - "input", - "inputs", - "question", - "context", - "text", - "content", - "body", - "data", -] - -PREFERRED_DATA_SPLITS = ["test", "validation", "train"] - -REPORT_HTML_MATCH = "window.report_data = {};" - -REPORT_HTML_PLACEHOLDER = "{}" diff --git a/src/guidellm/utils/injector.py b/src/guidellm/utils/injector.py index 70f2150..fb5216a 100644 --- a/src/guidellm/utils/injector.py +++ b/src/guidellm/utils/injector.py @@ -1,16 +1,12 @@ from pathlib import Path from typing import Union -import requests from pydantic import BaseModel from guidellm.config import settings -from guidellm.utils.constants import ( - REPORT_HTML_MATCH, - REPORT_HTML_PLACEHOLDER, -) +from guidellm.utils.text import load_text -__all__ = ["create_report", "inject_data", "load_html_file"] +__all__ = ["create_report", "inject_data"] def create_report(model: BaseModel, output_path: Union[str, Path]) -> Path: @@ -29,9 +25,12 @@ def create_report(model: BaseModel, output_path: Union[str, Path]) -> Path: if not isinstance(output_path, Path): output_path = Path(output_path) - html_content = load_html_file(settings.report_generation.source) + html_content = load_text(settings.report_generation.source) report_content = inject_data( - model, html_content, REPORT_HTML_MATCH, REPORT_HTML_PLACEHOLDER + model, + html_content, + settings.report_generation.report_html_match, + settings.report_generation.report_html_placeholder, ) if not output_path.suffix: @@ -69,26 +68,3 @@ def inject_data( inject_str = match.replace(placeholder, model_str) return html.replace(match, inject_str) - - -def load_html_file(path_or_url: str) -> str: - """ - Load an HTML file from a path or URL - - :param path_or_url: the path or URL to load the HTML file from - :type path_or_url: 
str - :return: the HTML content - :rtype: str - """ - if path_or_url.startswith("http"): - response = requests.get(path_or_url, timeout=settings.request_timeout) - response.raise_for_status() - - return response.text - - path = Path(path_or_url) - - if not path.exists(): - raise FileNotFoundError(f"File not found: {path_or_url}") - - return path.read_text() diff --git a/src/guidellm/utils/text.py b/src/guidellm/utils/text.py new file mode 100644 index 0000000..13a0dff --- /dev/null +++ b/src/guidellm/utils/text.py @@ -0,0 +1,455 @@ +import csv +import json +import re +from pathlib import Path +from typing import Any, Dict, List, Optional, Tuple, Union +from urllib.parse import urlparse + +import ftfy +import requests +import yaml +from loguru import logger + +from guidellm.config import settings + +__all__ = [ + "clean_text", + "filter_text", + "is_path", + "is_path_like", + "is_url", + "load_text", + "load_text_lines", + "parse_text_objects", + "split_lines_by_punctuation", + "split_text", +] + + +NAME_TITLES = [ + "Mr.", + "Mrs.", + "Ms.", + "Dr.", + "Prof.", + "Jr.", + "Sr.", + "St.", + "Lt.", + "Col.", + "Gen.", + "Rep.", + "Sen.", + "Gov.", + "Pres.", +] +SENTENCE_REGEX = r'[^.!?]*[.!?]["\']?\s*(?=[A-Z])' +MAX_EXTENSION_LENGTH = 8 +MAX_PATH_LENGTH = 4096 +EXTENSION_TYPES = { + "csv": "csv", + "jsonl": "jsonl", + "json": "json", + "yaml": "yaml", + "yml": "yaml", + "txt": "txt", + "text": "txt", +} + + +def filter_text( + text: str, + filter_start: Optional[Union[str, int]] = None, + filter_end: Optional[Union[str, int]] = None, +) -> str: + """ + Filter text by start and end strings or indices + + :param text: the text to filter + :param filter_start: the start string or index to filter from + :param filter_end: the end string or index to filter to + :return: the filtered text + """ + filter_start_index = -1 + filter_end_index = -1 + + if filter_start and isinstance(filter_start, str): + filter_start_index = text.index(filter_start) + elif filter_start: + if not isinstance(filter_start, int): + raise ValueError(f"Invalid filter start index: {filter_start}") + filter_start_index = filter_start + + if filter_end and isinstance(filter_end, str): + filter_end_index = text.index(filter_end) + elif filter_end: + if not isinstance(filter_end, int): + raise ValueError(f"Invalid filter end index: {filter_end}") + filter_end_index = filter_end + + if filter_start_index > -1: + text = text[filter_start_index:] + if filter_end_index > -1: + text = text[:filter_end_index] + + return text + + +def clean_text( + text: str, + fix_encoding: bool = True, + clean_whitespace: bool = False, + remove_empty_lines: bool = False, + force_new_line_punctuation: bool = False, +) -> str: + """ + Clean text by fixing encoding, cleaning whitespace, removing empty lines, + and forcing new line punctuation + + :param text: the text to clean + :param fix_encoding: True to fix the encoding of the text, False to leave as is + :param clean_whitespace: True to clean the whitespace in the text + (remove extra spaces, tabs, etc), False to leave as is + :param remove_empty_lines: True to remove empty lines from the text + (lines with only whitespace), False to leave as is + :param force_new_line_punctuation: True to force new lines at punctuation + (line ends in a period, exclamation point, or question mark), + False to leave as is + :return: The cleaned text + """ + + if fix_encoding: + text = ftfy.fix_text(text) + + if clean_whitespace: + text = "\n".join( + [re.sub(r"\s+", " ", line).strip() for line in text.splitlines()] 
+ ) + + if remove_empty_lines: + text = "\n".join([line for line in text.splitlines() if line.strip()]) + + if force_new_line_punctuation: + # first remove any existing new lines + text = " ".join(line for line in text.splitlines() if line.strip()) + lines = split_lines_by_punctuation(text) + text = "\n".join(lines) + + return text + + +def split_lines_by_punctuation(text: str) -> List[str]: + """ + Split text into lines based on punctuation + + :param text: the text to split + :return: the list of lines + """ + + lines = [] + current_line = "" + skip_next = False + + for index, char in enumerate(text): + if skip_next: + skip_next = False + continue + + current_line += char + + if char not in [".", "!", "?"]: + # must match end of sentence punctuation + continue + + # if this is the character for a title, don't split + if any(current_line.endswith(title) for title in NAME_TITLES): + continue + + char_next_1 = text[index + 1] if index + 1 < len(text) else None + char_next_2 = text[index + 2] if index + 2 < len(text) else None + char_next_3 = text[index + 3] if index + 3 < len(text) else None + + next_is_space = char_next_1 and char_next_1.isspace() + next_is_quote_and_space = char_next_1 in ["'", '"'] and char_next_2 == " " + + # next character must be a space or a quote, otherwise skip + if not next_is_space and not next_is_quote_and_space: + continue + + # after this, next character must be an upper case letter + upper_char = char_next_3 if next_is_quote_and_space else char_next_2 + next_is_upper = upper_char and ( + upper_char.isupper() or upper_char in ["'", '"'] + ) + + if not next_is_upper: + continue + + # if next char is a quote, add it and skip next + if next_is_quote_and_space: + current_line += text[index + 1] + skip_next = True + + lines.append(current_line.strip()) + current_line = "" + + if current_line: + lines.append(current_line.strip()) + + return lines + + +def is_url(url: str) -> bool: + """ + Check if a string is a URL + + :param url: the string to check + :return: True if the string is a URL, False if not + """ + try: + result = urlparse(url) + return all([result.scheme, result.netloc]) + except Exception: # noqa: BLE001 + return False + + +def is_path(path: Any) -> bool: + """ + Check if a string is a path + + :param path: the string to check + :return: True if the string is a path, False if not + """ + if not isinstance(path, (str, Path)): + return False + + if isinstance(path, str): + path = Path(path) + + return path.exists() + + +def is_path_like(path: Any, enforce_file: bool = False) -> bool: + """ + Check if a string has a path like structure where it doesn't need to exist + + :param path: the string to check + :param enforce_file: True if the path should be a file, False if not + :return: True if the string is path like, False if not + """ + # if path isn't a str or Path, it's not a path + if not isinstance(path, (str, Path)): + return False + + if isinstance(path, Path): + path = str(path) + + # if text is too long, it's not a path (4096 for most linux setups) + if len(path) > MAX_PATH_LENGTH: + return False + + # if it starts with a URL scheme, it's not a path + if path.startswith(("http", "ftp")): + return False + + test_path = Path(path) + + # if it's supposed to be a file and there's no extension or + # the extension is too long, it's not a path + return not enforce_file or ( + bool(test_path.suffix) and len(test_path.suffix) <= MAX_EXTENSION_LENGTH + ) + + +def split_text(text: str) -> Tuple[List[str], List[str], List[int]]: + """ + Split text into words 
/ tokens, the white space separators between words, + and the indices for each new line + + :param text: the text to split + :return: the words, the white space separators, and the new line indices + """ + if not text or not text.strip(): + return [], [], [] + + text = text.strip() + tokens = [] # type: List[str] + separators = [] # type: List[str] + new_lines = [0] + buffer = text[0] + is_token = not text[0].isspace() + + for char in text[1:]: + char_whitespace = char.isspace() + + if char == "\n": + new_lines.append(len(tokens) + 1) + + if char_whitespace and is_token: + tokens.append(buffer) + buffer = char + is_token = False + elif char_whitespace: + buffer += char + elif not char_whitespace and not is_token: + separators.append(buffer) + buffer = char + is_token = True + else: + buffer += char + + if buffer and is_token: + tokens.append(buffer) + separators.append(" ") + elif buffer: + separators.append(buffer) + + return tokens, separators, new_lines + + +def load_text(data: Union[str, Path], encoding: Optional[str] = None) -> str: + """ + Load an HTML file from a path or URL + + :param data: the path or URL to load the HTML file from + :type data: Union[str, Path] + :param encoding: the encoding to use when reading the file + :type encoding: str + :return: the HTML content + :rtype: str + """ + logger.debug("Loading text: {}", data) + + if not data: + return "" + + # check URLs + if isinstance(data, str) and data.startswith("http"): + response = requests.get(data, timeout=settings.request_timeout) + response.raise_for_status() + return response.text + + # check raw text + if isinstance(data, str) and not is_path_like(data, enforce_file=True): + return data + + # assume local file + if not isinstance(data, Path): + data = Path(data) + + if not data.exists(): + raise FileNotFoundError(f"File not found: {data}") + + if not data.is_file(): + raise IsADirectoryError(f"Path is a directory: {data}") + + return data.read_text(encoding=encoding) + + +def parse_text_objects(data: str, format_: str = "txt") -> List[Dict]: + """ + Parse text data into a list of dictionaries based on the format given + (csv, jsonl, json, yaml, txt). 
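+
+    Illustrative example (assumed inputs) for the "txt" and "jsonl" formats:
+
+        parse_text_objects("hello\nworld", format_="txt")
+        # -> [{"text": "hello"}, {"text": "world"}]
+        parse_text_objects('{"text": "hello"}', format_="jsonl")
+        # -> [{"text": "hello"}]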
+ + :param data: the text data to parse + :param format_: the format of the data to parse: + 'csv', 'jsonl', 'json', 'yaml', 'txt' + :return: the list of dictionaries parsed from the data, if text + then each line is a dictionary with a single key 'text' + """ + if not isinstance(data, str): + raise ValueError(f"Unsupported data given of type: {type(data)}") + + if format_ == "csv": + reader = csv.DictReader(data.splitlines()) + columns = reader.fieldnames + return [{col: row[col] for col in columns} for row in reader] # type: ignore # noqa: PGH003 + + if format_ == "jsonl": + return [json.loads(line) for line in data.splitlines() if line] + + if format_ in ("json", "yaml"): + data = json.loads(data) if format_ == "json" else yaml.safe_load(data) + + if not data: + return [] + + if isinstance(data, dict) and len(data) == 1: + logger.debug("Getting first value from JSON/YAML object: {}", data) + data = list(data.values())[0] + elif isinstance(data, dict): + logger.debug("Converting JSON/YAML object to list: {}", data) + data = list(data.values()) + + if not isinstance(data, list) or not isinstance(data[0], dict): + raise ValueError(f"Unsupported data structure given: {data}") + + return data + + if format_ == "txt": + return [{"text": line} for line in data.splitlines() if line] + + raise ValueError(f"Unsupported format given: {format_}") + + +def load_text_lines( + data: Union[str, Path, List[Dict]], + format_: Optional[str] = None, + filters: Optional[List[str]] = None, + encoding: Optional[str] = None, +) -> List[str]: + """ + Load text lines from a file or data object with optional filtering and formatting. + + + :param data: the data to load the text lines from + :param format_: the format of the data to load, if not provided will be inferred. + Supported formats: 'csv', 'jsonl', 'json', 'yaml', 'txt' + :param filters: the keys to filter the data by when loading in order of preference. + If not provided, will use the first key in the data object. + :param encoding: the encoding to use when reading the file + :return: the list of text lines + """ + logger.debug( + "Loading text lines with format {}, filters {}, encoding {} for data: {}", + format_, + filters, + encoding, + data, + ) + + if not data: + return [] + + if not format_ and isinstance(data, (str, Path)) and "." 
in str(data): + extension = str(data).split(".")[-1] + format_ = EXTENSION_TYPES.get(extension, "txt") + elif not format_: + format_ = "txt" + + # load the data if it's a path or URL + if isinstance(data, Path) or (isinstance(data, str) and data.startswith("http")): + data = load_text(data, encoding=encoding) + data = clean_text(data) + + # parse the data into a list of dictionaries based on the format + if isinstance(data, str): + data = parse_text_objects(data, format_) + + if not isinstance(data, list): + raise ValueError(f"Unsupported data given of type: {type(data)}") + + if not isinstance(data[0], dict): + raise ValueError(f"Unsupported data item type given: {type(data[0])}") + + # grab the first available filter key to use if preference order as provided + filter_ = list(data[0].keys())[0] + for filt in filters or []: + if filt not in data[0]: + continue + + filter_ = filt + break + + # extract the lines from the data + return [row[filter_] for row in data] if filter_ else [str(row) for row in data] diff --git a/src/guidellm/utils/transformers.py b/src/guidellm/utils/transformers.py new file mode 100644 index 0000000..5405729 --- /dev/null +++ b/src/guidellm/utils/transformers.py @@ -0,0 +1,151 @@ +from pathlib import Path +from typing import List, Optional, Union + +from datasets import ( # type: ignore # noqa: PGH003 + Dataset, + DatasetDict, + IterableDataset, + IterableDatasetDict, + load_dataset, +) +from loguru import logger + +from guidellm.config import settings + +__all__ = [ + "load_transformers_dataset", + "resolve_transformers_dataset", + "resolve_transformers_dataset_column", + "resolve_transformers_dataset_split", +] + + +def load_transformers_dataset( + dataset: Union[ + str, Path, DatasetDict, Dataset, IterableDatasetDict, IterableDataset + ], + split: Optional[str] = None, + preferred_splits: Optional[List[str]] = settings.dataset.preferred_data_splits, + **kwargs, +) -> Union[Dataset, IterableDataset]: + """ + Load a dataset from a file or a script and resolve the preferred split. + + :param dataset: the dataset file or script to load + :param split: the dataset split to use + (overrides preferred_splits, must be in dataset) + :param preferred_splits: the preferred dataset splits to use + :param kwargs: additional keyword arguments to pass to the dataset loader + :return: the loaded dataset + """ + dataset = resolve_transformers_dataset(dataset, **kwargs) + + return resolve_transformers_dataset_split(dataset, split, preferred_splits) + + +def resolve_transformers_dataset( + dataset: Union[ + str, Path, DatasetDict, Dataset, IterableDatasetDict, IterableDataset + ], + **kwargs, +) -> Union[DatasetDict, Dataset, IterableDatasetDict, IterableDataset]: + """ + Resolve the dataset from a file (csv, json, script) or a dataset name. 
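+
+    Illustrative resolution behavior (a sketch of the branches below; the
+    dataset names are assumptions, not files or datasets from this repo):
+
+        resolve_transformers_dataset("data/train.csv")
+        # -> load_dataset("csv", data_files="data/train.csv")
+        resolve_transformers_dataset("my_dataset.py")
+        # -> load_dataset("my_dataset.py")
+        resolve_transformers_dataset("openai_humaneval")
+        # -> load_dataset("openai_humaneval")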
+ + :param dataset: the dataset file or script to load + :param kwargs: additional keyword arguments to pass to the dataset loader + :return: the loaded dataset + """ + if isinstance( + dataset, (DatasetDict, Dataset, IterableDatasetDict, IterableDataset) + ): + return dataset + + if not isinstance(dataset, (str, Path)): + raise ValueError(f"Invalid dataset type: {type(dataset)}") + + dataset = str(dataset) + + if dataset.endswith((".csv", ".json")): + logger.debug("Loading dataset from local path: {}", dataset) + extension = dataset.split(".")[-1] + + return load_dataset(extension, data_files=dataset, **kwargs) + + if dataset.endswith(".py"): + logger.debug("Loading dataset from local script: {}", dataset) + + return load_dataset(dataset, **kwargs) + + logger.debug("Loading dataset: {}", dataset) + + return load_dataset(dataset, **kwargs) + + +def resolve_transformers_dataset_split( + dataset: Union[DatasetDict, Dataset, IterableDatasetDict, IterableDataset], + split: Optional[str] = None, + preferred_splits: Optional[List[str]] = settings.dataset.preferred_data_splits, +) -> Union[Dataset, IterableDataset]: + """ + Resolve the preferred split from a dataset dictionary. + + :param dataset: the dataset to resolve the split from + :param split: the dataset split to use + (overrides preferred_splits, must be in dataset) + :param preferred_splits: the preferred dataset splits to use + :return: the resolved dataset split + """ + if not isinstance(dataset, (DatasetDict, IterableDatasetDict)): + logger.debug("Dataset is not a dictionary, using default split") + return dataset + + if split: + if split not in dataset: + raise ValueError(f"Split '{split}' not found in dataset") + + return dataset[split] + + if preferred_splits: + for spl in preferred_splits: + if spl not in dataset: + continue + return dataset[spl] + + return list(dataset.values())[0] + + +def resolve_transformers_dataset_column( + dataset: Union[Dataset, IterableDataset], + column: Optional[str] = None, + preferred_columns: Optional[List[str]] = settings.dataset.preferred_data_columns, +) -> str: + """ + Resolve the preferred column from a dataset. + + :param dataset: the dataset to resolve the column from + :param column: the dataset column to use + (overrides preferred_columns, must be in dataset) + :param preferred_columns: the preferred dataset columns to use + :return: the resolved dataset column + """ + column_names = dataset.column_names + + if not column_names: + # grab from the first item + first_item = next(iter(dataset)) + column_names = list(first_item.keys()) + + if column: + if column not in column_names: + raise ValueError(f"Column '{column}' not found in dataset") + + return column + + if preferred_columns: + for col in preferred_columns: + if col not in column_names: + continue + return col + + return list(column_names)[0] diff --git a/tests/conftest.py b/tests/conftest.py deleted file mode 100644 index 3fbe8a6..0000000 --- a/tests/conftest.py +++ /dev/null @@ -1,31 +0,0 @@ -from typing import Callable, Optional - -import pytest -from guidellm.backend import Backend, BackendEngine, OpenAIBackend -from guidellm.config import settings -from loguru import logger - - -def pytest_configure() -> None: - logger.disable("guidellm") - - -@pytest.fixture() -def openai_backend_factory() -> Callable[..., OpenAIBackend]: - """ - OpenAI Backend factory method. - Call without provided arguments returns default Backend service. 
- """ - - def inner_wrapper(*_, base_url: Optional[str] = None, **kwargs) -> OpenAIBackend: - defaults = { - "backend_type": BackendEngine.OPENAI_SERVER, - "openai_api_key": "required but not used", - "target": base_url or settings.openai.base_url, - } - - defaults.update(kwargs) - - return Backend.create(**defaults) # type: ignore - - return inner_wrapper diff --git a/tests/dummy/data/pride_and_prejudice.txt b/tests/dummy/data/pride_and_prejudice.txt new file mode 100644 index 0000000..3b93b50 --- /dev/null +++ b/tests/dummy/data/pride_and_prejudice.txt @@ -0,0 +1,2015 @@ +*** START OF THE PROJECT GUTENBERG EBOOK 1342 *** + + PAGE + +Frontispiece iv + +Title-page v + +Dedication vii + +Heading to Preface ix + +Heading to List of Illustrations xxv + +Heading to Chapter I. 1 + +“He came down to see the place” 2 + +Mr. and Mrs. Bennet 5 + +“I hope Mr. Bingley will like it” 6 + +“I’m the tallest” 9 + +“He rode a black horse” 10 + +“When the party entered” 12 + +“She is tolerable” 15 + +Heading to Chapter IV. 18 + +Heading to Chapter V. 22 + +“Without once opening his lips” 24 + +Tailpiece to Chapter V. 26 + +Heading to Chapter VI. 27 + +“The entreaties of several” 31 + +“A note for Miss Bennet” 36 + +“Cheerful prognostics” 40 + +“The apothecary came” 43 + +“Covering a screen” 45 + +“Mrs. Bennet and her two youngest girls” 53 + +Heading to Chapter X. 60 + +“No, no; stay where you are” 67 + +“Piling up the fire” 69 + +Heading to Chapter XII. 75 + +Heading to Chapter XIII. 78 + +Heading to Chapter XIV. 84 + +“Protested that he never read novels” 87 + +Heading to Chapter XV. 89 + +Heading to Chapter XVI. 95 + +“The officers of the ----shire” 97 + +“Delighted to see their dear friend again” 108 + +Heading to Chapter XVIII. 113 + +“Such very superior dancing is not often seen” 118 + +“To assure you in the most animated language” 132 + +Heading to Chapter XX. 139 + +“They entered the breakfast-room” 143 + +Heading to Chapter XXI. 146 + +“Walked back with them” 148 + +Heading to Chapter XXII. 154 + +“So much love and eloquence” 156 + +“Protested he must be entirely mistaken” 161 + +“Whenever she spoke in a low voice” 166 + +Heading to Chapter XXIV. 168 + +Heading to Chapter XXV. 175 + +“Offended two or three young ladies” 177 + +“Will you come and see me?” 181 + +“On the stairs” 189 + +“At the door” 194 + +“In conversation with the ladies” 198 + +“Lady Catherine,” said she, “you have given me a treasure” 200 + +Heading to Chapter XXX. 209 + +“He never failed to inform them” 211 + +“The gentlemen accompanied him” 213 + +Heading to Chapter XXXI. 215 + +Heading to Chapter XXXII. 221 + +“Accompanied by their aunt” 225 + +“On looking up” 228 + +Heading to Chapter XXXIV. 235 + +“Hearing herself called” 243 + +Heading to Chapter XXXVI. 253 + +“Meeting accidentally in town” 256 + +“His parting obeisance” 261 + +“Dawson” 263 + +“The elevation of his feelings” 267 + +“They had forgotten to leave any message” 270 + +“How nicely we are crammed in!” 272 + +Heading to Chapter XL. 278 + +“I am determined never to speak of it again” 283 + +“When Colonel Miller’s regiment went away” 285 + +“Tenderly flirting” 290 + +The arrival of the Gardiners 294 + +“Conjecturing as to the date” 301 + +Heading to Chapter XLIV. 318 + +“To make herself agreeable to all” 321 + +“Engaged by the river” 327 + +Heading to Chapter XLVI. 334 + +“I have not an instant to lose” 339 + +“The first pleasing earnest of their welcome” 345 + +The Post 359 + +“To whom I have related the affair” 363 + +Heading to Chapter XLIX. 
368 + +“But perhaps you would like to read it” 370 + +“The spiteful old ladies” 377 + +“With an affectionate smile” 385 + +“I am sure she did not listen” 393 + +“Mr. Darcy with him” 404 + +“Jane happened to look round” 415 + +“Mrs. Long and her nieces” 420 + +“Lizzy, my dear, I want to speak to you” 422 + +Heading to Chapter LVI. 431 + +“After a short survey” 434 + +“But now it comes out” 442 + +“The efforts of his aunt” 448 + +“Unable to utter a syllable” 457 + +“The obsequious civility” 466 + +Heading to Chapter LXI. 472 + +The End 476 + + + + +[Illustration: ·PRIDE AND PREJUDICE· + + + + +Chapter I.] + + +It is a truth universally acknowledged, that a single man in possession +of a good fortune must be in want of a wife. + +However little known the feelings or views of such a man may be on his +first entering a neighbourhood, this truth is so well fixed in the minds +of the surrounding families, that he is considered as the rightful +property of some one or other of their daughters. + +“My dear Mr. Bennet,” said his lady to him one day, “have you heard that +Netherfield Park is let at last?” + +Mr. Bennet replied that he had not. + +“But it is,” returned she; “for Mrs. Long has just been here, and she +told me all about it.” + +Mr. Bennet made no answer. + +“Do not you want to know who has taken it?” cried his wife, impatiently. + +“_You_ want to tell me, and I have no objection to hearing it.” + +[Illustration: + +“He came down to see the place” + +[_Copyright 1894 by George Allen._]] + +This was invitation enough. + +“Why, my dear, you must know, Mrs. Long says that Netherfield is taken +by a young man of large fortune from the north of England; that he came +down on Monday in a chaise and four to see the place, and was so much +delighted with it that he agreed with Mr. Morris immediately; that he is +to take possession before Michaelmas, and some of his servants are to be +in the house by the end of next week.” + +“What is his name?” + +“Bingley.” + +“Is he married or single?” + +“Oh, single, my dear, to be sure! A single man of large fortune; four or +five thousand a year. What a fine thing for our girls!” + +“How so? how can it affect them?” + +“My dear Mr. Bennet,” replied his wife, “how can you be so tiresome? You +must know that I am thinking of his marrying one of them.” + +“Is that his design in settling here?” + +“Design? Nonsense, how can you talk so! But it is very likely that he +_may_ fall in love with one of them, and therefore you must visit him as +soon as he comes.” + +“I see no occasion for that. You and the girls may go--or you may send +them by themselves, which perhaps will be still better; for as you are +as handsome as any of them, Mr. Bingley might like you the best of the +party.” + +“My dear, you flatter me. I certainly _have_ had my share of beauty, but +I do not pretend to be anything extraordinary now. When a woman has five +grown-up daughters, she ought to give over thinking of her own beauty.” + +“In such cases, a woman has not often much beauty to think of.” + +“But, my dear, you must indeed go and see Mr. Bingley when he comes into +the neighbourhood.” + +“It is more than I engage for, I assure you.” + +“But consider your daughters. Only think what an establishment it would +be for one of them. Sir William and Lady Lucas are determined to go, +merely on that account; for in general, you know, they visit no new +comers. Indeed you must go, for it will be impossible for _us_ to visit +him, if you do not.” + +“You are over scrupulous, surely. 
I dare say Mr. Bingley will be very +glad to see you; and I will send a few lines by you to assure him of my +hearty consent to his marrying whichever he chooses of the girls--though +I must throw in a good word for my little Lizzy.” + +“I desire you will do no such thing. Lizzy is not a bit better than the +others: and I am sure she is not half so handsome as Jane, nor half so +good-humoured as Lydia. But you are always giving _her_ the preference.” + +“They have none of them much to recommend them,” replied he: “they are +all silly and ignorant like other girls; but Lizzy has something more of +quickness than her sisters.” + +“Mr. Bennet, how can you abuse your own children in such a way? You take +delight in vexing me. You have no compassion on my poor nerves.” + +“You mistake me, my dear. I have a high respect for your nerves. They +are my old friends. I have heard you mention them with consideration +these twenty years at least.” + +“Ah, you do not know what I suffer.” + +“But I hope you will get over it, and live to see many young men of four +thousand a year come into the neighbourhood.” + +“It will be no use to us, if twenty such should come, since you will not +visit them.” + +“Depend upon it, my dear, that when there are twenty, I will visit them +all.” + +Mr. Bennet was so odd a mixture of quick parts, sarcastic humour, +reserve, and caprice, that the experience of three-and-twenty years had +been insufficient to make his wife understand his character. _Her_ mind +was less difficult to develope. She was a woman of mean understanding, +little information, and uncertain temper. When she was discontented, she +fancied herself nervous. The business of her life was to get her +daughters married: its solace was visiting and news. + +[Illustration: M^{r.} & M^{rs.} Bennet + +[_Copyright 1894 by George Allen._]] + + + + +[Illustration: + +“I hope Mr. Bingley will like it” + +[_Copyright 1894 by George Allen._]] + + + + +CHAPTER II. + + +[Illustration] + +Mr. Bennet was among the earliest of those who waited on Mr. Bingley. He +had always intended to visit him, though to the last always assuring his +wife that he should not go; and till the evening after the visit was +paid she had no knowledge of it. It was then disclosed in the following +manner. Observing his second daughter employed in trimming a hat, he +suddenly addressed her with,-- + +“I hope Mr. Bingley will like it, Lizzy.” + +“We are not in a way to know _what_ Mr. Bingley likes,” said her mother, +resentfully, “since we are not to visit.” + +“But you forget, mamma,” said Elizabeth, “that we shall meet him at the +assemblies, and that Mrs. Long has promised to introduce him.” + +“I do not believe Mrs. Long will do any such thing. She has two nieces +of her own. She is a selfish, hypocritical woman, and I have no opinion +of her.” + +“No more have I,” said Mr. Bennet; “and I am glad to find that you do +not depend on her serving you.” + +Mrs. Bennet deigned not to make any reply; but, unable to contain +herself, began scolding one of her daughters. + +“Don’t keep coughing so, Kitty, for heaven’s sake! Have a little +compassion on my nerves. You tear them to pieces.” + +“Kitty has no discretion in her coughs,” said her father; “she times +them ill.” + +“I do not cough for my own amusement,” replied Kitty, fretfully. “When +is your next ball to be, Lizzy?” + +“To-morrow fortnight.” + +“Ay, so it is,” cried her mother, “and Mrs. 
Long does not come back till +the day before; so, it will be impossible for her to introduce him, for +she will not know him herself.” + +“Then, my dear, you may have the advantage of your friend, and introduce +Mr. Bingley to _her_.” + +“Impossible, Mr. Bennet, impossible, when I am not acquainted with him +myself; how can you be so teasing?” + +“I honour your circumspection. A fortnight’s acquaintance is certainly +very little. One cannot know what a man really is by the end of a +fortnight. But if _we_ do not venture, somebody else will; and after +all, Mrs. Long and her nieces must stand their chance; and, therefore, +as she will think it an act of kindness, if you decline the office, I +will take it on myself.” + +The girls stared at their father. Mrs. Bennet said only, “Nonsense, +nonsense!” + +“What can be the meaning of that emphatic exclamation?” cried he. “Do +you consider the forms of introduction, and the stress that is laid on +them, as nonsense? I cannot quite agree with you _there_. What say you, +Mary? For you are a young lady of deep reflection, I know, and read +great books, and make extracts.” + +Mary wished to say something very sensible, but knew not how. + +“While Mary is adjusting her ideas,” he continued, “let us return to Mr. +Bingley.” + +“I am sick of Mr. Bingley,” cried his wife. + +“I am sorry to hear _that_; but why did you not tell me so before? If I +had known as much this morning, I certainly would not have called on +him. It is very unlucky; but as I have actually paid the visit, we +cannot escape the acquaintance now.” + +The astonishment of the ladies was just what he wished--that of Mrs. +Bennet perhaps surpassing the rest; though when the first tumult of joy +was over, she began to declare that it was what she had expected all the +while. + +“How good it was in you, my dear Mr. Bennet! But I knew I should +persuade you at last. I was sure you loved your girls too well to +neglect such an acquaintance. Well, how pleased I am! And it is such a +good joke, too, that you should have gone this morning, and never said a +word about it till now.” + +“Now, Kitty, you may cough as much as you choose,” said Mr. Bennet; and, +as he spoke, he left the room, fatigued with the raptures of his wife. + +“What an excellent father you have, girls,” said she, when the door was +shut. “I do not know how you will ever make him amends for his kindness; +or me either, for that matter. At our time of life, it is not so +pleasant, I can tell you, to be making new acquaintances every day; but +for your sakes we would do anything. Lydia, my love, though you _are_ +the youngest, I dare say Mr. Bingley will dance with you at the next +ball.” + +“Oh,” said Lydia, stoutly, “I am not afraid; for though I _am_ the +youngest, I’m the tallest.” + +The rest of the evening was spent in conjecturing how soon he would +return Mr. Bennet’s visit, and determining when they should ask him to +dinner. + +[Illustration: “I’m the tallest”] + + + + +[Illustration: + + “He rode a black horse” +] + + + + +CHAPTER III. + + +[Illustration] + +Not all that Mrs. Bennet, however, with the assistance of her five +daughters, could ask on the subject, was sufficient to draw from her +husband any satisfactory description of Mr. Bingley. They attacked him +in various ways, with barefaced questions, ingenious suppositions, and +distant surmises; but he eluded the skill of them all; and they were at +last obliged to accept the second-hand intelligence of their neighbour, +Lady Lucas. 
Her report was highly favourable. Sir William had been +delighted with him. He was quite young, wonderfully handsome, extremely +agreeable, and, to crown the whole, he meant to be at the next assembly +with a large party. Nothing could be more delightful! To be fond of +dancing was a certain step towards falling in love; and very lively +hopes of Mr. Bingley’s heart were entertained. + +“If I can but see one of my daughters happily settled at Netherfield,” +said Mrs. Bennet to her husband, “and all the others equally well +married, I shall have nothing to wish for.” + +In a few days Mr. Bingley returned Mr. Bennet’s visit, and sat about ten +minutes with him in his library. He had entertained hopes of being +admitted to a sight of the young ladies, of whose beauty he had heard +much; but he saw only the father. The ladies were somewhat more +fortunate, for they had the advantage of ascertaining, from an upper +window, that he wore a blue coat and rode a black horse. + +An invitation to dinner was soon afterwards despatched; and already had +Mrs. Bennet planned the courses that were to do credit to her +housekeeping, when an answer arrived which deferred it all. Mr. Bingley +was obliged to be in town the following day, and consequently unable to +accept the honour of their invitation, etc. Mrs. Bennet was quite +disconcerted. She could not imagine what business he could have in town +so soon after his arrival in Hertfordshire; and she began to fear that +he might always be flying about from one place to another, and never +settled at Netherfield as he ought to be. Lady Lucas quieted her fears a +little by starting the idea of his + +[Illustration: + + “When the Party entered” + +[_Copyright 1894 by George Allen._]] + +being gone to London only to get a large party for the ball; and a +report soon followed that Mr. Bingley was to bring twelve ladies and +seven gentlemen with him to the assembly. The girls grieved over such a +number of ladies; but were comforted the day before the ball by hearing +that, instead of twelve, he had brought only six with him from London, +his five sisters and a cousin. And when the party entered the +assembly-room, it consisted of only five altogether: Mr. Bingley, his +two sisters, the husband of the eldest, and another young man. + +Mr. Bingley was good-looking and gentlemanlike: he had a pleasant +countenance, and easy, unaffected manners. His sisters were fine women, +with an air of decided fashion. His brother-in-law, Mr. Hurst, merely +looked the gentleman; but his friend Mr. Darcy soon drew the attention +of the room by his fine, tall person, handsome features, noble mien, and +the report, which was in general circulation within five minutes after +his entrance, of his having ten thousand a year. The gentlemen +pronounced him to be a fine figure of a man, the ladies declared he was +much handsomer than Mr. Bingley, and he was looked at with great +admiration for about half the evening, till his manners gave a disgust +which turned the tide of his popularity; for he was discovered to be +proud, to be above his company, and above being pleased; and not all his +large estate in Derbyshire could save him from having a most forbidding, +disagreeable countenance, and being unworthy to be compared with his +friend. + +Mr. Bingley had soon made himself acquainted with all the principal +people in the room: he was lively and unreserved, danced every dance, +was angry that the ball closed so early, and talked of giving one +himself at Netherfield. 
Such amiable qualities must speak for +themselves. What a contrast between him and his friend! Mr. Darcy danced +only once with Mrs. Hurst and once with Miss Bingley, declined being +introduced to any other lady, and spent the rest of the evening in +walking about the room, speaking occasionally to one of his own party. +His character was decided. He was the proudest, most disagreeable man in +the world, and everybody hoped that he would never come there again. +Amongst the most violent against him was Mrs. Bennet, whose dislike of +his general behaviour was sharpened into particular resentment by his +having slighted one of her daughters. + +Elizabeth Bennet had been obliged, by the scarcity of gentlemen, to sit +down for two dances; and during part of that time, Mr. Darcy had been +standing near enough for her to overhear a conversation between him and +Mr. Bingley, who came from the dance for a few minutes to press his +friend to join it. + +“Come, Darcy,” said he, “I must have you dance. I hate to see you +standing about by yourself in this stupid manner. You had much better +dance.” + +“I certainly shall not. You know how I detest it, unless I am +particularly acquainted with my partner. At such an assembly as this, it +would be insupportable. Your sisters are engaged, and there is not +another woman in the room whom it would not be a punishment to me to +stand up with.” + +“I would not be so fastidious as you are,” cried Bingley, “for a +kingdom! Upon my honour, I never met with so many pleasant girls in my +life as I have this evening; and there are several of them, you see, +uncommonly pretty.” + +“_You_ are dancing with the only handsome girl in the room,” said Mr. +Darcy, looking at the eldest Miss Bennet. + +“Oh, she is the most beautiful creature I ever beheld! But there is one +of her sisters sitting down just behind you, who is very pretty, and I +dare say very agreeable. Do let me ask my partner to introduce you.” + +[Illustration: + +“She is tolerable” + +[_Copyright 1894 by George Allen._]] + +“Which do you mean?” and turning round, he looked for a moment at +Elizabeth, till, catching her eye, he withdrew his own, and coldly said, +“She is tolerable: but not handsome enough to tempt _me_; and I am in no +humour at present to give consequence to young ladies who are slighted +by other men. You had better return to your partner and enjoy her +smiles, for you are wasting your time with me.” + +Mr. Bingley followed his advice. Mr. Darcy walked off; and Elizabeth +remained with no very cordial feelings towards him. She told the story, +however, with great spirit among her friends; for she had a lively, +playful disposition, which delighted in anything ridiculous. + +The evening altogether passed off pleasantly to the whole family. Mrs. +Bennet had seen her eldest daughter much admired by the Netherfield +party. Mr. Bingley had danced with her twice, and she had been +distinguished by his sisters. Jane was as much gratified by this as her +mother could be, though in a quieter way. Elizabeth felt Jane’s +pleasure. Mary had heard herself mentioned to Miss Bingley as the most +accomplished girl in the neighbourhood; and Catherine and Lydia had been +fortunate enough to be never without partners, which was all that they +had yet learnt to care for at a ball. They returned, therefore, in good +spirits to Longbourn, the village where they lived, and of which they +were the principal inhabitants. They found Mr. Bennet still up. 
With a +book, he was regardless of time; and on the present occasion he had a +good deal of curiosity as to the event of an evening which had raised +such splendid expectations. He had rather hoped that all his wife’s +views on the stranger would be disappointed; but he soon found that he +had a very different story to hear. + +“Oh, my dear Mr. Bennet,” as she entered the room, “we have had a most +delightful evening, a most excellent ball. I wish you had been there. +Jane was so admired, nothing could be like it. Everybody said how well +she looked; and Mr. Bingley thought her quite beautiful, and danced with +her twice. Only think of _that_, my dear: he actually danced with her +twice; and she was the only creature in the room that he asked a second +time. First of all, he asked Miss Lucas. I was so vexed to see him stand +up with her; but, however, he did not admire her at all; indeed, nobody +can, you know; and he seemed quite struck with Jane as she was going +down the dance. So he inquired who she was, and got introduced, and +asked her for the two next. Then, the two third he danced with Miss +King, and the two fourth with Maria Lucas, and the two fifth with Jane +again, and the two sixth with Lizzy, and the _Boulanger_----” + +“If he had had any compassion for _me_,” cried her husband impatiently, +“he would not have danced half so much! For God’s sake, say no more of +his partners. O that he had sprained his ancle in the first dance!” + +“Oh, my dear,” continued Mrs. Bennet, “I am quite delighted with him. He +is so excessively handsome! and his sisters are charming women. I never +in my life saw anything more elegant than their dresses. I dare say the +lace upon Mrs. Hurst’s gown----” + +Here she was interrupted again. Mr. Bennet protested against any +description of finery. She was therefore obliged to seek another branch +of the subject, and related, with much bitterness of spirit, and some +exaggeration, the shocking rudeness of Mr. Darcy. + +“But I can assure you,” she added, “that Lizzy does not lose much by not +suiting _his_ fancy; for he is a most disagreeable, horrid man, not at +all worth pleasing. So high and so conceited, that there was no enduring +him! He walked here, and he walked there, fancying himself so very +great! Not handsome enough to dance with! I wish you had been there, my +dear, to have given him one of your set-downs. I quite detest the man.” + + + + +[Illustration] + + + + +CHAPTER IV. + + +[Illustration] + +When Jane and Elizabeth were alone, the former, who had been cautious in +her praise of Mr. Bingley before, expressed to her sister how very much +she admired him. + +“He is just what a young-man ought to be,” said she, “sensible, +good-humoured, lively; and I never saw such happy manners! so much ease, +with such perfect good breeding!” + +“He is also handsome,” replied Elizabeth, “which a young man ought +likewise to be if he possibly can. His character is thereby complete.” + +“I was very much flattered by his asking me to dance a second time. I +did not expect such a compliment.” + +“Did not you? _I_ did for you. But that is one great difference between +us. Compliments always take _you_ by surprise, and _me_ never. What +could be more natural than his asking you again? He could not help +seeing that you were about five times as pretty as every other woman in +the room. No thanks to his gallantry for that. Well, he certainly is +very agreeable, and I give you leave to like him. 
You have liked many a +stupider person.” + +“Dear Lizzy!” + +“Oh, you are a great deal too apt, you know, to like people in general. +You never see a fault in anybody. All the world are good and agreeable +in your eyes. I never heard you speak ill of a human being in my life.” + +“I would wish not to be hasty in censuring anyone; but I always speak +what I think.” + +“I know you do: and it is _that_ which makes the wonder. With _your_ +good sense, to be so honestly blind to the follies and nonsense of +others! Affectation of candour is common enough; one meets with it +everywhere. But to be candid without ostentation or design,--to take the +good of everybody’s character and make it still better, and say nothing +of the bad,--belongs to you alone. And so, you like this man’s sisters, +too, do you? Their manners are not equal to his.” + +“Certainly not, at first; but they are very pleasing women when you +converse with them. Miss Bingley is to live with her brother, and keep +his house; and I am much mistaken if we shall not find a very charming +neighbour in her.” + +Elizabeth listened in silence, but was not convinced: their behaviour at +the assembly had not been calculated to please in general; and with more +quickness of observation and less pliancy of temper than her sister, and +with a judgment, too, unassailed by any attention to herself, she was +very little disposed to approve them. They were, in fact, very fine +ladies; not deficient in good-humour when they were pleased, nor in the +power of being agreeable where they chose it; but proud and conceited. +They were rather handsome; had been educated in one of the first private +seminaries in town; had a fortune of twenty thousand pounds; were in the +habit of spending more than they ought, and of associating with people +of rank; and were, therefore, in every respect entitled to think well of +themselves and meanly of others. They were of a respectable family in +the north of England; a circumstance more deeply impressed on their +memories than that their brother’s fortune and their own had been +acquired by trade. + +Mr. Bingley inherited property to the amount of nearly a hundred +thousand pounds from his father, who had intended to purchase an estate, +but did not live to do it. Mr. Bingley intended it likewise, and +sometimes made choice of his county; but, as he was now provided with a +good house and the liberty of a manor, it was doubtful to many of those +who best knew the easiness of his temper, whether he might not spend the +remainder of his days at Netherfield, and leave the next generation to +purchase. + +His sisters were very anxious for his having an estate of his own; but +though he was now established only as a tenant, Miss Bingley was by no +means unwilling to preside at his table; nor was Mrs. Hurst, who had +married a man of more fashion than fortune, less disposed to consider +his house as her home when it suited her. Mr. Bingley had not been of +age two years when he was tempted, by an accidental recommendation, to +look at Netherfield House. He did look at it, and into it, for half an +hour; was pleased with the situation and the principal rooms, satisfied +with what the owner said in its praise, and took it immediately. + +Between him and Darcy there was a very steady friendship, in spite of a +great opposition of character. 
Bingley was endeared to Darcy by the +easiness, openness, and ductility of his temper, though no disposition +could offer a greater contrast to his own, and though with his own he +never appeared dissatisfied. On the strength of Darcy’s regard, Bingley +had the firmest reliance, and of his judgment the highest opinion. In +understanding, Darcy was the superior. Bingley was by no means +deficient; but Darcy was clever. He was at the same time haughty, +reserved, and fastidious; and his manners, though well bred, were not +inviting. In that respect his friend had greatly the advantage. Bingley +was sure of being liked wherever he appeared; Darcy was continually +giving offence. + +The manner in which they spoke of the Meryton assembly was sufficiently +characteristic. Bingley had never met with pleasanter people or prettier +girls in his life; everybody had been most kind and attentive to him; +there had been no formality, no stiffness; he had soon felt acquainted +with all the room; and as to Miss Bennet, he could not conceive an angel +more beautiful. Darcy, on the contrary, had seen a collection of people +in whom there was little beauty and no fashion, for none of whom he had +felt the smallest interest, and from none received either attention or +pleasure. Miss Bennet he acknowledged to be pretty; but she smiled too +much. + +Mrs. Hurst and her sister allowed it to be so; but still they admired +her and liked her, and pronounced her to be a sweet girl, and one whom +they should not object to know more of. Miss Bennet was therefore +established as a sweet girl; and their brother felt authorized by such +commendation to think of her as he chose. + + + + +[Illustration: [_Copyright 1894 by George Allen._]] + + + + +CHAPTER V. + + +[Illustration] + +Within a short walk of Longbourn lived a family with whom the Bennets +were particularly intimate. Sir William Lucas had been formerly in trade +in Meryton, where he had made a tolerable fortune, and risen to the +honour of knighthood by an address to the king during his mayoralty. The +distinction had, perhaps, been felt too strongly. It had given him a +disgust to his business and to his residence in a small market town; +and, quitting them both, he had removed with his family to a house about +a mile from Meryton, denominated from that period Lucas Lodge; where he +could think with pleasure of his own importance, and, unshackled by +business, occupy himself solely in being civil to all the world. For, +though elated by his rank, it did not render him supercilious; on the +contrary, he was all attention to everybody. By nature inoffensive, +friendly, and obliging, his presentation at St. James’s had made him +courteous. + +Lady Lucas was a very good kind of woman, not too clever to be a +valuable neighbour to Mrs. Bennet. They had several children. The eldest +of them, a sensible, intelligent young woman, about twenty-seven, was +Elizabeth’s intimate friend. + +That the Miss Lucases and the Miss Bennets should meet to talk over a +ball was absolutely necessary; and the morning after the assembly +brought the former to Longbourn to hear and to communicate. + +“_You_ began the evening well, Charlotte,” said Mrs. Bennet, with civil +self-command, to Miss Lucas. “_You_ were Mr. Bingley’s first choice.” + +“Yes; but he seemed to like his second better.” + +“Oh, you mean Jane, I suppose, because he danced with her twice. 
To be +sure that _did_ seem as if he admired her--indeed, I rather believe he +_did_--I heard something about it--but I hardly know what--something +about Mr. Robinson.” + +“Perhaps you mean what I overheard between him and Mr. Robinson: did not +I mention it to you? Mr. Robinson’s asking him how he liked our Meryton +assemblies, and whether he did not think there were a great many pretty +women in the room, and _which_ he thought the prettiest? and his +answering immediately to the last question, ‘Oh, the eldest Miss Bennet, +beyond a doubt: there cannot be two opinions on that point.’” + +“Upon my word! Well, that was very decided, indeed--that does seem as +if--but, however, it may all come to nothing, you know.” + +“_My_ overhearings were more to the purpose than _yours_, Eliza,” said +Charlotte. “Mr. Darcy is not so well worth listening to as his friend, +is he? Poor Eliza! to be only just _tolerable_.” + +“I beg you will not put it into Lizzy’s head to be vexed by his +ill-treatment, for he is such a disagreeable man that it would be quite +a misfortune to be liked by him. Mrs. Long told me last night that he +sat close to her for half an hour without once opening his lips.” + +[Illustration: “Without once opening his lips” + +[_Copyright 1894 by George Allen._]] + +“Are you quite sure, ma’am? Is not there a little mistake?” said Jane. +“I certainly saw Mr. Darcy speaking to her.” + +“Ay, because she asked him at last how he liked Netherfield, and he +could not help answering her; but she said he seemed very angry at being +spoke to.” + +“Miss Bingley told me,” said Jane, “that he never speaks much unless +among his intimate acquaintance. With _them_ he is remarkably +agreeable.” + +“I do not believe a word of it, my dear. If he had been so very +agreeable, he would have talked to Mrs. Long. But I can guess how it +was; everybody says that he is eat up with pride, and I dare say he had +heard somehow that Mrs. Long does not keep a carriage, and had to come +to the ball in a hack chaise.” + +“I do not mind his not talking to Mrs. Long,” said Miss Lucas, “but I +wish he had danced with Eliza.” + +“Another time, Lizzy,” said her mother, “I would not dance with _him_, +if I were you.” + +“I believe, ma’am, I may safely promise you _never_ to dance with him.” + +“His pride,” said Miss Lucas, “does not offend _me_ so much as pride +often does, because there is an excuse for it. One cannot wonder that so +very fine a young man, with family, fortune, everything in his favour, +should think highly of himself. If I may so express it, he has a _right_ +to be proud.” + +“That is very true,” replied Elizabeth, “and I could easily forgive +_his_ pride, if he had not mortified _mine_.” + +“Pride,” observed Mary, who piqued herself upon the solidity of her +reflections, “is a very common failing, I believe. By all that I have +ever read, I am convinced that it is very common indeed; that human +nature is particularly prone to it, and that there are very few of us +who do not cherish a feeling of self-complacency on the score of some +quality or other, real or imaginary. Vanity and pride are different +things, though the words are often used synonymously. A person may be +proud without being vain. Pride relates more to our opinion of +ourselves; vanity to what we would have others think of us.” + +“If I were as rich as Mr. Darcy,” cried a young Lucas, who came with his +sisters, “I should not care how proud I was. 
I would keep a pack of +foxhounds, and drink a bottle of wine every day.” + +“Then you would drink a great deal more than you ought,” said Mrs. +Bennet; “and if I were to see you at it, I should take away your bottle +directly.” + +The boy protested that she should not; she continued to declare that she +would; and the argument ended only with the visit. + +[Illustration] + + + + +[Illustration] + + + + +CHAPTER VI. + + +[Illustration] + +The ladies of Longbourn soon waited on those of Netherfield. The visit +was returned in due form. Miss Bennet’s pleasing manners grew on the +good-will of Mrs. Hurst and Miss Bingley; and though the mother was +found to be intolerable, and the younger sisters not worth speaking to, +a wish of being better acquainted with _them_ was expressed towards the +two eldest. By Jane this attention was received with the greatest +pleasure; but Elizabeth still saw superciliousness in their treatment of +everybody, hardly excepting even her sister, and could not like them; +though their kindness to Jane, such as it was, had a value, as arising, +in all probability, from the influence of their brother’s admiration. It +was generally evident, whenever they met, that he _did_ admire her; and +to _her_ it was equally evident that Jane was yielding to the preference +which she had begun to entertain for him from the first, and was in a +way to be very much in love; but she considered with pleasure that it +was not likely to be discovered by the world in general, since Jane +united with great strength of feeling, a composure of temper and an +uniform cheerfulness of manner, which would guard her from the +suspicions of the impertinent. She mentioned this to her friend, Miss +Lucas. + +“It may, perhaps, be pleasant,” replied Charlotte, “to be able to impose +on the public in such a case; but it is sometimes a disadvantage to be +so very guarded. If a woman conceals her affection with the same skill +from the object of it, she may lose the opportunity of fixing him; and +it will then be but poor consolation to believe the world equally in the +dark. There is so much of gratitude or vanity in almost every +attachment, that it is not safe to leave any to itself. We can all +_begin_ freely--a slight preference is natural enough; but there are +very few of us who have heart enough to be really in love without +encouragement. In nine cases out of ten, a woman had better show _more_ +affection than she feels. Bingley likes your sister undoubtedly; but he +may never do more than like her, if she does not help him on.” + +“But she does help him on, as much as her nature will allow. If _I_ can +perceive her regard for him, he must be a simpleton indeed not to +discover it too.” + +“Remember, Eliza, that he does not know Jane’s disposition as you do.” + +“But if a woman is partial to a man, and does not endeavor to conceal +it, he must find it out.” + +“Perhaps he must, if he sees enough of her. But though Bingley and Jane +meet tolerably often, it is never for many hours together; and as they +always see each other in large mixed parties, it is impossible that +every moment should be employed in conversing together. Jane should +therefore make the most of every half hour in which she can command his +attention. 
When she is secure of him, there will be leisure for falling +in love as much as she chooses.” + +“Your plan is a good one,” replied Elizabeth, “where nothing is in +question but the desire of being well married; and if I were determined +to get a rich husband, or any husband, I dare say I should adopt it. But +these are not Jane’s feelings; she is not acting by design. As yet she +cannot even be certain of the degree of her own regard, nor of its +reasonableness. She has known him only a fortnight. She danced four +dances with him at Meryton; she saw him one morning at his own house, +and has since dined in company with him four times. This is not quite +enough to make her understand his character.” + +“Not as you represent it. Had she merely _dined_ with him, she might +only have discovered whether he had a good appetite; but you must +remember that four evenings have been also spent together--and four +evenings may do a great deal.” + +“Yes: these four evenings have enabled them to ascertain that they both +like Vingt-un better than Commerce, but with respect to any other +leading characteristic, I do not imagine that much has been unfolded.” + +“Well,” said Charlotte, “I wish Jane success with all my heart; and if +she were married to him to-morrow, I should think she had as good a +chance of happiness as if she were to be studying his character for a +twelvemonth. Happiness in marriage is entirely a matter of chance. If +the dispositions of the parties are ever so well known to each other, or +ever so similar beforehand, it does not advance their felicity in the +least. They always continue to grow sufficiently unlike afterwards to +have their share of vexation; and it is better to know as little as +possible of the defects of the person with whom you are to pass your +life.” + +“You make me laugh, Charlotte; but it is not sound. You know it is not +sound, and that you would never act in this way yourself.” + +Occupied in observing Mr. Bingley’s attention to her sister, Elizabeth +was far from suspecting that she was herself becoming an object of some +interest in the eyes of his friend. Mr. Darcy had at first scarcely +allowed her to be pretty: he had looked at her without admiration at the +ball; and when they next met, he looked at her only to criticise. But no +sooner had he made it clear to himself and his friends that she had +hardly a good feature in her face, than he began to find it was rendered +uncommonly intelligent by the beautiful expression of her dark eyes. To +this discovery succeeded some others equally mortifying. Though he had +detected with a critical eye more than one failure of perfect symmetry +in her form, he was forced to acknowledge her figure to be light and +pleasing; and in spite of his asserting that her manners were not those +of the fashionable world, he was caught by their easy playfulness. Of +this she was perfectly unaware: to her he was only the man who made +himself agreeable nowhere, and who had not thought her handsome enough +to dance with. + +He began to wish to know more of her; and, as a step towards conversing +with her himself, attended to her conversation with others. His doing so +drew her notice. It was at Sir William Lucas’s, where a large party were +assembled. + +“What does Mr. Darcy mean,” said she to Charlotte, “by listening to my +conversation with Colonel Forster?” + +“That is a question which Mr. Darcy only can answer.” + +“But if he does it any more, I shall certainly let him know that I see +what he is about. 
He has a very satirical eye, and if I do not begin by +being impertinent myself, I shall soon grow afraid of him.” + +[Illustration: “The entreaties of several” [_Copyright 1894 by George +Allen._]] + +On his approaching them soon afterwards, though without seeming to have +any intention of speaking, Miss Lucas defied her friend to mention such +a subject to him, which immediately provoking Elizabeth to do it, she +turned to him and said,-- + +“Did not you think, Mr. Darcy, that I expressed myself uncommonly well +just now, when I was teasing Colonel Forster to give us a ball at +Meryton?” + +“With great energy; but it is a subject which always makes a lady +energetic.” + +“You are severe on us.” + +“It will be _her_ turn soon to be teased,” said Miss Lucas. “I am going +to open the instrument, Eliza, and you know what follows.” + +“You are a very strange creature by way of a friend!--always wanting me +to play and sing before anybody and everybody! If my vanity had taken a +musical turn, you would have been invaluable; but as it is, I would +really rather not sit down before those who must be in the habit of +hearing the very best performers.” On Miss Lucas’s persevering, however, +she added, “Very well; if it must be so, it must.” And gravely glancing +at Mr. Darcy, “There is a very fine old saying, which everybody here is +of course familiar with--‘Keep your breath to cool your porridge,’--and +I shall keep mine to swell my song.” + +Her performance was pleasing, though by no means capital. After a song +or two, and before she could reply to the entreaties of several that she +would sing again, she was eagerly succeeded at the instrument by her +sister Mary, who having, in consequence of being the only plain one in +the family, worked hard for knowledge and accomplishments, was always +impatient for display. + +Mary had neither genius nor taste; and though vanity had given her +application, it had given her likewise a pedantic air and conceited +manner, which would have injured a higher degree of excellence than she +had reached. Elizabeth, easy and unaffected, had been listened to with +much more pleasure, though not playing half so well; and Mary, at the +end of a long concerto, was glad to purchase praise and gratitude by +Scotch and Irish airs, at the request of her younger sisters, who with +some of the Lucases, and two or three officers, joined eagerly in +dancing at one end of the room. + +Mr. Darcy stood near them in silent indignation at such a mode of +passing the evening, to the exclusion of all conversation, and was too +much engrossed by his own thoughts to perceive that Sir William Lucas +was his neighbour, till Sir William thus began:-- + +“What a charming amusement for young people this is, Mr. Darcy! There is +nothing like dancing, after all. I consider it as one of the first +refinements of polished societies.” + +“Certainly, sir; and it has the advantage also of being in vogue amongst +the less polished societies of the world: every savage can dance.” + +Sir William only smiled. “Your friend performs delightfully,” he +continued, after a pause, on seeing Bingley join the group; “and I doubt +not that you are an adept in the science yourself, Mr. Darcy.” + +“You saw me dance at Meryton, I believe, sir.” + +“Yes, indeed, and received no inconsiderable pleasure from the sight. Do +you often dance at St. 
James’s?” + +“Never, sir.” + +“Do you not think it would be a proper compliment to the place?” + +“It is a compliment which I never pay to any place if I can avoid it.” + +“You have a house in town, I conclude?” + +Mr. Darcy bowed. + +“I had once some thoughts of fixing in town myself, for I am fond of +superior society; but I did not feel quite certain that the air of +London would agree with Lady Lucas.” + +He paused in hopes of an answer: but his companion was not disposed to +make any; and Elizabeth at that instant moving towards them, he was +struck with the notion of doing a very gallant thing, and called out to +her,-- + +“My dear Miss Eliza, why are not you dancing? Mr. Darcy, you must allow +me to present this young lady to you as a very desirable partner. You +cannot refuse to dance, I am sure, when so much beauty is before you.” +And, taking her hand, he would have given it to Mr. Darcy, who, though +extremely surprised, was not unwilling to receive it, when she instantly +drew back, and said with some discomposure to Sir William,-- + +“Indeed, sir, I have not the least intention of dancing. I entreat you +not to suppose that I moved this way in order to beg for a partner.” + +Mr. Darcy, with grave propriety, requested to be allowed the honour of +her hand, but in vain. Elizabeth was determined; nor did Sir William at +all shake her purpose by his attempt at persuasion. + +“You excel so much in the dance, Miss Eliza, that it is cruel to deny me +the happiness of seeing you; and though this gentleman dislikes the +amusement in general, he can have no objection, I am sure, to oblige us +for one half hour.” + +“Mr. Darcy is all politeness,” said Elizabeth, smiling. + +“He is, indeed: but considering the inducement, my dear Miss Eliza, we +cannot wonder at his complaisance; for who would object to such a +partner?” + +Elizabeth looked archly, and turned away. Her resistance had not injured +her with the gentleman, and he was thinking of her with some +complacency, when thus accosted by Miss Bingley,-- + +“I can guess the subject of your reverie.” + +“I should imagine not.” + +“You are considering how insupportable it would be to pass many +evenings in this manner,--in such society; and, indeed, I am quite of +your opinion. I was never more annoyed! The insipidity, and yet the +noise--the nothingness, and yet the self-importance, of all these +people! What would I give to hear your strictures on them!” + +“Your conjecture is totally wrong, I assure you. My mind was more +agreeably engaged. I have been meditating on the very great pleasure +which a pair of fine eyes in the face of a pretty woman can bestow.” + +Miss Bingley immediately fixed her eyes on his face, and desired he +would tell her what lady had the credit of inspiring such reflections. +Mr. Darcy replied, with great intrepidity,-- + +“Miss Elizabeth Bennet.” + +“Miss Elizabeth Bennet!” repeated Miss Bingley. “I am all astonishment. +How long has she been such a favourite? and pray when am I to wish you +joy?” + +“That is exactly the question which I expected you to ask. A lady’s +imagination is very rapid; it jumps from admiration to love, from love +to matrimony, in a moment. I knew you would be wishing me joy.” + +“Nay, if you are so serious about it, I shall consider the matter as +absolutely settled. 
You will have a charming mother-in-law, indeed, and +of course she will be always at Pemberley with you.” + +He listened to her with perfect indifference, while she chose to +entertain herself in this manner; and as his composure convinced her +that all was safe, her wit flowed along. + + + + +[Illustration: + + “A note for Miss Bennet” + +[_Copyright 1894 by George Allen._]] + + + + +CHAPTER VII. + + +[Illustration] + +Mr. Bennet’s property consisted almost entirely in an estate of two +thousand a year, which, unfortunately for his daughters, was entailed, +in default of heirs male, on a distant relation; and their mother’s +fortune, though ample for her situation in life, could but ill supply +the deficiency of his. Her father had been an attorney in Meryton, and +had left her four thousand pounds. + +She had a sister married to a Mr. Philips, who had been a clerk to their +father and succeeded him in the business, and a brother settled in +London in a respectable line of trade. + +The village of Longbourn was only one mile from Meryton; a most +convenient distance for the young ladies, who were usually tempted +thither three or four times a week, to pay their duty to their aunt, and +to a milliner’s shop just over the way. The two youngest of the family, +Catherine and Lydia, were particularly frequent in these attentions: +their minds were more vacant than their sisters’, and when nothing +better offered, a walk to Meryton was necessary to amuse their morning +hours and furnish conversation for the evening; and, however bare of +news the country in general might be, they always contrived to learn +some from their aunt. At present, indeed, they were well supplied both +with news and happiness by the recent arrival of a militia regiment in +the neighbourhood; it was to remain the whole winter, and Meryton was +the head-quarters. + +Their visits to Mrs. Philips were now productive of the most interesting +intelligence. Every day added something to their knowledge of the +officers’ names and connections. Their lodgings were not long a secret, +and at length they began to know the officers themselves. Mr. Philips +visited them all, and this opened to his nieces a source of felicity +unknown before. They could talk of nothing but officers; and Mr. +Bingley’s large fortune, the mention of which gave animation to their +mother, was worthless in their eyes when opposed to the regimentals of +an ensign. + +After listening one morning to their effusions on this subject, Mr. +Bennet coolly observed,-- + +“From all that I can collect by your manner of talking, you must be two +of the silliest girls in the country. I have suspected it some time, but +I am now convinced.” + +Catherine was disconcerted, and made no answer; but Lydia, with perfect +indifference, continued to express her admiration of Captain Carter, and +her hope of seeing him in the course of the day, as he was going the +next morning to London. + +“I am astonished, my dear,” said Mrs. Bennet, “that you should be so +ready to think your own children silly. If I wished to think slightingly +of anybody’s children, it should not be of my own, however.” + +“If my children are silly, I must hope to be always sensible of it.” + +“Yes; but as it happens, they are all of them very clever.” + +“This is the only point, I flatter myself, on which we do not agree. I +had hoped that our sentiments coincided in every particular, but I must +so far differ from you as to think our two youngest daughters uncommonly +foolish.” + +“My dear Mr. 
Bennet, you must not expect such girls to have the sense of +their father and mother. When they get to our age, I dare say they will +not think about officers any more than we do. I remember the time when I +liked a red coat myself very well--and, indeed, so I do still at my +heart; and if a smart young colonel, with five or six thousand a year, +should want one of my girls, I shall not say nay to him; and I thought +Colonel Forster looked very becoming the other night at Sir William’s in +his regimentals.” + +“Mamma,” cried Lydia, “my aunt says that Colonel Forster and Captain +Carter do not go so often to Miss Watson’s as they did when they first +came; she sees them now very often standing in Clarke’s library.” + +Mrs. Bennet was prevented replying by the entrance of the footman with a +note for Miss Bennet; it came from Netherfield, and the servant waited +for an answer. Mrs. Bennet’s eyes sparkled with pleasure, and she was +eagerly calling out, while her daughter read,-- + +“Well, Jane, who is it from? What is it about? What does he say? Well, +Jane, make haste and tell us; make haste, my love.” + +“It is from Miss Bingley,” said Jane, and then read it aloud. + + /* NIND “My dear friend, */ + + “If you are not so compassionate as to dine to-day with Louisa and + me, we shall be in danger of hating each other for the rest of our + lives; for a whole day’s _tête-à -tête_ between two women can never + end without a quarrel. Come as soon as you can on the receipt of + this. My brother and the gentlemen are to dine with the officers. + Yours ever, + +“CAROLINE BINGLEY.” + +“With the officers!” cried Lydia: “I wonder my aunt did not tell us of +_that_.” + +“Dining out,” said Mrs. Bennet; “that is very unlucky.” + +“Can I have the carriage?” said Jane. + +“No, my dear, you had better go on horseback, because it seems likely to +rain; and then you must stay all night.” + +“That would be a good scheme,” said Elizabeth, “if you were sure that +they would not offer to send her home.” + +“Oh, but the gentlemen will have Mr. Bingley’s chaise to go to Meryton; +and the Hursts have no horses to theirs.” + +“I had much rather go in the coach.” + +“But, my dear, your father cannot spare the horses, I am sure. They are +wanted in the farm, Mr. Bennet, are not they?” + +[Illustration: Cheerful prognostics] + +“They are wanted in the farm much oftener than I can get them.” + +“But if you have got them to-day,” said Elizabeth, “my mother’s purpose +will be answered.” + +She did at last extort from her father an acknowledgment that the horses +were engaged; Jane was therefore obliged to go on horseback, and her +mother attended her to the door with many cheerful prognostics of a bad +day. Her hopes were answered; Jane had not been gone long before it +rained hard. Her sisters were uneasy for her, but her mother was +delighted. The rain continued the whole evening without intermission; +Jane certainly could not come back. + +“This was a lucky idea of mine, indeed!” said Mrs. Bennet, more than +once, as if the credit of making it rain were all her own. Till the next +morning, however, she was not aware of all the felicity of her +contrivance. Breakfast was scarcely over when a servant from Netherfield +brought the following note for Elizabeth:-- + + /* NIND “My dearest Lizzie, */ + + “I find myself very unwell this morning, which, I suppose, is to be + imputed to my getting wet through yesterday. My kind friends will + not hear of my returning home till I am better. They insist also on + my seeing Mr. 
Jones--therefore do not be alarmed if you should hear + of his having been to me--and, excepting a sore throat and a + headache, there is not much the matter with me. + +“Yours, etc.” + +“Well, my dear,” said Mr. Bennet, when Elizabeth had read the note +aloud, “if your daughter should have a dangerous fit of illness--if she +should die--it would be a comfort to know that it was all in pursuit of +Mr. Bingley, and under your orders.” + +“Oh, I am not at all afraid of her dying. People do not die of little +trifling colds. She will be taken good care of. As long as she stays +there, it is all very well. I would go and see her if I could have the +carriage.” + +Elizabeth, feeling really anxious, determined to go to her, though the +carriage was not to be had: and as she was no horsewoman, walking was +her only alternative. She declared her resolution. + +“How can you be so silly,” cried her mother, “as to think of such a +thing, in all this dirt! You will not be fit to be seen when you get +there.” + +“I shall be very fit to see Jane--which is all I want.” + +“Is this a hint to me, Lizzy,” said her father, “to send for the +horses?” + +“No, indeed. I do not wish to avoid the walk. The distance is nothing, +when one has a motive; only three miles. I shall be back by dinner.” + +“I admire the activity of your benevolence,” observed Mary, “but every +impulse of feeling should be guided by reason; and, in my opinion, +exertion should always be in proportion to what is required.” + +“We will go as far as Meryton with you,” said Catherine and Lydia. +Elizabeth accepted their company, and the three young ladies set off +together. + +“If we make haste,” said Lydia, as they walked along, “perhaps we may +see something of Captain Carter, before he goes.” + +In Meryton they parted: the two youngest repaired to the lodgings of one +of the officers’ wives, and Elizabeth continued her walk alone, crossing +field after field at a quick pace, jumping over stiles and springing +over puddles, with impatient activity, and finding herself at last +within view of the house, with weary ancles, dirty stockings, and a face +glowing with the warmth of exercise. + +She was shown into the breakfast parlour, where all but Jane were +assembled, and where her appearance created a great deal of surprise. +That she should have walked three miles so early in the day in such +dirty weather, and by herself, was almost incredible to Mrs. Hurst and +Miss Bingley; and Elizabeth was convinced that they held her in contempt +for it. She was received, however, very politely by them; and in their +brother’s manners there was something better than politeness--there was +good-humour and kindness. Mr. Darcy said very little, and Mr. Hurst +nothing at all. The former was divided between admiration of the +brilliancy which exercise had given to her complexion and doubt as to +the occasion’s justifying her coming so far alone. The latter was +thinking only of his breakfast. + +Her inquiries after her sister were not very favourably answered. Miss +Bennet had slept ill, and though up, was very feverish, and not well +enough to leave her room. Elizabeth was glad to be taken to her +immediately; and Jane, who had only been withheld by the fear of giving +alarm or inconvenience, from expressing in her note how much she longed +for such a visit, was delighted at her entrance. 
She was not equal, +however, to much conversation; and when Miss Bingley left them together, +could attempt little beside expressions of gratitude for the +extraordinary kindness she was treated with. Elizabeth silently attended +her. + +When breakfast was over, they were joined by the sisters; and Elizabeth +began to like them herself, when she saw how much affection and +solicitude they showed for Jane. The apothecary came; and having +examined his patient, said, as might be supposed, that she had caught a +violent cold, and that they must endeavour to get the better of it; +advised her to return to bed, and promised her some draughts. The advice +was followed readily, for the feverish symptoms increased, and her head +ached acutely. Elizabeth did not quit her room for a moment, nor were +the other ladies often absent; the gentlemen being out, they had in fact +nothing to do elsewhere. + +When the clock struck three, Elizabeth felt that she must go, and very +unwillingly said so. Miss Bingley offered her the carriage, and she only +wanted a little pressing to accept it, when Jane testified such concern +at parting with her that Miss Bingley was obliged to convert the offer +of the chaise into an invitation to remain at Netherfield for the +present. Elizabeth most thankfully consented, and a servant was +despatched to Longbourn, to acquaint the family with her stay, and bring +back a supply of clothes. + +[Illustration: + +“The Apothecary came” +] + + + + +[Illustration: + +“covering a screen” +] + + + + +CHAPTER VIII. + + +[Illustration] + +At five o’clock the two ladies retired to dress, and at half-past six +Elizabeth was summoned to dinner. To the civil inquiries which then +poured in, and amongst which she had the pleasure of distinguishing the +much superior solicitude of Mr. Bingley, she could not make a very +favourable answer. Jane was by no means better. The sisters, on hearing +this, repeated three or four times how much they were grieved, how +shocking it was to have a bad cold, and how excessively they disliked +being ill themselves; and then thought no more of the matter: and their +indifference towards Jane, when not immediately before them, restored +Elizabeth to the enjoyment of all her original dislike. + +Their brother, indeed, was the only one of the party whom she could +regard with any complacency. His anxiety for Jane was evident, and his +attentions to herself most pleasing; and they prevented her feeling +herself so much an intruder as she believed she was considered by the +others. She had very little notice from any but him. Miss Bingley was +engrossed by Mr. Darcy, her sister scarcely less so; and as for Mr. +Hurst, by whom Elizabeth sat, he was an indolent man, who lived only to +eat, drink, and play at cards, who, when he found her prefer a plain +dish to a ragout, had nothing to say to her. + +When dinner was over, she returned directly to Jane, and Miss Bingley +began abusing her as soon as she was out of the room. Her manners were +pronounced to be very bad indeed,--a mixture of pride and impertinence: +she had no conversation, no style, no taste, no beauty. Mrs. Hurst +thought the same, and added,-- + +“She has nothing, in short, to recommend her, but being an excellent +walker. I shall never forget her appearance this morning. She really +looked almost wild.” + +“She did indeed, Louisa. I could hardly keep my countenance. Very +nonsensical to come at all! Why must _she_ be scampering about the +country, because her sister had a cold? 
Her hair so untidy, so blowzy!” + +“Yes, and her petticoat; I hope you saw her petticoat, six inches deep +in mud, I am absolutely certain, and the gown which had been let down to +hide it not doing its office.” + +“Your picture may be very exact, Louisa,” said Bingley; “but this was +all lost upon me. I thought Miss Elizabeth Bennet looked remarkably well +when she came into the room this morning. Her dirty petticoat quite +escaped my notice.” + +“_You_ observed it, Mr. Darcy, I am sure,” said Miss Bingley; “and I am +inclined to think that you would not wish to see _your sister_ make such +an exhibition.” + +“Certainly not.” + +“To walk three miles, or four miles, or five miles, or whatever it is, +above her ancles in dirt, and alone, quite alone! what could she mean by +it? It seems to me to show an abominable sort of conceited independence, +a most country-town indifference to decorum.” + +“It shows an affection for her sister that is very pleasing,” said +Bingley. + +“I am afraid, Mr. Darcy,” observed Miss Bingley, in a half whisper, +“that this adventure has rather affected your admiration of her fine +eyes.” + +“Not at all,” he replied: “they were brightened by the exercise.” A +short pause followed this speech, and Mrs. Hurst began again,-- + +“I have an excessive regard for Jane Bennet,--she is really a very sweet +girl,--and I wish with all my heart she were well settled. But with such +a father and mother, and such low connections, I am afraid there is no +chance of it.” + +“I think I have heard you say that their uncle is an attorney in +Meryton?” + +“Yes; and they have another, who lives somewhere near Cheapside.” + +“That is capital,” added her sister; and they both laughed heartily. + +“If they had uncles enough to fill _all_ Cheapside,” cried Bingley, “it +would not make them one jot less agreeable.” + +“But it must very materially lessen their chance of marrying men of any +consideration in the world,” replied Darcy. + +To this speech Bingley made no answer; but his sisters gave it their +hearty assent, and indulged their mirth for some time at the expense of +their dear friend’s vulgar relations. + +With a renewal of tenderness, however, they repaired to her room on +leaving the dining-parlour, and sat with her till summoned to coffee. +She was still very poorly, and Elizabeth would not quit her at all, till +late in the evening, when she had the comfort of seeing her asleep, and +when it appeared to her rather right than pleasant that she should go +down stairs herself. On entering the drawing-room, she found the whole +party at loo, and was immediately invited to join them; but suspecting +them to be playing high, she declined it, and making her sister the +excuse, said she would amuse herself, for the short time she could stay +below, with a book. Mr. Hurst looked at her with astonishment. + +“Do you prefer reading to cards?” said he; “that is rather singular.” + +“Miss Eliza Bennet,” said Miss Bingley, “despises cards. She is a great +reader, and has no pleasure in anything else.” + +“I deserve neither such praise nor such censure,” cried Elizabeth; “I +am _not_ a great reader, and I have pleasure in many things.” + +“In nursing your sister I am sure you have pleasure,” said Bingley; “and +I hope it will soon be increased by seeing her quite well.” + +Elizabeth thanked him from her heart, and then walked towards a table +where a few books were lying. He immediately offered to fetch her +others; all that his library afforded. 
+ +“And I wish my collection were larger for your benefit and my own +credit; but I am an idle fellow; and though I have not many, I have more +than I ever looked into.” + +Elizabeth assured him that she could suit herself perfectly with those +in the room. + +“I am astonished,” said Miss Bingley, “that my father should have left +so small a collection of books. What a delightful library you have at +Pemberley, Mr. Darcy!” + +“It ought to be good,” he replied: “it has been the work of many +generations.” + +“And then you have added so much to it yourself--you are always buying +books.” + +“I cannot comprehend the neglect of a family library in such days as +these.” + +“Neglect! I am sure you neglect nothing that can add to the beauties of +that noble place. Charles, when you build _your_ house, I wish it may be +half as delightful as Pemberley.” + +“I wish it may.” + +“But I would really advise you to make your purchase in that +neighbourhood, and take Pemberley for a kind of model. There is not a +finer county in England than Derbyshire.” + +“With all my heart: I will buy Pemberley itself, if Darcy will sell it.” + +“I am talking of possibilities, Charles.” + +“Upon my word, Caroline, I should think it more possible to get +Pemberley by purchase than by imitation.” + +Elizabeth was so much caught by what passed, as to leave her very little +attention for her book; and, soon laying it wholly aside, she drew near +the card-table, and stationed herself between Mr. Bingley and his eldest +sister, to observe the game. + +“Is Miss Darcy much grown since the spring?” said Miss Bingley: “will +she be as tall as I am?” + +“I think she will. She is now about Miss Elizabeth Bennet’s height, or +rather taller.” + +“How I long to see her again! I never met with anybody who delighted me +so much. Such a countenance, such manners, and so extremely accomplished +for her age! Her performance on the pianoforte is exquisite.” + +“It is amazing to me,” said Bingley, “how young ladies can have patience +to be so very accomplished as they all are.” + +“All young ladies accomplished! My dear Charles, what do you mean?” + +“Yes, all of them, I think. They all paint tables, cover screens, and +net purses. I scarcely know any one who cannot do all this; and I am +sure I never heard a young lady spoken of for the first time, without +being informed that she was very accomplished.” + +“Your list of the common extent of accomplishments,” said Darcy, “has +too much truth. The word is applied to many a woman who deserves it no +otherwise than by netting a purse or covering a screen; but I am very +far from agreeing with you in your estimation of ladies in general. I +cannot boast of knowing more than half-a-dozen in the whole range of my +acquaintance that are really accomplished.” + +“Nor I, I am sure,” said Miss Bingley. + +“Then,” observed Elizabeth, “you must comprehend a great deal in your +idea of an accomplished woman.” + +“Yes; I do comprehend a great deal in it.” + +“Oh, certainly,” cried his faithful assistant, “no one can be really +esteemed accomplished who does not greatly surpass what is usually met +with. 
A woman must have a thorough knowledge of music, singing, drawing, +dancing, and the modern languages, to deserve the word; and, besides all +this, she must possess a certain something in her air and manner of +walking, the tone of her voice, her address and expressions, or the word +will be but half deserved.” + +“All this she must possess,” added Darcy; “and to all she must yet add +something more substantial in the improvement of her mind by extensive +reading.” + +“I am no longer surprised at your knowing _only_ six accomplished women. +I rather wonder now at your knowing _any_.” + +“Are you so severe upon your own sex as to doubt the possibility of all +this?” + +“_I_ never saw such a woman. _I_ never saw such capacity, and taste, and +application, and elegance, as you describe, united.” + +Mrs. Hurst and Miss Bingley both cried out against the injustice of her +implied doubt, and were both protesting that they knew many women who +answered this description, when Mr. Hurst called them to order, with +bitter complaints of their inattention to what was going forward. As all +conversation was thereby at an end, Elizabeth soon afterwards left the +room. + +“Eliza Bennet,” said Miss Bingley, when the door was closed on her, “is +one of those young ladies who seek to recommend themselves to the other +sex by undervaluing their own; and with many men, I daresay, it +succeeds; but, in my opinion, it is a paltry device, a very mean art.” + +“Undoubtedly,” replied Darcy, to whom this remark was chiefly addressed, +“there is meanness in _all_ the arts which ladies sometimes condescend +to employ for captivation. Whatever bears affinity to cunning is +despicable.” + +Miss Bingley was not so entirely satisfied with this reply as to +continue the subject. + +Elizabeth joined them again only to say that her sister was worse, and +that she could not leave her. Bingley urged Mr. Jones’s being sent for +immediately; while his sisters, convinced that no country advice could +be of any service, recommended an express to town for one of the most +eminent physicians. This she would not hear of; but she was not so +unwilling to comply with their brother’s proposal; and it was settled +that Mr. Jones should be sent for early in the morning, if Miss Bennet +were not decidedly better. Bingley was quite uncomfortable; his sisters +declared that they were miserable. They solaced their wretchedness, +however, by duets after supper; while he could find no better relief to +his feelings than by giving his housekeeper directions that every +possible attention might be paid to the sick lady and her sister. + + + + +[Illustration: + +M^{rs} Bennet and her two youngest girls + +[_Copyright 1894 by George Allen._]] + + + + +CHAPTER IX. + + +[Illustration] + +Elizabeth passed the chief of the night in her sister’s room, and in the +morning had the pleasure of being able to send a tolerable answer to the +inquiries which she very early received from Mr. Bingley by a housemaid, +and some time afterwards from the two elegant ladies who waited on his +sisters. In spite of this amendment, however, she requested to have a +note sent to Longbourn, desiring her mother to visit Jane, and form her +own judgment of her situation. The note was immediately despatched, and +its contents as quickly complied with. Mrs. Bennet, accompanied by her +two youngest girls, reached Netherfield soon after the family breakfast. + +Had she found Jane in any apparent danger, Mrs. 
Bennet would have been +very miserable; but being satisfied on seeing her that her illness was +not alarming, she had no wish of her recovering immediately, as her +restoration to health would probably remove her from Netherfield. She +would not listen, therefore, to her daughter’s proposal of being carried +home; neither did the apothecary, who arrived about the same time, think +it at all advisable. After sitting a little while with Jane, on Miss +Bingley’s appearance and invitation, the mother and three daughters all +attended her into the breakfast parlour. Bingley met them with hopes +that Mrs. Bennet had not found Miss Bennet worse than she expected. + +“Indeed I have, sir,” was her answer. “She is a great deal too ill to be +moved. Mr. Jones says we must not think of moving her. We must trespass +a little longer on your kindness.” + +“Removed!” cried Bingley. “It must not be thought of. My sister, I am +sure, will not hear of her removal.” + +“You may depend upon it, madam,” said Miss Bingley, with cold civility, +“that Miss Bennet shall receive every possible attention while she +remains with us.” + +Mrs. Bennet was profuse in her acknowledgments. + +“I am sure,” she added, “if it was not for such good friends, I do not +know what would become of her, for she is very ill indeed, and suffers a +vast deal, though with the greatest patience in the world, which is +always the way with her, for she has, without exception, the sweetest +temper I ever met with. I often tell my other girls they are nothing to +_her_. You have a sweet room here, Mr. Bingley, and a charming prospect +over that gravel walk. I do not know a place in the country that is +equal to Netherfield. You will not think of quitting it in a hurry, I +hope, though you have but a short lease.” + +“Whatever I do is done in a hurry,” replied he; “and therefore if I +should resolve to quit Netherfield, I should probably be off in five +minutes. At present, however, I consider myself as quite fixed here.” + +“That is exactly what I should have supposed of you,” said Elizabeth. + +“You begin to comprehend me, do you?” cried he, turning towards her. + +“Oh yes--I understand you perfectly.” + +“I wish I might take this for a compliment; but to be so easily seen +through, I am afraid, is pitiful.” + +“That is as it happens. It does not necessarily follow that a deep, +intricate character is more or less estimable than such a one as yours.” + +“Lizzy,” cried her mother, “remember where you are, and do not run on in +the wild manner that you are suffered to do at home.” + +“I did not know before,” continued Bingley, immediately, “that you were +a studier of character. It must be an amusing study.” + +“Yes; but intricate characters are the _most_ amusing. They have at +least that advantage.” + +“The country,” said Darcy, “can in general supply but few subjects for +such a study. In a country neighbourhood you move in a very confined and +unvarying society.” + +“But people themselves alter so much, that there is something new to be +observed in them for ever.” + +“Yes, indeed,” cried Mrs. Bennet, offended by his manner of mentioning a +country neighbourhood. “I assure you there is quite as much of _that_ +going on in the country as in town.” + +Everybody was surprised; and Darcy, after looking at her for a moment, +turned silently away. Mrs. 
Bennet, who fancied she had gained a complete +victory over him, continued her triumph,-- + +“I cannot see that London has any great advantage over the country, for +my part, except the shops and public places. The country is a vast deal +pleasanter, is not it, Mr. Bingley?” + +“When I am in the country,” he replied, “I never wish to leave it; and +when I am in town, it is pretty much the same. They have each their +advantages, and I can be equally happy in either.” + +“Ay, that is because you have the right disposition. But that +gentleman,” looking at Darcy, “seemed to think the country was nothing +at all.” + +“Indeed, mamma, you are mistaken,” said Elizabeth, blushing for her +mother. “You quite mistook Mr. Darcy. He only meant that there was not +such a variety of people to be met with in the country as in town, which +you must acknowledge to be true.” + +“Certainly, my dear, nobody said there were; but as to not meeting with +many people in this neighbourhood, I believe there are few +neighbourhoods larger. I know we dine with four-and-twenty families.” + +Nothing but concern for Elizabeth could enable Bingley to keep his +countenance. His sister was less delicate, and directed her eye towards +Mr. Darcy with a very expressive smile. Elizabeth, for the sake of +saying something that might turn her mother’s thoughts, now asked her if +Charlotte Lucas had been at Longbourn since _her_ coming away. + +“Yes, she called yesterday with her father. What an agreeable man Sir +William is, Mr. Bingley--is not he? so much the man of fashion! so +genteel and so easy! He has always something to say to everybody. _That_ +is my idea of good breeding; and those persons who fancy themselves very +important and never open their mouths quite mistake the matter.” + +“Did Charlotte dine with you?” + +“No, she would go home. I fancy she was wanted about the mince-pies. For +my part, Mr. Bingley, _I_ always keep servants that can do their own +work; _my_ daughters are brought up differently. But everybody is to +judge for themselves, and the Lucases are a very good sort of girls, I +assure you. It is a pity they are not handsome! Not that _I_ think +Charlotte so _very_ plain; but then she is our particular friend.” + +“She seems a very pleasant young woman,” said Bingley. + +“Oh dear, yes; but you must own she is very plain. Lady Lucas herself +has often said so, and envied me Jane’s beauty. I do not like to boast +of my own child; but to be sure, Jane--one does not often see anybody +better looking. It is what everybody says. I do not trust my own +partiality. When she was only fifteen there was a gentleman at my +brother Gardiner’s in town so much in love with her, that my +sister-in-law was sure he would make her an offer before we came away. +But, however, he did not. Perhaps he thought her too young. However, he +wrote some verses on her, and very pretty they were.” + +“And so ended his affection,” said Elizabeth, impatiently. “There has +been many a one, I fancy, overcome in the same way. I wonder who first +discovered the efficacy of poetry in driving away love!” + +“I have been used to consider poetry as the _food_ of love,” said Darcy. + +“Of a fine, stout, healthy love it may. Everything nourishes what is +strong already. But if it be only a slight, thin sort of inclination, I +am convinced that one good sonnet will starve it entirely away.” + +Darcy only smiled; and the general pause which ensued made Elizabeth +tremble lest her mother should be exposing herself again. 
She longed to +speak, but could think of nothing to say; and after a short silence Mrs. +Bennet began repeating her thanks to Mr. Bingley for his kindness to +Jane, with an apology for troubling him also with Lizzy. Mr. Bingley was +unaffectedly civil in his answer, and forced his younger sister to be +civil also, and say what the occasion required. She performed her part, +indeed, without much graciousness, but Mrs. Bennet was satisfied, and +soon afterwards ordered her carriage. Upon this signal, the youngest of +her daughters put herself forward. The two girls had been whispering to +each other during the whole visit; and the result of it was, that the +youngest should tax Mr. Bingley with having promised on his first coming +into the country to give a ball at Netherfield. + +Lydia was a stout, well-grown girl of fifteen, with a fine complexion +and good-humoured countenance; a favourite with her mother, whose +affection had brought her into public at an early age. She had high +animal spirits, and a sort of natural self-consequence, which the +attentions of the officers, to whom her uncle’s good dinners and her +own easy manners recommended her, had increased into assurance. She was +very equal, therefore, to address Mr. Bingley on the subject of the +ball, and abruptly reminded him of his promise; adding, that it would be +the most shameful thing in the world if he did not keep it. His answer +to this sudden attack was delightful to her mother’s ear. + +“I am perfectly ready, I assure you, to keep my engagement; and, when +your sister is recovered, you shall, if you please, name the very day of +the ball. But you would not wish to be dancing while she is ill?” + +Lydia declared herself satisfied. “Oh yes--it would be much better to +wait till Jane was well; and by that time, most likely, Captain Carter +would be at Meryton again. And when you have given _your_ ball,” she +added, “I shall insist on their giving one also. I shall tell Colonel +Forster it will be quite a shame if he does not.” + +Mrs. Bennet and her daughters then departed, and Elizabeth returned +instantly to Jane, leaving her own and her relations’ behaviour to the +remarks of the two ladies and Mr. Darcy; the latter of whom, however, +could not be prevailed on to join in their censure of _her_, in spite of +all Miss Bingley’s witticisms on _fine eyes_. + + + + +[Illustration] + + + + +CHAPTER X. + + +[Illustration] + +The day passed much as the day before had done. Mrs. Hurst and Miss +Bingley had spent some hours of the morning with the invalid, who +continued, though slowly, to mend; and, in the evening, Elizabeth joined +their party in the drawing-room. The loo table, however, did not appear. +Mr. Darcy was writing, and Miss Bingley, seated near him, was watching +the progress of his letter, and repeatedly calling off his attention by +messages to his sister. Mr. Hurst and Mr. Bingley were at piquet, and +Mrs. Hurst was observing their game. + +Elizabeth took up some needlework, and was sufficiently amused in +attending to what passed between Darcy and his companion. The perpetual +commendations of the lady either on his hand-writing, or on the evenness +of his lines, or on the length of his letter, with the perfect unconcern +with which her praises were received, formed a curious dialogue, and was +exactly in unison with her opinion of each. + +“How delighted Miss Darcy will be to receive such a letter!” + +He made no answer. + +“You write uncommonly fast.” + +“You are mistaken. 
I write rather slowly.” + +“How many letters you must have occasion to write in the course of a +year! Letters of business, too! How odious I should think them!” + +“It is fortunate, then, that they fall to my lot instead of to yours.” + +“Pray tell your sister that I long to see her.” + +“I have already told her so once, by your desire.” + +“I am afraid you do not like your pen. Let me mend it for you. I mend +pens remarkably well.” + +“Thank you--but I always mend my own.” + +“How can you contrive to write so even?” + +He was silent. + +“Tell your sister I am delighted to hear of her improvement on the harp, +and pray let her know that I am quite in raptures with her beautiful +little design for a table, and I think it infinitely superior to Miss +Grantley’s.” + +“Will you give me leave to defer your raptures till I write again? At +present I have not room to do them justice.” + +“Oh, it is of no consequence. I shall see her in January. But do you +always write such charming long letters to her, Mr. Darcy?” + +“They are generally long; but whether always charming, it is not for me +to determine.” + +“It is a rule with me, that a person who can write a long letter with +ease cannot write ill.” + +“That will not do for a compliment to Darcy, Caroline,” cried her +brother, “because he does _not_ write with ease. He studies too much +for words of four syllables. Do not you, Darcy?” + +“My style of writing is very different from yours.” + +“Oh,” cried Miss Bingley, “Charles writes in the most careless way +imaginable. He leaves out half his words, and blots the rest.” + +“My ideas flow so rapidly that I have not time to express them; by which +means my letters sometimes convey no ideas at all to my correspondents.” + +“Your humility, Mr. Bingley,” said Elizabeth, “must disarm reproof.” + +“Nothing is more deceitful,” said Darcy, “than the appearance of +humility. It is often only carelessness of opinion, and sometimes an +indirect boast.” + +“And which of the two do you call _my_ little recent piece of modesty?” + +“The indirect boast; for you are really proud of your defects in +writing, because you consider them as proceeding from a rapidity of +thought and carelessness of execution, which, if not estimable, you +think at least highly interesting. The power of doing anything with +quickness is always much prized by the possessor, and often without any +attention to the imperfection of the performance. When you told Mrs. +Bennet this morning, that if you ever resolved on quitting Netherfield +you should be gone in five minutes, you meant it to be a sort of +panegyric, of compliment to yourself; and yet what is there so very +laudable in a precipitance which must leave very necessary business +undone, and can be of no real advantage to yourself or anyone else?” + + + CHISWICK PRESS:--CHARLES WHITTINGHAM AND CO. + TOOKS COURT, CHANCERY LANE, LONDON. 
+ + +*** END OF THE PROJECT GUTENBERG EBOOK 1342 *** diff --git a/tests/dummy/data/transformers.py b/tests/dummy/data/transformers.py new file mode 100644 index 0000000..7d8911b --- /dev/null +++ b/tests/dummy/data/transformers.py @@ -0,0 +1,50 @@ +from typing import Iterable + +from datasets import ( # type: ignore + Dataset, + DatasetDict, + IterableDataset, + IterableDatasetDict, +) + + +def create_sample_dataset( + column: str = "text", pattern: str = "sample text {}" +) -> Dataset: + return Dataset.from_dict({column: [pattern.format(ind) for ind in range(1, 4)]}) + + +def create_sample_iterable_dataset( + column: str = "text", pattern: str = "sample text {}" +) -> IterableDataset: + def _generator(): + for ind in range(1, 4): + yield {column: pattern.format(ind)} + + return IterableDataset.from_generator(_generator) + + +def create_sample_dataset_dict( + splits: Iterable[str] = ("train", "test"), + column: str = "text", + pattern: str = "sample text {}", +): + return DatasetDict( + { + split: create_sample_dataset(column=column, pattern=pattern) + for split in splits + } + ) + + +def create_sample_iterable_dataset_dict( + splits: Iterable[str] = ("train", "test"), + column: str = "text", + pattern: str = "sample text {}", +): + return IterableDatasetDict( + { + split: create_sample_iterable_dataset(column=column, pattern=pattern) + for split in splits + } + ) diff --git a/tests/e2e/core/__init__.py b/tests/e2e/cli/__init__.py similarity index 100% rename from tests/e2e/core/__init__.py rename to tests/e2e/cli/__init__.py diff --git a/tests/unit/cli/conftest.py b/tests/e2e/cli/conftest.py similarity index 100% rename from tests/unit/cli/conftest.py rename to tests/e2e/cli/conftest.py diff --git a/tests/unit/cli/test_application_entrypoint.py b/tests/e2e/cli/test_application_entrypoint.py similarity index 93% rename from tests/unit/cli/test_application_entrypoint.py rename to tests/e2e/cli/test_application_entrypoint.py index 1ac3bc1..e555bb0 100644 --- a/tests/unit/cli/test_application_entrypoint.py +++ b/tests/e2e/cli/test_application_entrypoint.py @@ -3,6 +3,7 @@ import pytest from click.testing import CliRunner + from guidellm.main import main @@ -23,7 +24,11 @@ def test_main_cli_overrided( ["--target", "localhost:9000", "--backend", "test", "--rate-type", "sweep"], ) default_main_kwargs.update( - {"target": "localhost:9000", "backend": "test", "rate_type": "sweep"} + { + "target": "localhost:9000", + "backend": "test", + "rate_type": "sweep", + } ) assert patch_main.call_count == 1 diff --git a/tests/unit/cli/test_main_validation.py b/tests/e2e/cli/test_main_validation.py similarity index 99% rename from tests/unit/cli/test_main_validation.py rename to tests/e2e/cli/test_main_validation.py index 73196c2..db3d49b 100644 --- a/tests/unit/cli/test_main_validation.py +++ b/tests/e2e/cli/test_main_validation.py @@ -1,4 +1,5 @@ import pytest + from guidellm.main import main diff --git a/tests/integration/backend/__init__.py b/tests/integration/backend/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/tests/integration/backend/test_openai_backend_submit.py b/tests/integration/backend/test_openai_backend_submit.py deleted file mode 100644 index 77c20c1..0000000 --- a/tests/integration/backend/test_openai_backend_submit.py +++ /dev/null @@ -1,71 +0,0 @@ -import time -from typing import Callable - -import pytest -import requests -from guidellm.backend import OpenAIBackend -from guidellm.config import settings -from guidellm.core import TextGenerationRequest, 
TextGenerationResult -from openai.pagination import SyncPage -from openai.types import Model - - -@pytest.fixture(scope="session", autouse=True) -def _openai_server_healthcheck(): - """ - Check if the openai server is running - """ - - if not (openai_server := settings.openai.base_url): - raise ValueError( - "Integration backend tests can't be run without " - "GUIDELLM__OPENAI__BASE_URL specified", - ) - - try: - requests.get(openai_server, timeout=10) - except requests.ConnectionError: - raise SystemExit( - "Integration backend tests can't be run without " - f"OpenAI compatible server running. Please check the {openai_server}", - ) from None - - -@pytest.mark.skip("OpenAI compatible service is not deployed yet") -@pytest.mark.sanity() -def test_openai_submit_request( - mocker, - openai_backend_factory: Callable[..., OpenAIBackend], -): - """ - Check the OpenAI making request and checking the results. - - Check if the total time that is stored in the TextGenerationResult corresponds - to the real execution time - """ - - openai_resources_models_list_patch = mocker.patch( - "openai.resources.models.Models.list", - return_value=SyncPage( - object="list", - data=[ - Model( - id="d69244a8-3f30-4f08-a432-8c83d5f254ad", - created=1719814049, - object="model", - owned_by="guidellm", - ), - ], - ), - ) - backend: OpenAIBackend = openai_backend_factory() - request = TextGenerationRequest(prompt="Generate numbers from 1 to 10") - - start_time = time.perf_counter() - result: TextGenerationResult = backend.submit(request=request) - total_for_submit = time.perf_counter() - start_time - - assert result.start_time is not None - assert result.end_time is not None - assert openai_resources_models_list_patch.call_count == 1 - assert abs((result.end_time - result.start_time) - total_for_submit) < 1 diff --git a/tests/integration/executor/__init__.py b/tests/integration/executor/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/tests/integration/executor/conftest.py b/tests/integration/executor/conftest.py deleted file mode 100644 index e6dbbe1..0000000 --- a/tests/integration/executor/conftest.py +++ /dev/null @@ -1,36 +0,0 @@ -from typing import List, cast - -import openai -import pytest -from openai.pagination import SyncPage - -from tests import dummy - - -@pytest.fixture(autouse=True) -def openai_completion_create_patch( - mocker, -) -> openai.Stream[openai.types.Completion]: - """ - Mock available models function to avoid OpenAI API call. - """ - - items = list(dummy.data.openai_completion_factory()) - mocker.patch("openai.resources.completions.Completions.create", return_value=items) - - return cast(openai.Stream[openai.types.Completion], items) - - -@pytest.fixture(autouse=True) -def openai_models_list_patch(mocker) -> List[openai.types.Model]: - """ - Mock available models function to avoid OpenAI API call. 
- """ - - items: List[openai.types.Model] = list(dummy.data.openai_model_factory()) - mocker.patch( - "openai.resources.models.Models.list", - return_value=SyncPage(object="list", data=items), - ) - - return items diff --git a/tests/integration/executor/test_report_generation.py b/tests/integration/executor/test_report_generation.py deleted file mode 100644 index 9aa0fb3..0000000 --- a/tests/integration/executor/test_report_generation.py +++ /dev/null @@ -1,179 +0,0 @@ -import time - -import pytest -from guidellm.backend import OpenAIBackend -from guidellm.core import TextGenerationBenchmarkReport -from guidellm.executor import Executor, ProfileGenerationMode -from guidellm.scheduler import LoadGenerationMode - -from tests import dummy - - -@pytest.mark.sanity() -def test_executor_openai_single_report_generation_sync_mode( - openai_backend_factory, - openai_completion_create_patch, -): - request_genrator = dummy.services.TestRequestGenerator( - tokenizer="bert-base-uncased", - ) - profile_generation_mode = ProfileGenerationMode.FIXED_RATE - profile_generator_kwargs = { - "load_gen_mode": LoadGenerationMode.SYNCHRONOUS, - } - - executor = Executor( - backend=openai_backend_factory(), - request_generator=request_genrator, - profile_mode=profile_generation_mode, - profile_args=profile_generator_kwargs, - max_requests=1, - max_duration=2, - ) - - report: TextGenerationBenchmarkReport = executor.run() - - assert isinstance(executor.backend, OpenAIBackend) - assert len(report.benchmarks) == 1 - assert len(report.benchmarks[0].results) == 1 - assert report.benchmarks[0].results[0].output == " ".join( - item.content for item in openai_completion_create_patch - ) - - -@pytest.mark.sanity() -def test_executor_openai_single_report_generation_constant_mode_infinite( - openai_backend_factory, -): - """ - Test without max duration defined. - - Does not matter how many requests is specified, - the execution DOES NOT have any duration limitations. - """ - - request_genrator = dummy.services.TestRequestGenerator( - tokenizer="bert-base-uncased", - ) - profile_generation_mode = ProfileGenerationMode.FIXED_RATE - profile_generator_kwargs = { - "load_gen_mode": LoadGenerationMode.CONSTANT, - "rates": [1.0], - } - - executor = Executor( - backend=openai_backend_factory(), - request_generator=request_genrator, - profile_mode=profile_generation_mode, - profile_args=profile_generator_kwargs, - max_requests=2, - max_duration=None, # not specified for no limitations - ) - - report: TextGenerationBenchmarkReport = executor.run() - - assert isinstance(executor.backend, OpenAIBackend) - assert len(report.benchmarks) == 1 - assert len(report.benchmarks[0].errors) == 0 - - -@pytest.mark.sanity() -def test_executor_openai_single_report_generation_constant_mode_limited( - openai_backend_factory, -): - """ - Test with max duration defined. 
- """ - - request_genrator = dummy.services.TestRequestGenerator( - tokenizer="bert-base-uncased", - ) - profile_generation_mode = ProfileGenerationMode.FIXED_RATE - profile_generator_kwargs = { - "load_gen_mode": LoadGenerationMode.CONSTANT, - "rates": [1.0], - } - - executor = Executor( - backend=openai_backend_factory(), - request_generator=request_genrator, - profile_mode=profile_generation_mode, - profile_args=profile_generator_kwargs, - max_requests=2, - max_duration=3, - ) - - report: TextGenerationBenchmarkReport = executor.run() - - assert isinstance(executor.backend, OpenAIBackend) - assert len(report.benchmarks) == 1 - assert len(report.benchmarks[0].results) == 2 - - -@pytest.mark.sanity() -def test_executor_openai_single_report_generation_constant_mode_failed( - mocker, - openai_backend_factory, -): - """ - Test max duration immediate tasks iteration break up - because of the `time.time() - start_time >= self._max_duration`. - """ - - mocker.patch("guidellm.backend.Backend.submit", side_effect=Exception) - - request_genrator = dummy.services.TestRequestGenerator( - tokenizer="bert-base-uncased", - ) - profile_generation_mode = ProfileGenerationMode.FIXED_RATE - profile_generator_kwargs = { - "load_gen_mode": LoadGenerationMode.CONSTANT, - "rates": [1.0], - } - - executor = Executor( - backend=openai_backend_factory(), - request_generator=request_genrator, - profile_mode=profile_generation_mode, - profile_args=profile_generator_kwargs, - max_requests=3, - max_duration=None, - ) - - report: TextGenerationBenchmarkReport = executor.run() - - assert isinstance(executor.backend, OpenAIBackend) - assert len(report.benchmarks) == 1 - assert len(report.benchmarks[0].errors) == 3 - - -@pytest.mark.sanity() -def test_executor_openai_single_report_generation_constant_mode_cancelled_reports( - openai_backend_factory, -): - request_genrator = dummy.services.TestRequestGenerator( - tokenizer="bert-base-uncased", - ) - profile_generation_mode = ProfileGenerationMode.FIXED_RATE - profile_generator_kwargs = { - "load_gen_mode": LoadGenerationMode.CONSTANT, - "rates": [1.0], - } - - executor = Executor( - backend=openai_backend_factory(), - request_generator=request_genrator, - profile_mode=profile_generation_mode, - profile_args=profile_generator_kwargs, - max_requests=5, - max_duration=3, - ) - - start_time: float = time.perf_counter() - report: TextGenerationBenchmarkReport = executor.run() - end_time: float = time.perf_counter() - start_time - - assert isinstance(executor.backend, OpenAIBackend) - assert len(report.benchmarks) == 1 - assert len(report.benchmarks[0].errors) > 0 - assert round(end_time) == 3 diff --git a/tests/integration/request/__init__.py b/tests/integration/request/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/tests/integration/request/test_base.py b/tests/integration/request/test_base.py deleted file mode 100644 index 5df6fe7..0000000 --- a/tests/integration/request/test_base.py +++ /dev/null @@ -1,23 +0,0 @@ -import pytest -from guidellm.core.request import TextGenerationRequest -from guidellm.request.base import RequestGenerator -from transformers import AutoTokenizer, PreTrainedTokenizerBase - - -class TestRequestGenerator(RequestGenerator): - def create_item(self) -> TextGenerationRequest: - return TextGenerationRequest(prompt="Test prompt") - - -@pytest.mark.smoke() -def test_request_generator_with_hf_tokenizer(): - tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased") - generator = TestRequestGenerator(tokenizer=tokenizer) - 
assert generator.tokenizer == tokenizer - - -@pytest.mark.smoke() -def test_request_generator_with_string_tokenizer(): - generator = TestRequestGenerator(tokenizer="bert-base-uncased") - assert isinstance(generator.tokenizer, PreTrainedTokenizerBase) - assert generator.tokenizer.name_or_path == "bert-base-uncased" diff --git a/tests/integration/test_guidellm.py b/tests/integration/test_guidellm.py new file mode 100644 index 0000000..75ab221 --- /dev/null +++ b/tests/integration/test_guidellm.py @@ -0,0 +1,8 @@ +import pytest + +from guidellm.config import settings + + +@pytest.mark.smoke() +def test_import(): + assert settings diff --git a/tests/unit/backend/test_base.py b/tests/unit/backend/test_base.py index dfe6259..c518f82 100644 --- a/tests/unit/backend/test_base.py +++ b/tests/unit/backend/test_base.py @@ -1,44 +1,177 @@ -from typing import Iterator, List, Optional - import pytest -from guidellm.backend import Backend, BackendEngine, GenerativeResponse, OpenAIBackend -from guidellm.core import TextGenerationRequest +from guidellm.backend import Backend, GenerativeResponse +from guidellm.core import TextGenerationRequest, TextGenerationResult -@Backend.register(backend_type=BackendEngine.TEST) -class TestBackend(Backend): - """ - The test implementation of a LLM Backend. - """ - def __init__(self, target: str, model: str = "test"): - self.target: str = target - self.model: str = model +@pytest.mark.smoke() +def test_backend_registry(): + class MockBackend(Backend): + async def make_request(self, request): + yield GenerativeResponse(type_="final", output="Test") - def make_request( - self, - request: TextGenerationRequest, - ) -> Iterator[GenerativeResponse]: - raise NotImplementedError + def available_models(self): + return ["mock-model"] - def available_models(self) -> List[str]: - raise NotImplementedError + backend_type = "test" + Backend.register(backend_type)(MockBackend) # type: ignore + assert Backend._registry[backend_type] is MockBackend # type: ignore - @property - def default_model(self) -> str: - raise NotImplementedError + backend_instance = Backend.create(backend_type) # type: ignore + assert isinstance(backend_instance, MockBackend) - def model_tokenizer(self, model: str) -> Optional[str]: - raise NotImplementedError + with pytest.raises(ValueError): + Backend.create("invalid_type") # type: ignore @pytest.mark.smoke() -def test_backend_registry(): - """ - Ensure that all registered classes exist in the Backend._registry. 
- """ - - assert Backend._registry == { - BackendEngine.TEST: TestBackend, - BackendEngine.OPENAI_SERVER: OpenAIBackend, - } +def test_generative_response_creation(): + response = GenerativeResponse(type_="final", output="Test Output") + assert response.type_ == "final" + assert response.output == "Test Output" + assert response.add_token is None + assert response.prompt is None + + response = GenerativeResponse(type_="token_iter", add_token="token") + assert response.type_ == "token_iter" + assert response.add_token == "token" + assert response.output is None + + +@pytest.mark.smoke() +@pytest.mark.asyncio() +async def test_backend_make_request(): + class MockBackend(Backend): + async def make_request(self, request): + yield GenerativeResponse( + type_="token_iter", + add_token="Token", + prompt="Hello, world!", + prompt_token_count=5, + ) + yield GenerativeResponse( + type_="final", + output="This is a final response.", + prompt="Hello, world!", + prompt_token_count=5, + output_token_count=10, + ) + + def available_models(self): + return ["mock-model"] + + backend = MockBackend() + index = 0 + + async for response in backend.make_request(TextGenerationRequest(prompt="Test")): + if index == 0: + assert response.type_ == "token_iter" + assert response.add_token == "Token" + assert response.prompt == "Hello, world!" + assert response.prompt_token_count == 5 + else: + assert response.type_ == "final" + assert response.output == "This is a final response." + assert response.prompt == "Hello, world!" + assert response.prompt_token_count == 5 + assert response.output_token_count == 10 + index += 1 + + +@pytest.mark.smoke() +@pytest.mark.asyncio() +async def test_backend_submit_final(): + class MockBackend(Backend): + async def make_request(self, request): + yield GenerativeResponse(type_="final", output="Test") + + def available_models(self): + return ["mock-model"] + + backend = MockBackend() + result = await backend.submit(TextGenerationRequest(prompt="Test")) + assert isinstance(result, TextGenerationResult) + assert result.output == "Test" + + +@pytest.mark.smoke() +@pytest.mark.asyncio() +async def test_backend_submit_multi(): + class MockBackend(Backend): + async def make_request(self, request): + yield GenerativeResponse(type_="token_iter", add_token="Token") + yield GenerativeResponse(type_="token_iter", add_token=" ") + yield GenerativeResponse(type_="token_iter", add_token="Test") + yield GenerativeResponse(type_="final") + + def available_models(self): + return ["mock-model"] + + backend = MockBackend() + result = await backend.submit(TextGenerationRequest(prompt="Test")) + assert isinstance(result, TextGenerationResult) + assert result.output == "Token Test" + + +@pytest.mark.regression() +@pytest.mark.asyncio() +async def test_backend_submit_no_response(): + class MockBackend(Backend): + async def make_request(self, request): + if False: # simulate no yield + yield + + def available_models(self): + return ["mock-model"] + + backend = MockBackend() + + with pytest.raises(ValueError): + await backend.submit(TextGenerationRequest(prompt="Test")) + + +@pytest.mark.smoke() +@pytest.mark.asyncio() +async def test_backend_submit_multi_final(): + class MockBackend(Backend): + async def make_request(self, request): + yield GenerativeResponse(type_="token_iter", add_token="Token") + yield GenerativeResponse(type_="token_iter", add_token=" ") + yield GenerativeResponse(type_="token_iter", add_token="Test") + yield GenerativeResponse(type_="final") + yield GenerativeResponse(type_="final") + + 
def available_models(self): + return ["mock-model"] + + backend = MockBackend() + + with pytest.raises(ValueError): + await backend.submit(TextGenerationRequest(prompt="Test")) + + +@pytest.mark.smoke() +def test_backend_models(): + class MockBackend(Backend): + def available_models(self): + return ["mock-model", "mock-model-2"] + + async def make_request(self, request): + yield GenerativeResponse(type_="final", output="") + + backend = MockBackend() + assert backend.available_models() == ["mock-model", "mock-model-2"] + assert backend.default_model == "mock-model" + + +@pytest.mark.regression() +def test_backend_abstract_methods(): + with pytest.raises(TypeError): + Backend() # type: ignore + + class IncompleteBackend(Backend): + async def make_request(self, request): + yield GenerativeResponse(type_="final", output="Test") + + with pytest.raises(TypeError): + IncompleteBackend() # type: ignore diff --git a/tests/unit/backend/test_openai_backend.py b/tests/unit/backend/test_openai_backend.py index 7777bcd..80cab45 100644 --- a/tests/unit/backend/test_openai_backend.py +++ b/tests/unit/backend/test_openai_backend.py @@ -1,75 +1,293 @@ -""" -This module includes unit tests for the OpenAI Backend Service. -""" - -from typing import Callable, Optional +from unittest.mock import AsyncMock, Mock, patch import pytest -from guidellm.backend import OpenAIBackend + +from guidellm.backend import Backend, OpenAIBackend +from guidellm.config import reload_settings, settings from guidellm.core import TextGenerationRequest -from tests.dummy.services import TestRequestGenerator +@pytest.fixture() +def mock_openai_client(): + with patch("guidellm.backend.openai.AsyncOpenAI") as mock_async_const, patch( + "guidellm.backend.openai.OpenAI" + ) as mock_sync_const: + mock_model = Mock() + mock_model.id = "mock-model" + mock_model_2 = Mock() + mock_model_2.id = "mock-model-2" + mock_model_data = Mock() + mock_model_data.data = [mock_model, mock_model_2] -@pytest.mark.smoke() -def test_openai_backend_creation_with_default_model(openai_backend_factory: Callable): - """ - Test whether the OpenAI Backend service is created correctly - with all default parameters. - Also checks whether the `default_models` parameter does not abuse the OpenAI API. 
- """ + def create_async_create(inst): + async def stream(): + for ind in range(3): + choice = Mock() + choice.delta.content = f"token{ind}" if ind % 2 == 0 else " " + choice.finish_reason = None + chunk = Mock() + chunk.choices = [choice] + + yield chunk + + choice = Mock() + choice.finish_reason = "stop" + chunk = Mock() + chunk.choices = [choice] + yield chunk - backend_service = openai_backend_factory() + async def create(*args, **kwargs): + inst.create_args = args + inst.create_kwargs = kwargs + return stream() - assert isinstance(backend_service, OpenAIBackend) - assert backend_service.default_model == backend_service.available_models()[0] + return create + + def async_constructor(*args, **kwargs): + mock_async_instance = AsyncMock() + mock_async_instance.models.list.return_value = mock_model_data + mock_async_instance.args = args + mock_async_instance.kwargs = kwargs + mock_async_instance.chat.completions.create.side_effect = ( + create_async_create(mock_async_instance) + ) + + return mock_async_instance + + def sync_constructor(*args, **kwargs): + mock_sync_instance = Mock() + mock_sync_instance.models.list.return_value = mock_model_data + mock_sync_instance.args = args + mock_sync_instance.kwargs = kwargs + return mock_sync_instance + + mock_async_const.side_effect = async_constructor + mock_sync_const.side_effect = sync_constructor + yield mock_async_const, mock_sync_const @pytest.mark.smoke() -def test_model_tokenizer(openai_backend_factory): - backend_service = openai_backend_factory() - assert backend_service.model_tokenizer("bert-base-uncased") +@pytest.mark.parametrize( + ( + "openai_api_key", + "target", + "host", + "port", + "model", + "request_args", + "expected_base_url", + ), + [ + ( + "test_key", + "http://test-target", + None, + None, + "test-model", + {"arg1": "value1"}, + "http://test-target", + ), + ("test_key", None, "localhost", 8000, "test-model", {}, "localhost:8000"), + (None, None, None, None, None, {}, settings.openai.base_url), + ], +) +def test_openai_backend_create( + openai_api_key, + target, + host, + port, + model, + request_args, + expected_base_url, + mock_openai_client, +): + backends = [ + Backend.create( + "openai_server", + openai_api_key=openai_api_key, + target=target, + host=host, + port=port, + model=model, + **request_args, + ), + OpenAIBackend( + openai_api_key=openai_api_key, + target=target, + host=host, + port=port, + model=model, + **request_args, + ), + ] + + for backend in backends: + assert backend._async_client.kwargs["api_key"] == ( # type: ignore + openai_api_key or settings.openai.api_key + ) + assert backend._async_client.kwargs["base_url"] == expected_base_url # type: ignore + assert backend._client.kwargs["api_key"] == ( # type: ignore + openai_api_key or settings.openai.api_key + ) + assert backend._client.kwargs["base_url"] == expected_base_url # type: ignore + if model: + assert backend._model == model # type: ignore @pytest.mark.smoke() -def test_model_tokenizer_no_model(openai_backend_factory): - backend_service = openai_backend_factory() - tokenizer = backend_service.model_tokenizer("invalid") - assert tokenizer is None +def test_openai_backend_models(mock_openai_client): + backend = OpenAIBackend() + assert backend.available_models() == ["mock-model", "mock-model-2"] + assert backend.default_model == "mock-model" + assert backend.model == "mock-model" @pytest.mark.smoke() -def test_make_request(openai_backend_factory, openai_completion_create_patch): - """ - Test `OpenAIBackend.make_request()` workflow. 
- - Notes: - * The output token count is not used without the `TextGenerationResult.start()` - and `TextGenerationResult.start()` - """ - - request: TextGenerationRequest = TestRequestGenerator().create_item() - backend_service: OpenAIBackend = openai_backend_factory() - total_generative_responses = 0 - - for generative_response, completion_patch in zip( - backend_service.make_request(request=request), - openai_completion_create_patch, - ): - total_generative_responses += 1 - expected_token: Optional[str] = completion_patch.content or None - - assert generative_response.add_token == expected_token +@pytest.mark.parametrize( + ("req", "request_args"), + [ + (TextGenerationRequest(prompt="Test"), None), + ( + TextGenerationRequest(prompt="Test", params={"generated_tokens": 10}), + None, + ), + ( + TextGenerationRequest(prompt="Test", params={"generated_tokens": 10}), + {"max_tokens": 10}, + ), + ( + TextGenerationRequest(prompt="Test"), + {"max_tokens": 10, "stop": "stop"}, + ), + ], +) +@pytest.mark.asyncio() +async def test_openai_backend_make_request(req, request_args, mock_openai_client): + backend = OpenAIBackend(**(request_args or {})) + counter = 0 + + async for response in backend.make_request(req): + if counter < 3: + assert response.type_ == "token_iter" + assert response.add_token == f"token{counter}" if counter % 2 == 0 else " " + elif counter == 3: + assert response.type_ == "final" + else: + raise ValueError("Too many responses received from the backend") + + counter += 1 + + # check the kwargs passed to the openai client + # now that the generator has been consumed + assert backend._async_client.create_args == () # type: ignore + assert backend._async_client.create_kwargs["model"] == "mock-model" # type: ignore + assert backend._async_client.create_kwargs["messages"] == [ # type: ignore + {"role": "system", "content": req.prompt} + ] + assert backend._async_client.create_kwargs["stream"] # type: ignore + assert backend._async_client.create_kwargs["n"] == 1 # type: ignore + + if req.output_token_count is not None: assert ( - generative_response.type_ == "final" - if completion_patch.stop is True - else "token_iter" + backend._async_client.create_kwargs["max_tokens"] == req.output_token_count # type: ignore ) - if expected_token is not None: - assert generative_response.prompt_token_count is None - assert generative_response.output_token_count is None - else: - assert generative_response.prompt_token_count == 2 - assert generative_response.output_token_count == 0 + assert backend._async_client.create_kwargs["stop"] is None # type: ignore + elif request_args is not None and "max_tokens" not in request_args: + assert ( + backend._async_client.create_kwargs["max_tokens"] # type: ignore + == settings.openai.max_gen_tokens + ) + + if request_args: + for key, value in request_args.items(): + assert backend._async_client.create_kwargs[key] == value # type: ignore + + +@pytest.mark.sanity() +@pytest.mark.asyncio() +async def test_openai_backend_submit(mock_openai_client): + backend = OpenAIBackend() + request = TextGenerationRequest(prompt="Test", prompt_token_count=1) + result = await backend.submit(request) + + assert result.request == request + assert result.prompt == request.prompt + assert result.prompt_token_count == 1 + assert result.output == "token0 token2" + assert result.output_token_count == 3 + assert result.last_time is not None + assert result.first_token_set + assert result.start_time is not None + assert result.first_token_time is not None + assert result.end_time is 
not None + assert len(result.decode_times) == 2 + + +@pytest.mark.sanity() +def test_openai_backend_api_key(mock_openai_client): + backend = OpenAIBackend() + assert backend._async_client.kwargs["api_key"] == settings.openai.api_key # type: ignore + assert backend._client.kwargs["api_key"] == settings.openai.api_key # type: ignore + + backend = OpenAIBackend(openai_api_key="test_key") + assert backend._async_client.kwargs["api_key"] == "test_key" # type: ignore + assert backend._client.kwargs["api_key"] == "test_key" # type: ignore + + +@pytest.mark.sanity() +def test_openai_backend_api_key_env(mock_openai_client, mocker): + mocker.patch.dict( + "os.environ", + { + "GUIDELLM__OPENAI__API_KEY": "test_key", + }, + ) + reload_settings() + + backend = OpenAIBackend() + assert backend._async_client.kwargs["api_key"] == "test_key" # type: ignore + assert backend._client.kwargs["api_key"] == "test_key" # type: ignore + + +@pytest.mark.sanity() +def test_openai_backend_target(mock_openai_client): + backend = OpenAIBackend(target="http://test-target") + assert backend._async_client.kwargs["base_url"] == "http://test-target" # type: ignore + assert backend._client.kwargs["base_url"] == "http://test-target" # type: ignore + + backend = OpenAIBackend(host="localhost", port=8000) + assert backend._async_client.kwargs["base_url"] == "localhost:8000" # type: ignore + assert backend._client.kwargs["base_url"] == "localhost:8000" # type: ignore + + backend = OpenAIBackend() + assert backend._async_client.kwargs["base_url"] == settings.openai.base_url # type: ignore + assert backend._client.kwargs["base_url"] == settings.openai.base_url # type: ignore + + +@pytest.mark.sanity() +def test_openai_backend_target_env(mock_openai_client, mocker): + mocker.patch.dict( + "os.environ", + { + "GUIDELLM__OPENAI__BASE_URL": "http://test-target", + }, + ) + reload_settings() + + backend = OpenAIBackend() + assert backend._async_client.kwargs["base_url"] == "http://test-target" # type: ignore + assert backend._client.kwargs["base_url"] == "http://test-target" # type: ignore + + +@pytest.mark.regression() +def test_openai_backend_target_none_error(mock_openai_client, mocker): + mocker.patch.dict( + "os.environ", + { + "GUIDELLM__OPENAI__BASE_URL": "", + }, + ) + reload_settings() - assert total_generative_responses == 3 + with pytest.raises(ValueError): + OpenAIBackend(target=None, host=None, port=None) diff --git a/tests/unit/cli/__init__.py b/tests/unit/cli/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/tests/unit/config/__init__.py b/tests/unit/config/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/tests/unit/conftest.py b/tests/unit/conftest.py index e6dbbe1..ae2cc34 100644 --- a/tests/unit/conftest.py +++ b/tests/unit/conftest.py @@ -1,36 +1,33 @@ -from typing import List, cast +from pathlib import Path +from typing import List +from unittest.mock import MagicMock, patch -import openai import pytest -from openai.pagination import SyncPage +import requests_mock -from tests import dummy +@pytest.fixture() +def mock_auto_tokenizer(): + with patch("transformers.AutoTokenizer.from_pretrained") as mock_from_pretrained: -@pytest.fixture(autouse=True) -def openai_completion_create_patch( - mocker, -) -> openai.Stream[openai.types.Completion]: - """ - Mock available models function to avoid OpenAI API call. 
- """ + def _fake_tokenize(text: str) -> List[int]: + tokens = text.split() + return [0] * len(tokens) - items = list(dummy.data.openai_completion_factory()) - mocker.patch("openai.resources.completions.Completions.create", return_value=items) + mock_tokenizer = MagicMock() + mock_tokenizer.tokenize = MagicMock(side_effect=_fake_tokenize) + mock_from_pretrained.return_value = mock_tokenizer + yield mock_tokenizer - return cast(openai.Stream[openai.types.Completion], items) +@pytest.fixture() +def mock_requests_pride_and_prejudice(): + text_path = Path(__file__).parent / "dummy" / "data" / "pride_and_prejudice.txt" + text_content = text_path.read_text() -@pytest.fixture(autouse=True) -def openai_models_list_patch(mocker) -> List[openai.types.Model]: - """ - Mock available models function to avoid OpenAI API call. - """ - - items: List[openai.types.Model] = list(dummy.data.openai_model_factory()) - mocker.patch( - "openai.resources.models.Models.list", - return_value=SyncPage(object="list", data=items), - ) - - return items + with requests_mock.Mocker() as mock: + mock.get( + "https://www.gutenberg.org/files/1342/1342-0.txt", + text=text_content, + ) + yield mock diff --git a/tests/unit/core/test_distribution.py b/tests/unit/core/test_distribution.py index 1525107..95b7e92 100644 --- a/tests/unit/core/test_distribution.py +++ b/tests/unit/core/test_distribution.py @@ -1,4 +1,5 @@ import pytest + from guidellm.core import Distribution @@ -20,6 +21,22 @@ def test_distribution_statistics(): assert dist.min == 1 assert dist.max == 5 assert dist.range == 4 + assert dist.percentile(50) == 3.0 + assert dist.percentiles([25, 50, 75]) == pytest.approx([2.0, 3.0, 4.0]) + + +@pytest.mark.smoke() +def test_distribution_no_data(): + dist = Distribution(data=[]) + assert dist.mean == 0.0 + assert dist.median == 0.0 + assert dist.variance == 0.0 + assert dist.std_deviation == 0.0 + assert dist.min == 0.0 + assert dist.max == 0.0 + assert dist.range == 0.0 + assert dist.percentile(50) == 0.0 + assert dist.percentiles([25, 50, 75]) == [0.0, 0.0, 0.0] @pytest.mark.sanity() @@ -41,49 +58,50 @@ def test_distribution_remove_data(): assert dist.data == [1, 3, 5] -@pytest.mark.skip("fix me") @pytest.mark.regression() def test_distribution_str(): data = [1, 2, 3, 4, 5] dist = Distribution(data=data) - assert str(dist) == ( - "Distribution({'mean': 3.0, 'median': 3.0, " - "'variance': 2.0, 'std_deviation': 1.4142135623730951, " - "'percentile_indices': " - "[10, 20, 30, 40, 50, 60, 70, 80, 90, 95, 99], " - "'percentile_values': " - "[1.4, 1.8, 2.2, 2.6, 3.0, 3.4, 3.8, 4.2, 4.6, 4.8, 4.96], " - "'min': 1, 'max': 5, 'range': 4})" + assert "Distribution({" in str(dist) + assert "'mean': 3.0" in str(dist) + assert "'median': 3.0" in str(dist) + assert "'variance': 2.0" in str(dist) + assert "'percentile_indices': [10, 20, 30, 40, 50, 60, 70, 80, 90, 95, 99]" in str( + dist + ) + assert ( + "'percentile_values': [1.4, 1.8, 2.2, 2.6, 3.0, 3.4, 3.8, 4.2, 4.6, 4.8, 4.96]" + in str(dist) ) + assert "'min': 1" in str(dist) + assert "'max': 5" in str(dist) + assert "'range': 4" in str(dist) -@pytest.mark.skip("fix me") @pytest.mark.regression() def test_distribution_repr(): data = [1, 2, 3, 4, 5] dist = Distribution(data=data) - assert repr(dist) == f"Distribution(data={data})" + assert repr(dist) == f"Distribution(data={dist.data})" -@pytest.mark.skip("fix me") @pytest.mark.regression() def test_distribution_json(): data = [1, 2, 3, 4, 5] dist = Distribution(data=data) json_str = dist.to_json() - assert '"data":[1,2,3,4,5]' 
in json_str + assert f'"data":[{dist.data[0]}' in json_str dist_restored = Distribution.from_json(json_str) assert dist_restored.data == data -@pytest.mark.skip("fix me") @pytest.mark.regression() def test_distribution_yaml(): data = [1, 2, 3, 4, 5] dist = Distribution(data=data) yaml_str = dist.to_yaml() - assert "data:\n- 1\n- 2\n- 3\n- 4\n- 5\n" in yaml_str + assert f"data:\n- {dist.data[0]}" in yaml_str dist_restored = Distribution.from_yaml(yaml_str) assert dist_restored.data == data diff --git a/tests/unit/core/test_report.py b/tests/unit/core/test_report.py index 713aea5..610879e 100644 --- a/tests/unit/core/test_report.py +++ b/tests/unit/core/test_report.py @@ -2,6 +2,7 @@ from pathlib import Path import pytest + from guidellm.core import ( Distribution, GuidanceReport, @@ -34,14 +35,14 @@ def sample_benchmark_report() -> TextGenerationBenchmarkReport: ) sample_error = TextGenerationError(request=sample_request, message="sample error") sample_benchmark = TextGenerationBenchmark( - mode="async", + mode="asynchronous", rate=1.0, results=[sample_result], errors=[sample_error], concurrencies=[], ) return TextGenerationBenchmarkReport( - benchmarks=[sample_benchmark], args=[{"arg1": "value1"}] + benchmarks=[sample_benchmark], args={"arg1": "value1"} ) diff --git a/tests/unit/core/test_request.py b/tests/unit/core/test_request.py index 4d90a14..8550eb2 100644 --- a/tests/unit/core/test_request.py +++ b/tests/unit/core/test_request.py @@ -1,4 +1,5 @@ import pytest + from guidellm.core import TextGenerationRequest @@ -8,7 +9,7 @@ def test_text_generation_request_initialization(): request = TextGenerationRequest(prompt=prompt) assert request.prompt == prompt assert request.prompt_token_count is None - assert request.generate_token_count is None + assert request.output_token_count is None assert request.params == {} @@ -16,17 +17,17 @@ def test_text_generation_request_initialization(): def test_text_generation_request_initialization_with_params(): prompt = "Generate a story" prompt_token_count = 50 - generate_token_count = 100 + output_token_count = 100 params = {"temperature": 0.7} request = TextGenerationRequest( prompt=prompt, prompt_token_count=prompt_token_count, - generate_token_count=generate_token_count, + output_token_count=output_token_count, params=params, ) assert request.prompt == prompt assert request.prompt_token_count == prompt_token_count - assert request.generate_token_count == generate_token_count + assert request.output_token_count == output_token_count assert request.params == params @@ -34,12 +35,12 @@ def test_text_generation_request_initialization_with_params(): def test_request_json(): prompt = "Generate text" prompt_token_count = 10 - generate_token_count = 50 + output_token_count = 50 params = {"temperature": 0.7} request = TextGenerationRequest( prompt=prompt, prompt_token_count=prompt_token_count, - generate_token_count=generate_token_count, + output_token_count=output_token_count, params=params, ) json_str = request.to_json() @@ -50,7 +51,7 @@ def test_request_json(): assert request.id == request_restored.id assert request_restored.prompt == prompt assert request_restored.prompt_token_count == prompt_token_count - assert request_restored.generate_token_count == generate_token_count + assert request_restored.output_token_count == output_token_count assert request_restored.params == params @@ -58,12 +59,12 @@ def test_request_json(): def test_request_yaml(): prompt = "Generate text" prompt_token_count = 15 - generate_token_count = 55 + output_token_count = 55 
params = {"temperature": 0.8} request = TextGenerationRequest( prompt=prompt, prompt_token_count=prompt_token_count, - generate_token_count=generate_token_count, + output_token_count=output_token_count, params=params, ) yaml_str = request.to_yaml() @@ -74,5 +75,5 @@ def test_request_yaml(): assert request.id == request_restored.id assert request_restored.prompt == prompt assert request_restored.prompt_token_count == prompt_token_count - assert request_restored.generate_token_count == generate_token_count + assert request_restored.output_token_count == output_token_count assert request_restored.params == params diff --git a/tests/unit/core/test_result.py b/tests/unit/core/test_result.py index a6d942b..39aecc7 100644 --- a/tests/unit/core/test_result.py +++ b/tests/unit/core/test_result.py @@ -1,4 +1,7 @@ +import time + import pytest + from guidellm.core import ( TextGenerationBenchmark, TextGenerationBenchmarkReport, @@ -17,7 +20,7 @@ def test_text_generation_result_initialization(): assert result.output == "" -@pytest.mark.sanity() +@pytest.mark.smoke() def test_text_generation_result_start(): request = TextGenerationRequest(prompt="Generate a story") result = TextGenerationResult(request=request) @@ -27,23 +30,25 @@ def test_text_generation_result_start(): assert result.start_time is not None -@pytest.mark.sanity() +@pytest.mark.smoke() def test_text_generation_result_output_token(): request = TextGenerationRequest(prompt="Generate a story") result = TextGenerationResult(request=request) prompt = "Once upon a time" result.start(prompt) - token = "the" - result.output_token(token) + tokens = ["the", " ", "quick", " ", "brown", " ", "fox"] + for token in tokens: + result.output_token(token) + result.end() assert result.last_time assert result.start_time - assert result.output == f"{token} " + assert result.output == "the quick brown fox" assert result.last_time is not None assert result.last_time > result.start_time -@pytest.mark.sanity() +@pytest.mark.smoke() def test_text_generation_result_end(): request = TextGenerationRequest(prompt="Generate a story") result = TextGenerationResult(request=request) @@ -57,6 +62,16 @@ def test_text_generation_result_end(): assert result.end_time > result.start_time +@pytest.mark.sanity() +def test_text_generation_result_improper_lifecycle(): + request = TextGenerationRequest(prompt="Generate a story") + result = TextGenerationResult(request=request) + with pytest.raises(ValueError): + result.output_token("the") + with pytest.raises(ValueError): + result.end("The end") + + @pytest.mark.regression() def test_text_generation_result_json(): request = TextGenerationRequest(prompt="Generate a story") @@ -144,34 +159,98 @@ def test_text_generation_error_yaml(): @pytest.mark.smoke() def test_text_generation_benchmark_initialization(): - benchmark = TextGenerationBenchmark(mode="test", rate=1.0) - assert benchmark.mode == "test" + benchmark = TextGenerationBenchmark(mode="synchronous", rate=1.0) + assert benchmark.mode == "synchronous" assert benchmark.rate == 1.0 assert benchmark.request_count == 0 assert benchmark.error_count == 0 -@pytest.mark.sanity() +@pytest.mark.smoke() def test_text_generation_benchmark_started(): - benchmark = TextGenerationBenchmark(mode="test", rate=1.0) + benchmark = TextGenerationBenchmark(mode="synchronous", rate=1.0) + assert benchmark.completed_request_rate == 0.0 + assert not benchmark.overloaded benchmark.request_started() assert len(benchmark.concurrencies) == 1 -@pytest.mark.regression() +@pytest.mark.smoke() +def 
test_text_generation_benchmark_expected_rate(): + num_requests = 5 + time_per_request = 0.25 + expected_rate = 1.0 / time_per_request + + benchmark = TextGenerationBenchmark(mode="synchronous", rate=expected_rate) + + for index in range(num_requests): + request = TextGenerationRequest(prompt=f"Generate a story {index}") + benchmark.request_started() + result = TextGenerationResult(request=request) + result.start("Once upon a time") + time.sleep(time_per_request) + result.end("The end") + benchmark.request_completed(result) + + assert len(benchmark.results) == num_requests + assert len(benchmark.errors) == 0 + assert len(benchmark.concurrencies) == 10 + assert benchmark.request_count == num_requests + assert benchmark.error_count == 0 + assert benchmark.completed_request_rate == pytest.approx(expected_rate, rel=0.1) + assert not benchmark.overloaded + + +@pytest.mark.smoke() +def test_text_generation_benchmark_overloaded_rate(): + num_requests = 5 + time_per_request = 0.25 + expected_rate = 1.0 / time_per_request + + benchmark = TextGenerationBenchmark(mode="synchronous", rate=expected_rate * 1.5) + + for index in range(num_requests): + request = TextGenerationRequest(prompt=f"Generate a story {index}") + benchmark.request_started() + result = TextGenerationResult(request=request) + result.start("Once upon a time") + time.sleep(time_per_request) + result.end("The end") + benchmark.request_completed(result) + + assert len(benchmark.results) == num_requests + assert len(benchmark.errors) == 0 + assert len(benchmark.concurrencies) == 10 + assert benchmark.request_count == num_requests + assert benchmark.error_count == 0 + assert benchmark.completed_request_rate == pytest.approx(expected_rate, rel=0.1) + assert benchmark.overloaded + + +@pytest.mark.smoke() def test_text_generation_benchmark_completed_with_result(): - benchmark = TextGenerationBenchmark(mode="test", rate=1.0) + benchmark = TextGenerationBenchmark(mode="synchronous", rate=1.0) + + with pytest.raises(ValueError): + benchmark.request_completed(None) # type: ignore + benchmark.request_started() request = TextGenerationRequest(prompt="Generate a story") result = TextGenerationResult(request=request) + + with pytest.raises(ValueError): + benchmark.request_completed(result) + + result.start("Once upon a time") + result.end("The end") benchmark.request_completed(result) assert benchmark.request_count == 1 assert benchmark.error_count == 0 -@pytest.mark.regression() +@pytest.mark.smoke() def test_text_generation_benchmark_completed_with_error(): - benchmark = TextGenerationBenchmark(mode="test", rate=1.0) + benchmark = TextGenerationBenchmark(mode="synchronous", rate=1.0) benchmark.request_started() request = TextGenerationRequest(prompt="Generate a story") error = TextGenerationError(request=request, message=str(Exception("Test error"))) @@ -180,15 +259,30 @@ def test_text_generation_benchmark_completed_with_error(): assert benchmark.error_count == 1 +@pytest.mark.regression() +def test_text_generation_benchmark_iter(): + benchmark = TextGenerationBenchmark(mode="synchronous", rate=1.0) + benchmark.request_started() + request = TextGenerationRequest(prompt="Generate a story") + result = TextGenerationResult(request=request) + result.start("Once upon a time") + result.end("The end") + benchmark.request_completed(result) + for res in benchmark: + assert res == result + + @pytest.mark.regression() def test_text_generation_benchmark_json(): - benchmark = TextGenerationBenchmark(mode="test", rate=1.0) + benchmark = 
TextGenerationBenchmark(mode="synchronous", rate=1.0) benchmark.request_started() request = TextGenerationRequest(prompt="Generate a story") result = TextGenerationResult(request=request) + result.start("Once upon a time") + result.end("The end") benchmark.request_completed(result) json_str = benchmark.to_json() - assert '"mode":"test"' in json_str + assert '"mode":"synchronous"' in json_str assert '"rate":1.0' in json_str benchmark_restored = TextGenerationBenchmark.from_json(json_str) @@ -203,13 +297,15 @@ def test_text_generation_benchmark_json(): @pytest.mark.regression() def test_text_generation_benchmark_yaml(): - benchmark = TextGenerationBenchmark(mode="test", rate=1.0) + benchmark = TextGenerationBenchmark(mode="synchronous", rate=1.0) benchmark.request_started() request = TextGenerationRequest(prompt="Generate a story") result = TextGenerationResult(request=request) + result.start("Once upon a time") + result.end("The end") benchmark.request_completed(result) yaml_str = benchmark.to_yaml() - assert "mode: test" in yaml_str + assert "mode: synchronous" in yaml_str assert "rate: 1.0" in yaml_str benchmark_restored = TextGenerationBenchmark.from_yaml(yaml_str) @@ -229,22 +325,65 @@ def test_text_generation_benchmark_report_initialization(): assert len(report.args) == 0 -@pytest.mark.sanity() +@pytest.mark.smoke() def test_text_generation_benchmark_report_add_benchmark(): report = TextGenerationBenchmarkReport() - benchmark = TextGenerationBenchmark(mode="test", rate=1.0) + benchmark = TextGenerationBenchmark(mode="synchronous", rate=1.0) report.add_benchmark(benchmark) assert len(report.benchmarks) == 1 +@pytest.mark.sanity() +def test_text_generation_benchmark_report_iter(): + report = TextGenerationBenchmarkReport() + + fast_benchmark = TextGenerationBenchmark(mode="synchronous", rate=10.0) + for _ in range(5): + fast_benchmark.request_started() + request = TextGenerationRequest(prompt="Generate a story") + result = TextGenerationResult(request=request) + result.start("Once upon a time") + time.sleep(0.1) + result.end("The end") + fast_benchmark.request_completed(result) + report.add_benchmark(fast_benchmark) + + slow_benchmark = TextGenerationBenchmark(mode="synchronous", rate=5.0) + for _ in range(5): + slow_benchmark.request_started() + request = TextGenerationRequest(prompt="Generate a story") + result = TextGenerationResult(request=request) + result.start("Once upon a time") + time.sleep(0.2) + result.end("The end") + slow_benchmark.request_completed(result) + report.add_benchmark(slow_benchmark) + + for index, benchmark in enumerate(report): + if index == 0: + assert benchmark == fast_benchmark + elif index == 1: + assert benchmark == slow_benchmark + else: + raise AssertionError("Unexpected benchmark in report") + + for index, benchmark in enumerate(report.benchmarks_sorted): + if index == 0: + assert benchmark == slow_benchmark + elif index == 1: + assert benchmark == fast_benchmark + else: + raise AssertionError("Unexpected benchmark in report") + + @pytest.mark.regression() def test_text_generation_benchmark_report_json(): report = TextGenerationBenchmarkReport() - benchmark = TextGenerationBenchmark(mode="test", rate=1.0) + benchmark = TextGenerationBenchmark(mode="synchronous", rate=1.0) report.add_benchmark(benchmark) json_str = report.to_json() assert '"benchmarks":' in json_str - assert '"args":[]' in json_str + assert '"args":{}' in json_str report_restored = TextGenerationBenchmarkReport.from_json(json_str) assert len(report.benchmarks) == 
len(report_restored.benchmarks) @@ -257,11 +396,11 @@ def test_text_generation_benchmark_report_json(): @pytest.mark.regression() def test_text_generation_benchmark_report_yaml(): report = TextGenerationBenchmarkReport() - benchmark = TextGenerationBenchmark(mode="test", rate=1.0) + benchmark = TextGenerationBenchmark(mode="synchronous", rate=1.0) report.add_benchmark(benchmark) yaml_str = report.to_yaml() assert "benchmarks:" in yaml_str - assert "args: []" in yaml_str + assert "args: {}" in yaml_str report_restored = TextGenerationBenchmarkReport.from_yaml(yaml_str) assert len(report.benchmarks) == len(report_restored.benchmarks) diff --git a/tests/unit/core/test_serializable.py b/tests/unit/core/test_serializable.py index b2d238d..ce0cec8 100644 --- a/tests/unit/core/test_serializable.py +++ b/tests/unit/core/test_serializable.py @@ -2,7 +2,8 @@ from pathlib import Path import pytest -from guidellm.core.serializable import Serializable, SerializableFileType + +from guidellm.core.serializable import Serializable class ExampleModel(Serializable): @@ -11,32 +12,28 @@ class ExampleModel(Serializable): @pytest.mark.smoke() -def test_serializable_to_json(): +def test_serializable_json(): + # to json example = ExampleModel(name="John Doe", age=30) json_str = example.to_json() assert '"name":"John Doe"' in json_str assert '"age":30' in json_str - -@pytest.mark.smoke() -def test_serializable_from_json(): - json_str = '{"name": "John Doe", "age": 30}' + # from json example = ExampleModel.from_json(json_str) assert example.name == "John Doe" assert example.age == 30 @pytest.mark.smoke() -def test_serializable_to_yaml(): +def test_serializable_yaml(): + # to yaml example = ExampleModel(name="John Doe", age=30) yaml_str = example.to_yaml() assert "name: John Doe" in yaml_str assert "age: 30" in yaml_str - -@pytest.mark.smoke() -def test_serializable_from_yaml(): - yaml_str = "name: John Doe\nage: 30\n" + # from yaml example = ExampleModel.from_yaml(yaml_str) assert example.name == "John Doe" assert example.age == 30 @@ -47,7 +44,7 @@ def test_serializable_file_json(): example = ExampleModel(name="John Doe", age=30) with tempfile.TemporaryDirectory() as temp_dir: file_path = Path(temp_dir) / "example.json" - saved_path = example.save_file(file_path, SerializableFileType.JSON) + saved_path = example.save_file(file_path, "json") assert Path(saved_path).exists() loaded_example = ExampleModel.load_file(saved_path) assert loaded_example.name == "John Doe" @@ -59,7 +56,7 @@ def test_serializable_file_yaml(): example = ExampleModel(name="John Doe", age=30) with tempfile.TemporaryDirectory() as temp_dir: file_path = Path(temp_dir) / "example.yaml" - saved_path = example.save_file(file_path, SerializableFileType.YAML) + saved_path = example.save_file(file_path, "yaml") assert Path(saved_path).exists() loaded_example = ExampleModel.load_file(saved_path) assert loaded_example.name == "John Doe" @@ -78,11 +75,11 @@ def test_serializable_file_without_extension(): assert loaded_example.age == 30 -@pytest.mark.smoke() +@pytest.mark.sanity() def test_serializable_file_with_directory_json(): example = ExampleModel(name="John Doe", age=30) with tempfile.TemporaryDirectory() as temp_dir: - saved_path = example.save_file(temp_dir, SerializableFileType.JSON) + saved_path = example.save_file(temp_dir, "json") assert Path(saved_path).exists() assert saved_path.endswith(".json") loaded_example = ExampleModel.load_file(saved_path) @@ -90,11 +87,11 @@ def test_serializable_file_with_directory_json(): assert loaded_example.age 
== 30 -@pytest.mark.smoke() +@pytest.mark.sanity() def test_serializable_file_with_directory_yaml(): example = ExampleModel(name="John Doe", age=30) with tempfile.TemporaryDirectory() as temp_dir: - saved_path = example.save_file(temp_dir, SerializableFileType.YAML) + saved_path = example.save_file(temp_dir, "yaml") assert Path(saved_path).exists() assert saved_path.endswith(".yaml") loaded_example = ExampleModel.load_file(saved_path) @@ -102,45 +99,53 @@ def test_serializable_file_with_directory_yaml(): assert loaded_example.age == 30 -@pytest.mark.smoke() -def test_serializable_save_file_invalid_extension(): +@pytest.mark.sanity() +def test_serializable_file_infer_extension(): + example = ExampleModel(name="John Doe", age=30) + with tempfile.TemporaryDirectory() as temp_dir: + inferred_path = example.save_file(temp_dir, "json") + assert Path(inferred_path).exists() + assert inferred_path.endswith(".json") + loaded_example = ExampleModel.load_file(inferred_path) + assert loaded_example.name == "John Doe" + assert loaded_example.age == 30 + + +@pytest.mark.regression() +def test_serializable_file_invalid_extension(): + # to file example = ExampleModel(name="John Doe", age=30) with tempfile.TemporaryDirectory() as temp_dir: invalid_file_path = Path(temp_dir) / "example.txt" with pytest.raises(ValueError, match="Unsupported file extension.*"): example.save_file(invalid_file_path) + # to directory + with tempfile.TemporaryDirectory() as temp_dir: + invalid_file_path = Path(temp_dir) + with pytest.raises(ValueError, match="Unsupported file extension.*"): + example.save_file(invalid_file_path, type_="txt") # type: ignore -@pytest.mark.smoke() -def test_serializable_load_file_invalid_extension(): + # from file with tempfile.TemporaryDirectory() as temp_dir: invalid_file_path = Path(temp_dir) / "example.txt" with invalid_file_path.open("w") as file: file.write("invalid content") - with pytest.raises(ValueError, match="Unsupported file extension: TXT"): + with pytest.raises(ValueError, match="Unsupported file extension.*"): ExampleModel.load_file(invalid_file_path) -@pytest.mark.smoke() -def test_serializable_file_no_type_provided(): - example = ExampleModel(name="John Doe", age=30) +@pytest.mark.regression() +def test_serializable_load_missing_path(): with tempfile.TemporaryDirectory() as temp_dir: - file_path = Path(temp_dir) / "example" - saved_path = example.save_file(file_path) - assert Path(saved_path).exists() - assert saved_path.endswith(".yaml") - loaded_example = ExampleModel.load_file(saved_path) - assert loaded_example.name == "John Doe" - assert loaded_example.age == 30 + invalid_file_path = Path(temp_dir) / "example.yaml" + with pytest.raises(FileNotFoundError): + ExampleModel.load_file(invalid_file_path) -@pytest.mark.smoke() -def test_serializable_file_infer_extension(): - example = ExampleModel(name="John Doe", age=30) +@pytest.mark.regression() +def test_serializable_load_non_file_path(): with tempfile.TemporaryDirectory() as temp_dir: - inferred_path = example.save_file(temp_dir, SerializableFileType.JSON) - assert Path(inferred_path).exists() - assert inferred_path.endswith(".json") - loaded_example = ExampleModel.load_file(inferred_path) - assert loaded_example.name == "John Doe" - assert loaded_example.age == 30 + invalid_file_path = Path(temp_dir) + with pytest.raises(ValueError, match="Path is not a file.*"): + ExampleModel.load_file(invalid_file_path) diff --git a/tests/unit/executor/test_base.py b/tests/unit/executor/test_base.py new file mode 100644 index 
0000000..2cfa670 --- /dev/null +++ b/tests/unit/executor/test_base.py @@ -0,0 +1,278 @@ +from typing import List, Optional, Union +from unittest.mock import create_autospec, patch + +import pytest + +from guidellm.backend import Backend +from guidellm.config import settings +from guidellm.core import ( + TextGenerationBenchmarkReport, +) +from guidellm.executor.base import Executor, ExecutorResult +from guidellm.executor.profile_generator import ProfileGenerator +from guidellm.request import RequestGenerator +from guidellm.scheduler import Scheduler, SchedulerResult + + +@pytest.fixture() +def mock_scheduler(): + with patch("guidellm.executor.base.Scheduler") as mock_scheduler: + + def scheduler_constructor(*args, **kwargs): + mock_instance = create_autospec(Scheduler, instance=True) + mock_instance.args = args + mock_instance.kwargs = kwargs + num_requests = kwargs.get("max_number", 10) + + async def run(): + benchmark = create_autospec( + TextGenerationBenchmarkReport, instance=True + ) + benchmark.completed_request_rate = kwargs.get("rate", None) + yield SchedulerResult( + completed=False, + count_total=10, + count_completed=0, + benchmark=benchmark, + current_result=None, + ) + + for index in range(num_requests): + yield SchedulerResult( + completed=False, + count_total=10, + count_completed=index + 1, + benchmark=benchmark, + current_result=create_autospec( + TextGenerationBenchmarkReport, instance=True + ), + ) + + yield SchedulerResult( + completed=True, + count_total=num_requests, + count_completed=num_requests, + benchmark=benchmark, + current_result=None, + ) + + mock_instance.run.side_effect = run + + return mock_instance + + mock_scheduler.side_effect = scheduler_constructor + yield mock_scheduler + + +@pytest.mark.smoke() +def test_executor_result_instantiation(): + report = create_autospec(TextGenerationBenchmarkReport, instance=True) + scheduler_result = create_autospec(SchedulerResult, instance=True) + executor_result = ExecutorResult( + completed=True, + count_total=10, + count_completed=5, + report=report, + scheduler_result=scheduler_result, + ) + + assert executor_result.completed is True + assert executor_result.count_total == 10 + assert executor_result.count_completed == 5 + assert executor_result.report == report + assert executor_result.scheduler_result == scheduler_result + + +@pytest.mark.smoke() +@pytest.mark.parametrize( + ("mode", "rate"), + [ + ("sweep", None), + ("synchronous", None), + ("throughput", None), + ("constant", 10), + ("constant", [10, 20, 30]), + ("poisson", 10), + ("poisson", [10, 20, 30]), + ], +) +def test_executor_instantiation(mode, rate): + backend = create_autospec(Backend, instance=True) + request_generator = create_autospec(RequestGenerator, instance=True) + executor = Executor( + backend=backend, + request_generator=request_generator, + mode=mode, + rate=rate, + max_number=100, + max_duration=60.0, + ) + + assert executor.backend == backend + assert executor.request_generator == request_generator + assert executor.profile_generator is not None + assert isinstance(executor.profile_generator, ProfileGenerator) + assert executor.profile_generator.mode == mode + assert ( + executor.profile_generator.rates == rate + if not rate or isinstance(rate, list) + else [rate] + ) + assert executor.max_number == 100 + assert executor.max_duration == 60.0 + + +async def _run_executor_tests( + executor: Executor, + num_profiles: int, + num_requests: int, + mode: str, + rate: Optional[Union[float, List[float]]], +): + iterator = executor.run() + + 
result = await iterator.__anext__() + assert result.completed is False + assert result.count_total == num_profiles + assert result.count_completed == 0 + assert result.report is not None + assert isinstance(result.report, TextGenerationBenchmarkReport) + assert len(result.report.benchmarks) == 0 + assert "mode" in result.report.args + assert result.report.args["mode"] == mode + assert "rate" in result.report.args + assert ( + result.report.args["rate"] == rate + if rate is None or isinstance(rate, list) + else [rate] + ) + assert "max_number" in result.report.args + assert result.report.args["max_number"] == num_requests + assert "max_duration" in result.report.args + assert result.report.args["max_duration"] is None + assert result.scheduler_result is None + + for benchmark_index in range(num_profiles): + result = await iterator.__anext__() + assert result.completed is False + assert result.count_total == num_profiles + assert result.count_completed == benchmark_index + assert result.report is not None + assert len(result.report.benchmarks) == benchmark_index + assert result.scheduler_result is not None + assert isinstance(result.scheduler_result, SchedulerResult) + + for _ in range(num_requests): + result = await iterator.__anext__() + assert result.completed is False + assert result.count_total == num_profiles + assert result.count_completed == benchmark_index + assert result.report is not None + assert len(result.report.benchmarks) == benchmark_index + assert result.scheduler_result is not None + assert isinstance(result.scheduler_result, SchedulerResult) + + result = await iterator.__anext__() + assert result.completed is False + assert result.count_total == num_profiles + assert result.count_completed == benchmark_index + 1 + assert result.report is not None + assert len(result.report.benchmarks) == benchmark_index + 1 + assert result.scheduler_result is not None + assert isinstance(result.scheduler_result, SchedulerResult) + result.scheduler_result.benchmark.completed_request_rate = ( # type: ignore + benchmark_index + 1 + ) + + result = await iterator.__anext__() + assert result.completed is True + assert result.count_total == num_profiles + assert result.count_completed == num_profiles + assert result.report is not None + assert len(result.report.benchmarks) == num_profiles + assert result.scheduler_result is None + + +@pytest.mark.smoke() +@pytest.mark.asyncio() +async def test_executor_run_sweep(mock_scheduler): + num_requests = 15 + + backend = create_autospec(Backend, instance=True) + request_generator = create_autospec(RequestGenerator, instance=True) + executor = Executor( + backend=backend, + request_generator=request_generator, + mode="sweep", + rate=None, + max_number=num_requests, + ) + + await _run_executor_tests( + executor, settings.num_sweep_profiles, num_requests, "sweep", None + ) + + +@pytest.mark.smoke() +@pytest.mark.asyncio() +async def test_executor_run_synchronous(mock_scheduler): + num_requests = 15 + + backend = create_autospec(Backend, instance=True) + request_generator = create_autospec(RequestGenerator, instance=True) + executor = Executor( + backend=backend, + request_generator=request_generator, + mode="synchronous", + rate=None, + max_number=num_requests, + ) + + await _run_executor_tests(executor, 1, num_requests, "synchronous", None) + + +@pytest.mark.smoke() +@pytest.mark.asyncio() +async def test_executor_run_throughput(mock_scheduler): + num_requests = 15 + + backend = create_autospec(Backend, instance=True) + request_generator = 
create_autospec(RequestGenerator, instance=True) + executor = Executor( + backend=backend, + request_generator=request_generator, + mode="throughput", + rate=None, + max_number=num_requests, + ) + + await _run_executor_tests(executor, 1, num_requests, "throughput", None) + + +@pytest.mark.smoke() +@pytest.mark.asyncio() +@pytest.mark.parametrize( + ("mode", "rate"), + [ + ("constant", 10), + ("constant", [10, 20, 30]), + ("poisson", 10), + ("poisson", [10, 20, 30]), + ], +) +async def test_executor_run_constant_poisson(mock_scheduler, mode, rate): + num_requests = 15 + + backend = create_autospec(Backend, instance=True) + request_generator = create_autospec(RequestGenerator, instance=True) + executor = Executor( + backend=backend, + request_generator=request_generator, + mode=mode, + rate=rate, + max_number=num_requests, + ) + + await _run_executor_tests( + executor, len(rate) if isinstance(rate, list) else 1, num_requests, mode, rate + ) diff --git a/tests/unit/executor/test_executor.py b/tests/unit/executor/test_executor.py deleted file mode 100644 index f0f3968..0000000 --- a/tests/unit/executor/test_executor.py +++ /dev/null @@ -1,86 +0,0 @@ -from unittest.mock import MagicMock, patch - -import pytest -from guidellm.backend.base import Backend -from guidellm.executor import Executor, Profile, ProfileGenerator -from guidellm.executor.profile_generator import ProfileGenerationMode -from guidellm.request.base import RequestGenerator -from guidellm.scheduler import LoadGenerationMode - - -def test_executor_creation(): - mock_request_generator = MagicMock(spec=RequestGenerator) - mock_backend = MagicMock(spec=Backend) - profile_mode = ProfileGenerationMode.SWEEP - profile_args = None - max_requests = None - max_duration = None - executor = Executor( - mock_backend, - mock_request_generator, - profile_mode, - profile_args, - max_requests, - max_duration, - ) - assert executor.request_generator == mock_request_generator - assert executor.backend == mock_backend - assert executor.max_requests == max_requests - assert executor.max_duration == max_duration - - -@pytest.fixture() -def mock_request_generator(): - return MagicMock(spec=RequestGenerator) - - -@pytest.fixture() -def mock_backend(): - return MagicMock(spec=Backend) - - -@pytest.fixture() -def mock_scheduler(): - with patch("guidellm.executor.executor.Scheduler") as MockScheduler: - yield MockScheduler - - -def test_executor_run(mock_request_generator, mock_backend, mock_scheduler): - mock_profile_generator = MagicMock(spec=ProfileGenerator) - profiles = [ - Profile(load_gen_mode=LoadGenerationMode.CONSTANT, load_gen_rate=1.0), - Profile(load_gen_mode=LoadGenerationMode.CONSTANT, load_gen_rate=2.0), - None, - ] - mock_profile_generator.next.side_effect = profiles - - with patch( - "guidellm.executor.executor.ProfileGenerator.create", - return_value=mock_profile_generator, - ): - executor = Executor( - request_generator=mock_request_generator, - backend=mock_backend, - profile_mode=ProfileGenerationMode.FIXED_RATE, - profile_args={ - "load_gen_mode": LoadGenerationMode.CONSTANT, - "rates": [1.0, 2.0], - }, - max_requests=10, - max_duration=100, - ) - - mock_benchmark = MagicMock() - mock_scheduler.return_value.run.return_value = mock_benchmark - - report = executor.run() - - assert mock_scheduler.call_count == 2 - assert len(report.benchmarks) == 2 - assert report.benchmarks[0] == mock_benchmark - assert report.benchmarks[1] == mock_benchmark - calls = mock_scheduler.call_args_list - assert calls[0][1]["load_gen_mode"] == 
LoadGenerationMode.CONSTANT - assert calls[0][1]["load_gen_rate"] == 1.0 - assert calls[1][1]["load_gen_mode"] == LoadGenerationMode.CONSTANT - assert calls[1][1]["load_gen_rate"] == 2.0 diff --git a/tests/unit/executor/test_fixed_rate_profile_generation_mode.py b/tests/unit/executor/test_fixed_rate_profile_generation_mode.py deleted file mode 100644 index bdb5fc5..0000000 --- a/tests/unit/executor/test_fixed_rate_profile_generation_mode.py +++ /dev/null @@ -1,49 +0,0 @@ -from typing import List, Optional - -import pytest -from guidellm.core import TextGenerationBenchmark, TextGenerationBenchmarkReport -from guidellm.executor import Executor, ProfileGenerationMode -from guidellm.scheduler import LoadGenerationMode - -from tests import dummy - - -@pytest.mark.parametrize( - "load_gen_mode", - [ - LoadGenerationMode.SYNCHRONOUS, - LoadGenerationMode.POISSON, - LoadGenerationMode.CONSTANT, - ], -) -def test_executor_single_profile_generator_benchmark_report( - mocker, - openai_backend_factory, - load_gen_mode, -): - scheduler_run_patch = mocker.patch( - "guidellm.scheduler.scheduler.Scheduler.run", - return_value=TextGenerationBenchmark(mode="test", rate=1.0), - ) - request_genrator = dummy.services.TestRequestGenerator( - tokenizer="bert-base-uncased", - ) - rates: Optional[List[float]] = [1.0] - if load_gen_mode == LoadGenerationMode.SYNCHRONOUS: - rates = None - profile_generator_kwargs = {"load_gen_mode": load_gen_mode, "rates": rates} - - executor = Executor( - backend=openai_backend_factory(), - request_generator=request_genrator, - profile_mode=ProfileGenerationMode.FIXED_RATE, - profile_args=profile_generator_kwargs, - max_requests=1, - max_duration=None, - ) - - report: TextGenerationBenchmarkReport = executor.run() - - assert scheduler_run_patch.call_count == 1 - assert len(report.benchmarks) == 1 - assert report.benchmarks[0].mode == "test" diff --git a/tests/unit/executor/test_profile_generator.py b/tests/unit/executor/test_profile_generator.py index 897b8e3..1389459 100644 --- a/tests/unit/executor/test_profile_generator.py +++ b/tests/unit/executor/test_profile_generator.py @@ -1,154 +1,194 @@ -from unittest.mock import MagicMock +from typing import get_args +from unittest.mock import create_autospec -import numpy as np import pytest -from guidellm.core import TextGenerationBenchmark, TextGenerationBenchmarkReport -from guidellm.executor import ( - FixedRateProfileGenerator, - ProfileGenerationMode, - ProfileGenerator, - SweepProfileGenerator, + +from guidellm import settings +from guidellm.core import ( + TextGenerationBenchmark, + TextGenerationBenchmarkReport, +) +from guidellm.executor import Profile, ProfileGenerationMode, ProfileGenerator + + +@pytest.mark.smoke() +def test_profile_generator_mode(): + assert set(get_args(ProfileGenerationMode)) == { + "sweep", + "synchronous", + "throughput", + "constant", + "poisson", + } + + +@pytest.mark.smoke() +def test_profile_instantiation(): + profile = Profile(load_gen_mode="constant", load_gen_rate=10) + assert profile.load_gen_mode == "constant" + assert profile.load_gen_rate == 10 + + +@pytest.mark.smoke() +@pytest.mark.parametrize( + ("mode", "rate"), + [ + ("sweep", None), + ("synchronous", None), + ("throughput", None), + ("constant", 10), + ("constant", [10, 20, 30]), + ("poisson", 10), + ("poisson", [10, 20, 30]), + ], ) -from guidellm.scheduler import LoadGenerationMode - -# Fixed Rate Profile Generator - - -def test_fixed_rate_profile_generator_creation(): - rates = [1.0] - load_gen_mode = LoadGenerationMode.CONSTANT - 
test_profile_generator = ProfileGenerator.create( - ProfileGenerationMode.FIXED_RATE, - rates=rates, - load_gen_mode=load_gen_mode, - ) - assert isinstance(test_profile_generator, FixedRateProfileGenerator) - assert test_profile_generator._rates == rates - assert test_profile_generator._load_gen_mode == load_gen_mode - assert test_profile_generator._rate_index == 0 - - -def test_synchronous_mode_rate_list_error(): - rates = [1.0] - load_gen_mode = LoadGenerationMode.SYNCHRONOUS - with pytest.raises( - ValueError, - match="custom rates are not supported in synchronous mode", - ): - ProfileGenerator.create( - ProfileGenerationMode.FIXED_RATE, - rates=rates, - load_gen_mode=load_gen_mode, - ) - - -def test_next_with_multiple_rates(): - rates = [1.0, 2.0] - load_gen_mode = LoadGenerationMode.CONSTANT - test_profile_generator = ProfileGenerator.create( - ProfileGenerationMode.FIXED_RATE, - rates=rates, - load_gen_mode=load_gen_mode, - ) - mock_report = MagicMock(spec=TextGenerationBenchmarkReport) - for rate in rates: - current_profile = test_profile_generator.next(mock_report) - assert current_profile is not None - assert current_profile.load_gen_rate == rate - assert current_profile.load_gen_mode == LoadGenerationMode.CONSTANT - assert test_profile_generator.next(mock_report) is None - - -def test_next_with_sync_mode(): - load_gen_mode = LoadGenerationMode.SYNCHRONOUS - test_profile_generator = ProfileGenerator.create( - ProfileGenerationMode.FIXED_RATE, - load_gen_mode=load_gen_mode, - ) - mock_report = MagicMock(spec=TextGenerationBenchmarkReport) - current_profile = test_profile_generator.next(mock_report) - assert current_profile is not None - assert current_profile.load_gen_rate is None - assert current_profile.load_gen_mode == LoadGenerationMode.SYNCHRONOUS - assert test_profile_generator.next(mock_report) is None - - -# Sweep Profile Generator - - -def test_sweep_profile_generator_creation(): - test_profile_generator = ProfileGenerator.create( - ProfileGenerationMode.SWEEP, - ) - assert isinstance(test_profile_generator, SweepProfileGenerator) - assert not test_profile_generator._sync_run - assert not test_profile_generator._max_found - assert test_profile_generator._pending_rates is None - assert test_profile_generator._pending_rates is None - - -def test_first_profile_is_synchronous(): - test_profile_generator = ProfileGenerator.create(ProfileGenerationMode.SWEEP) - mock_report = MagicMock(spec=TextGenerationBenchmarkReport) - profile = test_profile_generator.next(mock_report) - assert profile is not None +def test_profile_generator_instantiation(mode, rate): + generator = ProfileGenerator(mode=mode, rate=rate) + assert generator.mode == mode + + if rate is None: + assert generator.rates is None + elif isinstance(rate, list): + assert generator.rates == rate + else: + assert generator.rates == [rate] + + if mode == "sweep": + assert len(generator) == settings.num_sweep_profiles + elif mode in ("throughput", "synchronous"): + assert len(generator) == 1 + else: + assert len(generator) == len(rate) if isinstance(rate, list) else 1 + + assert generator.generated_count == 0 + + +@pytest.mark.sanity() +@pytest.mark.parametrize( + ("mode", "rate"), + [ + # invalid modes + ("invalid_mode", None), + # rates supplied for non-applicable modes + ("sweep", 10), + ("sweep", [10, 20, 30]), + ("synchronous", 10), + ("synchronous", [10, 20, 30]), + ("throughput", 10), + ("throughput", [10, 20, 30]), + # invalid rates supplied for applicable modes + ("constant", None), + ("constant", -1), + 
("constant", 0), + ("poisson", None), + ("poisson", -1), + ("poisson", 0), + ], +) +def test_profile_generator_invalid_instantiation(mode, rate): + with pytest.raises(ValueError): + ProfileGenerator(mode=mode, rate=rate) + + +@pytest.mark.sanity() +def test_profile_generator_next_sweep(): + generator = ProfileGenerator(mode="sweep") + current_report = TextGenerationBenchmarkReport() + + for index in range(settings.num_sweep_profiles): + profile: Profile = generator.next(current_report) # type: ignore + + if index == 0: + assert profile.load_gen_mode == "synchronous" + assert profile.load_gen_rate is None + mock_benchmark = create_autospec(TextGenerationBenchmark, instance=True) + mock_benchmark.completed_request_rate = 1 + current_report.add_benchmark(mock_benchmark) + elif index == 1: + assert profile.load_gen_mode == "throughput" + assert profile.load_gen_rate is None + mock_benchmark = create_autospec(TextGenerationBenchmark, instance=True) + mock_benchmark.completed_request_rate = 10 + current_report.add_benchmark(mock_benchmark) + else: + assert profile.load_gen_mode == "constant" + assert profile.load_gen_rate == index + + assert generator.generated_count == index + 1 + + for _ in range(3): + assert generator.next(current_report) is None + + +@pytest.mark.sanity() +def test_profile_generator_next_synchronous(): + generator = ProfileGenerator(mode="synchronous") + current_report = TextGenerationBenchmarkReport() + + profile: Profile = generator.next(current_report) # type: ignore + assert profile.load_gen_mode == "synchronous" assert profile.load_gen_rate is None - assert profile.load_gen_mode == LoadGenerationMode.SYNCHRONOUS - - -def test_rate_doubles(): - test_profile_generator = ProfileGenerator.create(ProfileGenerationMode.SWEEP) - mock_report = MagicMock(spec=TextGenerationBenchmarkReport) - mock_benchmark = MagicMock(spec=TextGenerationBenchmark) - mock_benchmark.overloaded = False - mock_benchmark.rate = 2.0 - mock_benchmark.request_rate = 2.0 - benchmarks = [mock_benchmark] - mock_report.benchmarks = benchmarks - test_profile_generator.next(mock_report) - - profile = test_profile_generator.next(mock_report) - assert profile is not None - assert profile.load_gen_rate == 4.0 - - -def test_max_found(): - test_profile_generator = ProfileGenerator.create(ProfileGenerationMode.SWEEP) - mock_report = MagicMock(spec=TextGenerationBenchmarkReport) - mock_benchmark = MagicMock(spec=TextGenerationBenchmark) - mock_benchmark.overloaded = False - mock_benchmark.rate = 2.0 - mock_benchmark.request_rate = 2.0 - mock_overloaded_benchmark = MagicMock(spec=TextGenerationBenchmark) - mock_overloaded_benchmark.overloaded = True - mock_overloaded_benchmark.rate = 4.0 - mock_overloaded_benchmark.request_rate = 4.0 - benchmarks = [mock_benchmark, mock_overloaded_benchmark] - mock_report.benchmarks = benchmarks - - test_profile_generator.next(mock_report) - profile = test_profile_generator.next(mock_report) - assert profile is not None - - # if benchmark wasn't overloaded, rates would have doubled to 8 - assert profile.load_gen_rate == 2.0 - - -def test_pending_rates(): - test_profile_generator = ProfileGenerator.create(ProfileGenerationMode.SWEEP) - mock_report = MagicMock(spec=TextGenerationBenchmarkReport) - mock_benchmark = MagicMock(spec=TextGenerationBenchmark) - mock_benchmark.overloaded = False - mock_benchmark.rate = 2.0 - mock_benchmark.request_rate = 2.0 - mock_overloaded_benchmark = MagicMock(spec=TextGenerationBenchmark) - mock_overloaded_benchmark.overloaded = True - 
mock_overloaded_benchmark.rate = 8.0 - mock_overloaded_benchmark.request_rate = 8.0 - benchmarks = [mock_benchmark, mock_overloaded_benchmark] - mock_report.benchmarks = benchmarks - profile = test_profile_generator.next(mock_report) - for expected_rate in np.linspace(2.0, 8.0, 10): - profile = test_profile_generator.next(mock_report) - assert profile is not None - assert profile.load_gen_rate == expected_rate + assert generator.generated_count == 1 + + for _ in range(3): + assert generator.next(current_report) is None + + +@pytest.mark.sanity() +def test_profile_generator_next_throughput(): + generator = ProfileGenerator(mode="throughput") + current_report = TextGenerationBenchmarkReport() + + profile: Profile = generator.next(current_report) # type: ignore + assert profile.load_gen_mode == "throughput" + assert profile.load_gen_rate is None + assert generator.generated_count == 1 + + for _ in range(3): + assert generator.next(current_report) is None + + +@pytest.mark.sanity() +@pytest.mark.parametrize( + "rate", + [ + 10, + [10, 20, 30], + ], +) +def test_profile_generator_next_constant(rate): + generator = ProfileGenerator(mode="constant", rate=rate) + test_rates = rate if isinstance(rate, list) else [rate] + current_report = TextGenerationBenchmarkReport() + + for index, test_rate in enumerate(test_rates): + profile: Profile = generator.next(current_report) # type: ignore + assert profile.load_gen_mode == "constant" + assert profile.load_gen_rate == test_rate + assert generator.generated_count == index + 1 + + for _ in range(3): + assert generator.next(current_report) is None + + +@pytest.mark.sanity() +@pytest.mark.parametrize( + "rate", + [ + 10, + [10, 20, 30], + ], +) +def test_profile_generator_next_poisson(rate): + generator = ProfileGenerator(mode="poisson", rate=rate) + test_rates = rate if isinstance(rate, list) else [rate] + current_report = TextGenerationBenchmarkReport() + + for index, test_rate in enumerate(test_rates): + profile: Profile = generator.next(current_report) # type: ignore + assert profile.load_gen_mode == "poisson" + assert profile.load_gen_rate == test_rate + assert generator.generated_count == index + 1 + + for _ in range(3): + assert generator.next(current_report) is None diff --git a/tests/unit/executor/test_sweep_profile_generation_mode.py b/tests/unit/executor/test_sweep_profile_generation_mode.py deleted file mode 100644 index e3b9b67..0000000 --- a/tests/unit/executor/test_sweep_profile_generation_mode.py +++ /dev/null @@ -1,45 +0,0 @@ -import pytest -from guidellm.core import TextGenerationBenchmark, TextGenerationBenchmarkReport -from guidellm.executor import Executor, ProfileGenerationMode -from guidellm.scheduler import LoadGenerationMode - -from tests import dummy - - -@pytest.mark.skip("SweepProfileGenerator never break.") -@pytest.mark.parametrize( - "load_gen_mode", - [ - LoadGenerationMode.SYNCHRONOUS, - LoadGenerationMode.POISSON, - LoadGenerationMode.CONSTANT, - ], -) -def test_executor_sweep_profile_generator_benchmark_report( - mocker, - openai_backend_factory, - load_gen_mode, -): - scheduler_run_patch = mocker.patch( - "guidellm.scheduler.scheduler.Scheduler.run", - return_value=TextGenerationBenchmark(mode="test", rate=1.0), - ) - request_genrator = dummy.services.TestRequestGenerator( - tokenizer="bert-base-uncased", - ) - profile_generator_kwargs = {"rate_type": load_gen_mode, "rate": 1.0} - - executor = Executor( - backend=openai_backend_factory(), - request_generator=request_genrator, - profile_mode=ProfileGenerationMode.SWEEP, - 
profile_args=profile_generator_kwargs, - max_requests=1, - max_duration=None, - ) - - report: TextGenerationBenchmarkReport = executor.run() - - assert scheduler_run_patch.call_count == 1 - assert len(report.benchmarks) == 1 - assert report.benchmarks[0].mode == "test" diff --git a/tests/unit/request/test_base.py b/tests/unit/request/test_base.py index 8c71d02..8b75be1 100644 --- a/tests/unit/request/test_base.py +++ b/tests/unit/request/test_base.py @@ -1,30 +1,31 @@ -from unittest.mock import Mock, patch +import re +import time +from typing import List +from unittest.mock import MagicMock, Mock, patch import pytest -from guidellm.core import TextGenerationRequest +from guidellm.core import TextGenerationRequest from tests.dummy.services import TestRequestGenerator @pytest.mark.smoke() -def test_request_generator_sync_constructor(): +def test_request_generator_sync_constructor(mock_auto_tokenizer): generator = TestRequestGenerator(mode="sync") assert generator.mode == "sync" assert generator.async_queue_size == 50 # Default value - assert generator.tokenizer is None @pytest.mark.smoke() -def test_request_generator_async_constructor(): +def test_request_generator_async_constructor(mock_auto_tokenizer): generator = TestRequestGenerator(mode="async", async_queue_size=10) assert generator.mode == "async" assert generator.async_queue_size == 10 - assert generator.tokenizer is None generator.stop() @pytest.mark.smoke() -def test_request_generator_sync_iter(): +def test_request_generator_sync_iter(mock_auto_tokenizer): generator = TestRequestGenerator(mode="sync") items = [] for item in generator: @@ -37,7 +38,7 @@ def test_request_generator_sync_iter(): @pytest.mark.smoke() -def test_request_generator_async_iter(): +def test_request_generator_async_iter(mock_auto_tokenizer): generator = TestRequestGenerator(mode="async") items = [] for item in generator: @@ -50,31 +51,8 @@ def test_request_generator_async_iter(): assert items[0].prompt == "Test prompt" -@pytest.mark.regression() -def test_request_generator_with_mock_tokenizer(): - mock_tokenizer = Mock() - generator = TestRequestGenerator(tokenizer=mock_tokenizer) - assert generator.tokenizer == mock_tokenizer - - with patch( - "guidellm.request.base.AutoTokenizer", - ) as MockAutoTokenizer: # noqa: N806 - MockAutoTokenizer.from_pretrained.return_value = mock_tokenizer - generator = TestRequestGenerator(tokenizer="mock-tokenizer") - assert generator.tokenizer == mock_tokenizer - MockAutoTokenizer.from_pretrained.assert_called_with("mock-tokenizer") - - -@pytest.mark.regression() -def test_request_generator_repr(): - generator = TestRequestGenerator(mode="sync", async_queue_size=100) - assert repr(generator) == ( - "RequestGenerator(mode=sync, async_queue_size=100, tokenizer=None)" - ) - - -@pytest.mark.regression() -def test_request_generator_iter_calls_create_item(): +@pytest.mark.smoke() +def test_request_generator_iter_calls_create_item(mock_auto_tokenizer): generator = TestRequestGenerator(mode="sync") generator.create_item = Mock( # type: ignore return_value=TextGenerationRequest(prompt="Mock prompt"), @@ -90,8 +68,8 @@ def test_request_generator_iter_calls_create_item(): generator.create_item.assert_called() -@pytest.mark.regression() -def test_request_generator_async_iter_calls_create_item(): +@pytest.mark.smoke() +def test_request_generator_async_iter_calls_create_item(mock_auto_tokenizer): generator = TestRequestGenerator(mode="sync") generator.create_item = Mock( # type: ignore return_value=TextGenerationRequest(prompt="Mock prompt"), 
@@ -106,3 +84,69 @@ def test_request_generator_async_iter_calls_create_item(): generator.stop() assert len(items) == 5 generator.create_item.assert_called() + + +@pytest.mark.sanity() +def test_request_generator_repr(mock_auto_tokenizer): +    generator = TestRequestGenerator(mode="sync", async_queue_size=100) +    repr_str = repr(generator) +    assert repr_str.startswith("RequestGenerator(") +    assert "mode=sync" in repr_str +    assert "async_queue_size=100" in repr_str +    assert "tokenizer=<MagicMock" in repr_str + + +@pytest.mark.regression() +def test_request_generator_with_mock_tokenizer(): +    def _fake_tokenize(text: str) -> List[int]: +        tokens = re.findall(r"\w+|[^\w\s]", text) +        return [0] * len(tokens) + +    mock_tokenizer = MagicMock() +    mock_tokenizer.tokenize = MagicMock(side_effect=_fake_tokenize) + +    generator = TestRequestGenerator(tokenizer=mock_tokenizer) +    assert generator.tokenizer == mock_tokenizer + +    with patch( +        "guidellm.request.base.AutoTokenizer", +    ) as MockAutoTokenizer:  # noqa: N806 +        MockAutoTokenizer.from_pretrained.return_value = mock_tokenizer +        generator = TestRequestGenerator(tokenizer="mock-tokenizer") +        assert generator.tokenizer == mock_tokenizer +        MockAutoTokenizer.from_pretrained.assert_called_with("mock-tokenizer") + + +@pytest.mark.regression() +def test_request_generator_populate_queue(mock_auto_tokenizer): +    generator = TestRequestGenerator(mode="async", async_queue_size=2) +    generator.create_item = Mock(  # type: ignore +        return_value=TextGenerationRequest(prompt="Mock prompt") +    ) + +    time.sleep(0.2)  # Allow some time for the queue to populate +    generator.stop() +    assert generator._queue.qsize() > 0 + + +@pytest.mark.regression() +def test_request_generator_async_stop_during_population(mock_auto_tokenizer): +    generator = TestRequestGenerator(mode="async", async_queue_size=2) +    generator.create_item = Mock(  # type: ignore +        return_value=TextGenerationRequest(prompt="Mock prompt") +    ) + +    time.sleep(0.1)  # Allow some time for the queue to start populating +    generator.stop() + +    # Ensure the stop event is set and thread is no longer alive +    assert generator._stop_event.is_set() +    assert not generator._thread.is_alive() diff --git a/tests/unit/request/test_emulated.py b/tests/unit/request/test_emulated.py new file mode 100644 index 0000000..699b1d6 --- /dev/null +++ b/tests/unit/request/test_emulated.py @@ -0,0 +1,365 @@ +import json +import tempfile +from pathlib import Path +from typing import Tuple, Union + +import numpy as np +import pytest +from transformers import PreTrainedTokenizer  # type: ignore + +from guidellm.core.request import TextGenerationRequest +from guidellm.request.emulated import ( +    EmulatedConfig, +    EmulatedRequestGenerator, +    EndlessTokens, +) + + +@pytest.mark.smoke() +def test_emulated_config_construction(): +    config = EmulatedConfig( +        prompt_tokens=10, +        prompt_tokens_variance=2, +        prompt_tokens_min=5, +        prompt_tokens_max=15, +        generated_tokens=20, +        generated_tokens_variance=4, +        generated_tokens_min=10, +        generated_tokens_max=30, +    ) +    assert config.prompt_tokens == 10 +    assert config.prompt_tokens_variance == 2 +    assert config.prompt_tokens_min == 5 +    assert config.prompt_tokens_max == 15 +    assert config.generated_tokens == 20 +    assert config.generated_tokens_variance == 4 +    assert config.generated_tokens_min == 10 +    assert config.generated_tokens_max == 30 + + +@pytest.mark.smoke() +def test_emulated_config_create_dict(): +    config_dict = { +        "prompt_tokens": 10, +        "prompt_tokens_variance": 2, +        "prompt_tokens_min": 5, +        "prompt_tokens_max": 15, +        "generated_tokens": 20, +        "generated_tokens_variance": 4, +        "generated_tokens_min": 10, +        "generated_tokens_max": 30, +    } +    
config = EmulatedConfig.create_config(config_dict) + assert config.prompt_tokens == 10 + assert config.prompt_tokens_variance == 2 + assert config.prompt_tokens_min == 5 + assert config.prompt_tokens_max == 15 + assert config.generated_tokens == 20 + assert config.generated_tokens_variance == 4 + assert config.generated_tokens_min == 10 + assert config.generated_tokens_max == 30 + + +@pytest.mark.smoke() +@pytest.mark.parametrize( + ("base", "variance", "min_tokens", "max_tokens", "expected_range"), + [ + (10, 2, None, None, (1, 10 + 5 * 2)), + (10, 2, 5, 15, (5, 15)), + (10, None, 5, 15, (5, 15)), + (10, 2, 1, None, (1, 10 + 5 * 2)), + ], +) +def test_emulated_config_token_range( + base: int, + variance: int, + min_tokens: int, + max_tokens: int, + expected_range: Tuple[int, int], +): + assert ( + EmulatedConfig._token_range(base, variance, min_tokens, max_tokens) + == expected_range + ) + + +@pytest.mark.smoke() +@pytest.mark.parametrize( + ("base", "variance", "min_tokens", "max_tokens", "expected_range"), + [ + (10, None, None, None, (10, 10)), + (10, 5, None, None, (1, 10 + 5 * 2)), + (10, 5, 5, 15, (5, 15)), + (10, None, 5, 15, (5, 15)), + (10, 5, 2, None, (2, 10 + 5 * 2)), + (10, 5, None, 20, (1, 20)), + ], +) +def test_emulated_config_sample_tokens( + base: int, + variance: int, + min_tokens: int, + max_tokens: int, + expected_range: Tuple[int, int], +): + rng = np.random.default_rng() + + for _ in range(100): + token_count = EmulatedConfig._sample_tokens( + base, variance, min_tokens, max_tokens, rng + ) + assert token_count >= expected_range[0] + assert token_count <= expected_range[1] + + +@pytest.mark.sanity() +def test_emulated_config_create(): + test_dict = { + "prompt_tokens": 10, + "prompt_tokens_variance": 2, + "prompt_tokens_min": 5, + "prompt_tokens_max": 15, + "generated_tokens": 20, + "generated_tokens_variance": 4, + "generated_tokens_min": 10, + "generated_tokens_max": 30, + } + compare_config = EmulatedConfig(**test_dict) + + # test dict + test_config = EmulatedConfig.create_config(test_dict) + assert ( + test_config == compare_config + ), f"Dictionary creation failed: {test_config} != {compare_config}" + + # test json str + test_config = EmulatedConfig.create_config(json.dumps(test_dict)) + assert ( + test_config == compare_config + ), f"JSON string creation failed: {test_config} != {compare_config}" + + # test json file str path + with tempfile.TemporaryDirectory() as temp_dir: + test_path = Path(temp_dir) / "test.json" + test_path.write_text(json.dumps(test_dict)) + test_config = EmulatedConfig.create_config(str(test_path)) + assert ( + test_config == compare_config + ), f"JSON file path creation failed: {test_config} != {compare_config}" + + # test json file Path object + with tempfile.TemporaryDirectory() as temp_dir: + test_path = Path(temp_dir) / "test.json" + test_path.write_text(json.dumps(test_dict)) + test_config = EmulatedConfig.create_config(test_path) + assert ( + test_config == compare_config + ), f"JSON file Path object creation failed: {test_config} != {compare_config}" + + # test key value string + test_str = ( + f"prompt_tokens={test_dict['prompt_tokens']}, " + f"prompt_tokens_variance={test_dict['prompt_tokens_variance']}, " + f"prompt_tokens_min={test_dict['prompt_tokens_min']}, " + f"prompt_tokens_max={test_dict['prompt_tokens_max']}, " + f"generated_tokens={test_dict['generated_tokens']}, " + f"generated_tokens_variance={test_dict['generated_tokens_variance']}, " + f"generated_tokens_min={test_dict['generated_tokens_min']}, " + 
f"generated_tokens_max={test_dict['generated_tokens_max']}" + ) + test_config = EmulatedConfig.create_config(test_str) + assert ( + test_config == compare_config + ), f"Key value string creation failed: {test_config} != {compare_config}" + + +# EndlessTokens + + +@pytest.mark.smoke() +@pytest.mark.parametrize( + ("data", "expected_words", "expected_indices"), + [ + ( + "word1 word2 word3\nword4 word5", + ["word1", "word2", "word3", "word4", "word5"], + [0, 3], + ), + ( + "word1 word2\n word3 word4\n word5", + ["word1", "word2", "word3", "word4", "word5"], + [0, 2, 4], + ), + ], +) +def test_endless_data_words_construction(data, expected_words, expected_indices): + tokens = EndlessTokens(data) + assert tokens == expected_words + assert tokens.line_indices == expected_indices + + +@pytest.mark.smoke() +def test_endless_data_words_create_from_basic_file(): + with tempfile.TemporaryDirectory() as temp_dir: + file_path = Path(temp_dir) / "test.txt" + file_path.write_text("word1 word2 word3\nword4 word5") + + tokens = EndlessTokens(file_path) + assert tokens == ["word1", "word2", "word3", "word4", "word5"] + assert tokens.line_indices == [0, 3] + + tokens = EndlessTokens(str(file_path)) + assert tokens == ["word1", "word2", "word3", "word4", "word5"] + assert tokens.line_indices == [0, 3] + + +@pytest.mark.smoke() +@pytest.mark.parametrize( + ("data", "start", "length", "expected_text"), + [ + ("word1 word2 word3 word4", 0, 2, "word1 word2"), + ("word1 word2\nword3 word4", 1, 2, "word2\nword3"), + ( + "word1 word2\nword3 word4", + 1, + 6, + "word2\nword3 word4 word1 word2\nword3", + ), + ], +) +def test_endless_data_words_create_text(data, start, length, expected_text): + words = EndlessTokens(data) + text = words.create_text(start, length) + assert text == expected_text + + +# EmulatedRequestGenerator + + +@pytest.mark.smoke() +def test_emulated_request_generator_construction(mocker): + mocker.patch( + "guidellm.request.emulated.EmulatedConfig.create_config", + return_value=EmulatedConfig(prompt_tokens=10), + ) + mocker.patch( + "guidellm.request.emulated.EndlessTokens", + return_value=EndlessTokens("word1 word2"), + ) + generator = EmulatedRequestGenerator(config="mock_config", mode="sync") + assert isinstance(generator._config, EmulatedConfig) + assert isinstance(generator._tokens, EndlessTokens) + + +@pytest.mark.smoke() +def test_emulated_request_generator_create_item(mocker): + mocker.patch( + "guidellm.request.emulated.EndlessTokens", + return_value=EndlessTokens("word1 word2"), + ) + mock_tokenizer = mocker.Mock(PreTrainedTokenizer) + mock_tokenizer.tokenize.return_value = ["word1", "word2"] + generator = EmulatedRequestGenerator( + config={ + "prompt_tokens": 10, + }, + tokenizer=mock_tokenizer, + mode="sync", + ) + item = generator.create_item() + assert isinstance(item, TextGenerationRequest) + + +@pytest.mark.smoke() +def test_emulated_request_generator_sample_prompt(mocker, mock_auto_tokenizer): + mocker.patch( + "guidellm.request.emulated.EndlessTokens", + return_value=EndlessTokens("word1 word2"), + ) + generator = EmulatedRequestGenerator(config={"prompt_tokens": 3}, mode="sync") + prompt = generator.sample_prompt(3) + assert prompt == "word1 word2 word1" + + request = generator.create_item() + assert request.prompt_token_count == 3 + + +@pytest.mark.smoke() +def test_emulated_request_generator_random_seed(mocker): + mocker.patch( + "guidellm.request.emulated.EndlessTokens", + return_value=EndlessTokens("word1 word2"), + ) + + rand_gen = EmulatedRequestGenerator( + 
config={"prompt_tokens": 20, "prompt_tokens_variance": 10}, + random_seed=42, + mode="sync", + ) + rand_gen_comp_pos = EmulatedRequestGenerator( + config={"prompt_tokens": 20, "prompt_tokens_variance": 10}, + random_seed=42, + mode="sync", + ) + rand_gen_comp_neg = EmulatedRequestGenerator( + config={"prompt_tokens": 20, "prompt_tokens_variance": 10}, + random_seed=43, + mode="sync", + ) + + assert rand_gen.create_item().prompt == rand_gen_comp_pos.create_item().prompt + assert rand_gen.create_item().prompt != rand_gen_comp_neg.create_item().prompt + + +@pytest.mark.regression() +@pytest.mark.parametrize( + ("config_type", "config"), + [ + ("dict", {"prompt_tokens": 10, "generated_tokens": 20}), + ("dict", {"prompt_tokens": 10, "prompt_tokens_variance": 2}), + ( + "dict", + { + "prompt_tokens": 10, + "prompt_tokens_min": 5, + "prompt_tokens_max": 15, + "generated_tokens": 20, + }, + ), + ("json_str", json.dumps({"prompt_tokens": 10, "generated_tokens": 20})), + ("key_value_str", "prompt_tokens=10, generated_tokens=20"), + ("file_str", json.dumps({"prompt_tokens": 10, "generated_tokens": 20})), + ("file_path", json.dumps({"prompt_tokens": 10, "generated_tokens": 20})), + ], +) +def test_emulated_request_generator_lifecycle( + mock_requests_pride_and_prejudice, + mock_auto_tokenizer, + config_type: str, + config: Union[str, dict, Path], +): + if config_type in ["dict", "json_str", "key_value_str"]: + generator = EmulatedRequestGenerator(config) + elif config_type in ["file_str", "file_path"]: + with tempfile.TemporaryDirectory() as temp_dir: + file_path = Path(temp_dir) / "test.json" + file_path.write_text(config) # type: ignore + generator = EmulatedRequestGenerator( + str(file_path) if config_type == "file_str" else file_path + ) + + for _ in range(5): + request = generator.create_item() + prompt_range = generator._config.prompt_tokens_range + outputs_range = generator._config.output_tokens_range + + assert request.prompt_token_count >= prompt_range[0] # type: ignore + assert request.prompt_token_count <= prompt_range[1] # type: ignore + + prompt_tokens = len(generator.tokenizer.tokenize(request.prompt)) + assert request.prompt_token_count == prompt_tokens + + if generator._config.generated_tokens: + assert len(outputs_range) == 2 + assert request.output_token_count >= outputs_range[0] # type: ignore + assert request.output_token_count <= outputs_range[1] # type: ignore diff --git a/tests/unit/request/test_file.py b/tests/unit/request/test_file.py new file mode 100644 index 0000000..b214e41 --- /dev/null +++ b/tests/unit/request/test_file.py @@ -0,0 +1,98 @@ +import tempfile +from pathlib import Path + +import pytest + +from guidellm.core.request import TextGenerationRequest +from guidellm.request.file import FileRequestGenerator + + +@pytest.mark.smoke() +def test_file_request_generator_constructor(mock_auto_tokenizer): + with tempfile.TemporaryDirectory() as temp_dir: + file_path = Path(temp_dir) / "example.txt" + file_path.write_text("This is a test.\nThis is another test.") + generator = FileRequestGenerator(file_path) + assert generator._path == file_path + assert generator._data == ["This is a test.", "This is another test."] + assert generator._iterator is not None + + +@pytest.mark.smoke() +def test_file_request_generator_create_item(mock_auto_tokenizer): + with tempfile.TemporaryDirectory() as temp_dir: + file_path = Path(temp_dir) / "example.txt" + file_path.write_text("This is a test.\nThis is another test.") + generator = FileRequestGenerator(file_path, mode="sync") + request = 
generator.create_item() + assert isinstance(request, TextGenerationRequest) + assert request.prompt == "This is a test." + + +@pytest.mark.smoke() +@pytest.mark.parametrize( + ("file_extension", "file_content"), + [ + ("txt", "Test content 1.\nTest content 2.\nTest content 3.\n"), + ( + "csv", + "text,label,extra\n" + "Test content 1.,1,extra 1\n" + "Test content 2.,2,extra 2\n" + "Test content 3.,3,extra 3\n", + ), + ( + "jsonl", + '{"text": "Test content 1."}\n' + '{"text": "Test content 2."}\n' + '{"text": "Test content 3."}\n', + ), + ( + "csv", + "prompt,text,extra\n" + "Test content 1., text 1, extra 1\n" + "Test content 2., text 2, extra 2\n" + "Test content 3., text 3, extra 3\n", + ), + ( + "json", + '[{"text": "Test content 1."}, ' + '{"text": "Test content 2."}, ' + '{"text": "Test content 3."}]\n', + ), + ( + "json", + '{"object_1": {"text": "Test content 1."}, ' + '"object_2": {"text": "Test content 2."}, ' + '"object_3": {"text": "Test content 3."}}\n', + ), + ( + "yaml", + "items:\n" + " - text: Test content 1.\n" + " - text: Test content 2.\n" + " - text: Test content 3.\n", + ), + ( + "yaml", + "object_1:\n text: Test content 1.\n" + "object_2:\n text: Test content 2.\n" + "object_3:\n text: Test content 3.\n", + ), + ], +) +def test_file_request_generator_file_types_lifecycle( + mock_auto_tokenizer, file_extension, file_content +): + with tempfile.TemporaryDirectory() as temp_dir: + file_path = Path(temp_dir) / f"example.{file_extension}" + file_path.write_text(file_content) + generator = FileRequestGenerator(file_path) + + for index, request in enumerate(generator): + assert isinstance(request, TextGenerationRequest) + assert request.prompt == f"Test content {index + 1}." + assert request.prompt_token_count == 3 + + if index == 2: + break diff --git a/tests/unit/request/test_transformers.py b/tests/unit/request/test_transformers.py new file mode 100644 index 0000000..fcf933b --- /dev/null +++ b/tests/unit/request/test_transformers.py @@ -0,0 +1,95 @@ +from unittest.mock import patch + +import pytest + +from guidellm.core.request import TextGenerationRequest +from guidellm.request.transformers import TransformersDatasetRequestGenerator +from tests.dummy.data.transformers import ( + create_sample_dataset, + create_sample_dataset_dict, + create_sample_iterable_dataset, + create_sample_iterable_dataset_dict, +) + + +@pytest.mark.smoke() +def test_transformers_dataset_request_generator_constructor( + mock_auto_tokenizer, +): + dataset = create_sample_dataset() + with patch( + "guidellm.request.transformers.load_transformers_dataset", + return_value=dataset, + ), patch( + "guidellm.request.transformers.resolve_transformers_dataset_column", + return_value="text", + ): + generator = TransformersDatasetRequestGenerator( + dataset="dummy_dataset", + split="train", + column="text", + ) + assert generator._dataset == "dummy_dataset" + assert generator._split == "train" + assert generator._column == "text" + assert generator._hf_dataset == dataset + assert generator._hf_column == "text" + assert generator._hf_dataset_iterator is not None + + +@pytest.mark.smoke() +def test_transformers_dataset_request_generator_create_item( + mock_auto_tokenizer, +): + generator = TransformersDatasetRequestGenerator( + dataset=create_sample_dataset_dict(), + split="train", + column="text", + mode="sync", + ) + request = generator.create_item() + assert isinstance(request, TextGenerationRequest) + assert request.prompt == "sample text 1" + assert request.prompt_token_count == 3 + + 
+@pytest.mark.smoke() +@pytest.mark.parametrize( + ("dataset_arg", "dataset"), + [ + ( + "mock/directory/file.csv", + create_sample_dataset_dict(splits=["train"]), + ), + ( + "mock/directory/file.json", + create_sample_dataset(column="prompt"), + ), + ( + "mock/directory/file.py", + create_sample_dataset_dict(splits=["test"], column="output"), + ), + (create_sample_dataset_dict(splits=["val", "train"], column="custom"), None), + (create_sample_dataset(), None), + (create_sample_iterable_dataset_dict(splits=["validation"]), None), + (create_sample_iterable_dataset(), None), + ], +) +def test_transformers_dataset_request_generator_lifecycle( + mock_auto_tokenizer, dataset_arg, dataset +): + with patch( + "guidellm.utils.transformers.load_dataset", + return_value=dataset, + ): + generator = TransformersDatasetRequestGenerator( + dataset=dataset_arg, mode="sync" + ) + + for index, request in enumerate(generator): + assert isinstance(request, TextGenerationRequest) + assert request.prompt == f"sample text {index + 1}" + assert request.prompt_token_count == 3 + + if index == 2: + break diff --git a/tests/unit/scheduler/conftest.py b/tests/unit/scheduler/conftest.py deleted file mode 100644 index c79d27c..0000000 --- a/tests/unit/scheduler/conftest.py +++ /dev/null @@ -1,15 +0,0 @@ -import pytest -from guidellm.core import TextGenerationRequest, TextGenerationResult - - -@pytest.fixture(autouse=True) -def backend_submit_patch(mocker): - patch = mocker.patch( - "guidellm.backend.base.Backend.submit", - return_value=TextGenerationResult( - request=TextGenerationRequest(prompt="Test prompt"), - ), - ) - patch.__name__ = "Backend.submit fallbackBackend.submit fallback" - - return patch diff --git a/tests/unit/scheduler/test_base.py b/tests/unit/scheduler/test_base.py new file mode 100644 index 0000000..093b57a --- /dev/null +++ b/tests/unit/scheduler/test_base.py @@ -0,0 +1,249 @@ +import asyncio +import time +from unittest.mock import AsyncMock, create_autospec + +import pytest + +from guidellm.backend import Backend +from guidellm.core import ( + TextGenerationBenchmark, + TextGenerationError, + TextGenerationRequest, + TextGenerationResult, +) +from guidellm.request import RequestGenerator +from guidellm.scheduler import ( + LoadGenerator, + Scheduler, + SchedulerResult, +) + + +@pytest.mark.smoke() +def test_scheduler_result(): + benchmark = create_autospec(TextGenerationBenchmark, instance=True) + result = TextGenerationResult( + request=TextGenerationRequest(prompt="prompt"), output="Test output" + ) + scheduler_result = SchedulerResult( + completed=True, + count_total=10, + count_completed=5, + benchmark=benchmark, + current_result=result, + ) + + assert scheduler_result.completed is True + assert scheduler_result.count_total == 10 + assert scheduler_result.count_completed == 5 + assert scheduler_result.benchmark == benchmark + assert scheduler_result.current_result == result + + +@pytest.mark.smoke() +@pytest.mark.parametrize( + ("mode", "rate", "max_number", "max_duration"), + [ + ("synchronous", None, 10, None), + ("throughput", 5.0, None, 60.0), + ("poisson", 10.0, 100, None), + ("constant", 1.0, None, 120.0), + ], +) +def test_scheduler_instantiation(mode, rate, max_number, max_duration): + generator = create_autospec(RequestGenerator, instance=True) + worker = create_autospec(Backend, instance=True) + scheduler = Scheduler( + generator, + worker, + mode=mode, + rate=rate, + max_number=max_number, + max_duration=max_duration, + ) + + assert scheduler.generator == generator + assert 
scheduler.worker == worker + assert scheduler.mode == mode + assert scheduler.rate == rate + assert scheduler.max_number == max_number + assert scheduler.max_duration == max_duration + assert isinstance(scheduler.load_generator, LoadGenerator) + assert scheduler.benchmark_mode in {"synchronous", "asynchronous", "throughput"} + + +@pytest.mark.sanity() +@pytest.mark.parametrize( + ("mode", "rate", "max_number", "max_duration"), + [ + # invalid modes + ("invalid_mode", None, 10, None), + # invalid max settings + ("synchronous", None, None, None), + ("synchronous", None, -1, 10), + ("synchronous", None, 10, -1), + # invalid rate settings + ("constant", -1, None, 10), + ("constant", None, None, 10), + ("poisson", -1, None, 10), + ("poisson", None, None, 10), + ], +) +def test_scheduler_invalid_instantiation( + mode, + rate, + max_number, + max_duration, +): + generator = create_autospec(RequestGenerator, instance=True) + worker = create_autospec(Backend, instance=True) + + with pytest.raises(ValueError): + Scheduler( + generator, + worker, + mode=mode, + rate=rate, + max_number=max_number, + max_duration=max_duration, + ) + + +@pytest.mark.sanity() +@pytest.mark.asyncio() +@pytest.mark.parametrize( + "mode", + [ + "synchronous", + "throughput", + "poisson", + "constant", + ], +) +async def test_scheduler_run_number(mode): + rate = 10.0 + max_number = 20 + generator = create_autospec(RequestGenerator, instance=True) + worker = create_autospec(Backend, instance=True) + + # Mock the request generator and backend submit behavior + generator.__iter__.return_value = iter( + [TextGenerationRequest(prompt="Test")] * (max_number * 2) + ) + worker.submit = AsyncMock() + + def _submit(req): + res = TextGenerationResult(request=req, output="Output") + res.start(prompt=req.prompt) + res.output_token("token") + res.end() + return res + + worker.submit.side_effect = _submit + + scheduler = Scheduler( + generator, + worker, + mode=mode, + rate=rate, + max_number=max_number, + ) + + count_completed = 0 + received_init = False + received_final = False + async for result in scheduler.run(): + assert result.count_total == max_number + assert result.benchmark is not None + assert isinstance(result.benchmark, TextGenerationBenchmark) + + if result.current_result: + assert isinstance(result.current_result, TextGenerationResult) + count_completed += 1 + assert result.count_completed == count_completed + assert not result.completed + elif not received_init: + assert count_completed == 0 + assert result.count_completed == 0 + assert not result.completed + received_init = True + elif not received_final: + assert count_completed == max_number + assert result.count_completed == max_number + assert result.completed + received_final = True + + assert count_completed == max_number + + +@pytest.mark.sanity() +@pytest.mark.asyncio() +@pytest.mark.parametrize( + "mode", + [ + "synchronous", + "throughput", + "poisson", + "constant", + ], +) +@pytest.mark.flaky(reruns=5) +async def test_scheduler_run_duration(mode): + rate = 10 + max_duration = 2 + generator = create_autospec(RequestGenerator, instance=True) + worker = create_autospec(Backend, instance=True) + + # Mock the request generator and backend submit behavior + generator.__iter__.return_value = iter( + [TextGenerationRequest(prompt="Test")] * (rate * max_duration * 100) + ) + worker.submit = AsyncMock() + + async def _submit(req): + await asyncio.sleep(0.1) + res = TextGenerationResult(request=req, output="Output") + res.start(prompt=req.prompt) + 
res.output_token("token") + res.end() + return res + + worker.submit.side_effect = _submit + + scheduler = Scheduler( + generator, + worker, + mode=mode, + rate=rate, + max_duration=max_duration, + ) + + count_completed = 0 + received_init = False + received_final = False + start_time = time.time() + async for result in scheduler.run(): + assert result.count_total == max_duration + assert result.benchmark is not None + assert isinstance(result.benchmark, TextGenerationBenchmark) + + if result.current_result: + assert isinstance( + result.current_result, (TextGenerationResult, TextGenerationError) + ) + count_completed += 1 + assert result.count_completed == round(time.time() - start_time) + assert not result.completed + elif not received_init: + assert count_completed == 0 + assert result.count_completed == 0 + assert not result.completed + received_init = True + elif not received_final: + assert result.count_completed == max_duration + assert result.completed + received_final = True + + end_time = time.time() + assert pytest.approx(end_time - start_time, abs=0.1) == max_duration + assert pytest.approx(count_completed, abs=5) == max_duration * rate diff --git a/tests/unit/scheduler/test_basics.py b/tests/unit/scheduler/test_basics.py deleted file mode 100644 index f0ca04a..0000000 --- a/tests/unit/scheduler/test_basics.py +++ /dev/null @@ -1,50 +0,0 @@ -from typing import Callable - -import pytest -from guidellm.backend import OpenAIBackend -from guidellm.scheduler import LoadGenerationMode, Scheduler - -from tests import dummy - - -@pytest.mark.parametrize( - ("load_gen_mode", "max_requests", "max_duration", "load_gen_rate"), - [ - # Sync load generation mode payload - (LoadGenerationMode.SYNCHRONOUS, None, None, None), - (LoadGenerationMode.SYNCHRONOUS, 1, -1, 1.0), - (LoadGenerationMode.SYNCHRONOUS, -1, 1, 1.0), - (LoadGenerationMode.SYNCHRONOUS, None, -1, 1.0), - # Constant load generation mode payload - (LoadGenerationMode.CONSTANT, None, None, 1.0), - (LoadGenerationMode.CONSTANT, -1, 1, 1.0), - (LoadGenerationMode.CONSTANT, 1, 1, None), - (LoadGenerationMode.CONSTANT, 1, 0, None), - (LoadGenerationMode.CONSTANT, 0, 0, None), - # Poisson load generation mode payload - (LoadGenerationMode.POISSON, None, None, 1.0), - (LoadGenerationMode.POISSON, -1, 1, 1.0), - (LoadGenerationMode.POISSON, 1, 1, None), - (LoadGenerationMode.POISSON, 1, 0, None), - (LoadGenerationMode.POISSON, 0, 0, None), - ], -) -def test_scheduler_invalid_parameters( - openai_backend_factory: Callable[..., OpenAIBackend], - load_gen_mode, - max_requests, - max_duration, - load_gen_rate, -): - """ - Test scheduler initializer parameters validation. 
- """ - with pytest.raises(ValueError): - Scheduler( - request_generator=dummy.services.TestRequestGenerator(), - backend=openai_backend_factory(), - load_gen_mode=load_gen_mode, - load_gen_rate=load_gen_rate, - max_requests=max_requests, - max_duration=max_duration, - ) diff --git a/tests/unit/scheduler/test_constant_mode.py b/tests/unit/scheduler/test_constant_mode.py deleted file mode 100644 index 5910329..0000000 --- a/tests/unit/scheduler/test_constant_mode.py +++ /dev/null @@ -1,60 +0,0 @@ -import time -from typing import Callable - -import pytest -from guidellm.backend import OpenAIBackend -from guidellm.core import TextGenerationBenchmark -from guidellm.scheduler import LoadGenerationMode, Scheduler - -from tests import dummy - - -@pytest.mark.sanity() -@pytest.mark.parametrize("max_requests", [1, 2, 3]) -def test_scheduler_max_requests_limitation( - openai_backend_factory: Callable[..., OpenAIBackend], - max_requests: int, -): - request_genrator = dummy.services.TestRequestGenerator( - tokenizer="bert-base-uncased", - ) - - scheduler = Scheduler( - request_generator=request_genrator, - backend=openai_backend_factory(), - load_gen_mode=LoadGenerationMode.CONSTANT, - load_gen_rate=1.0, - max_requests=max_requests, - max_duration=None, - ) - - benchmark: TextGenerationBenchmark = scheduler.run() - - assert len(benchmark.results) == max_requests - assert benchmark.errors == [] - - -@pytest.mark.sanity() -@pytest.mark.parametrize("max_duration", [1, 2, 3]) -def test_scheduler_max_duration_limitation( - openai_backend_factory: Callable[..., OpenAIBackend], - max_duration: int, -): - request_genrator = dummy.services.TestRequestGenerator( - tokenizer="bert-base-uncased", - ) - - scheduler = Scheduler( - request_generator=request_genrator, - backend=openai_backend_factory(), - load_gen_mode=LoadGenerationMode.CONSTANT, - load_gen_rate=1.0, - max_requests=None, - max_duration=max_duration, - ) - - start_time = time.perf_counter() - scheduler.run() - end_time = time.perf_counter() - start_time - - assert round(end_time) == max_duration diff --git a/tests/unit/scheduler/test_load_generator.py b/tests/unit/scheduler/test_load_generator.py new file mode 100644 index 0000000..6b84ee0 --- /dev/null +++ b/tests/unit/scheduler/test_load_generator.py @@ -0,0 +1,153 @@ +import time +from typing import get_args + +import pytest +from scipy.stats import kstest # type: ignore + +from guidellm.scheduler import LoadGenerationMode, LoadGenerator + + +@pytest.mark.smoke() +def test_load_generator_mode(): + assert set(get_args(LoadGenerationMode)) == { + "synchronous", + "constant", + "poisson", + "throughput", + } + + +@pytest.mark.smoke() +@pytest.mark.parametrize( + ("mode", "rate"), + [ + ("constant", 10), + ("poisson", 5), + ("throughput", None), + ("synchronous", None), + ], +) +def test_load_generator_instantiation(mode, rate): + generator = LoadGenerator(mode=mode, rate=rate) + assert generator.mode == mode + assert generator.rate == rate + + +@pytest.mark.regression() +@pytest.mark.parametrize( + ("mode", "rate", "expected_error"), + [ + ("invalid_mode", None, ValueError), + ("constant", 0, ValueError), + ("poisson", -1, ValueError), + ], +) +def test_load_generator_invalid_instantiation(mode, rate, expected_error): + with pytest.raises(expected_error): + LoadGenerator(mode=mode, rate=rate) + + +@pytest.mark.smoke() +@pytest.mark.parametrize( + ("mode", "rate"), + [ + ("synchronous", None), + ("throughput", None), + ("constant", 1), + ("poisson", 5), + ], +) +def test_load_generator_times(mode, 
rate): + # first check that the proper method is called + generator = LoadGenerator(mode=mode, rate=rate) + func_name = f"{mode}_times" + assert hasattr(generator, func_name) + assert callable(getattr(generator, func_name)) + + call_count = 0 + + def _increment_call_count(): + nonlocal call_count + call_count += 1 + yield -1.0 + + setattr(generator, func_name, _increment_call_count) + for time_ in generator.times(): + assert time_ == -1.0 + break + assert call_count == 1 + + # now check that the method generates reasonable timestamps + generator = LoadGenerator(mode=mode, rate=rate) + start_time = time.time() + for index, time_ in enumerate(generator.times()): + if index > 10: + break + + if mode == "synchronous": + assert time_ == -1.0 + else: + assert time_ >= start_time + + +@pytest.mark.smoke() +def test_load_generator_invalid_times(): + generator = LoadGenerator(mode="synchronous") + + for index, time_ in enumerate(generator.synchronous_times()): + if index > 10: + break + + assert time_ == -1.0 + + +@pytest.mark.smoke() +def test_load_generator_throughput_times(): + generator = LoadGenerator(mode="throughput") + + for index, time_ in enumerate(generator.throughput_times()): + if index > 10: + break + + assert time_ <= time.time() + + +@pytest.mark.smoke() +@pytest.mark.parametrize("rate", [1, 10, 42]) +def test_load_generator_constant_times(rate): + generator = LoadGenerator(mode="constant", rate=rate) + start_time = time.time() + + for index, time_ in enumerate(generator.constant_times()): + if index > 10: + break + + assert time_ == pytest.approx(start_time + index / rate, rel=1e-5) + + +@pytest.mark.smoke() +@pytest.mark.flaky(reruns=5) +def test_load_generator_poisson_times(): + rate = 5 + generator = LoadGenerator(mode="poisson", rate=rate) + start_time = time.time() + + times = [] + prev_time = start_time + + for index, current_time in enumerate(generator.poisson_times()): + if index > 100: + break + + times.append(current_time - prev_time) + prev_time = current_time + + mean_inter_arrival_time = 1 / rate + + # Perform Kolmogorov-Smirnov test to compare the sample distribution + # to the expected exponential distribution + ks_statistic, p_value = kstest(times, "expon", args=(0, mean_inter_arrival_time)) + assert p_value > 0.025, ( + f"Poisson-generated inter-arrival times do not " + f"match the expected exponential distribution (p-value: {p_value})" + ) diff --git a/tests/unit/scheduler/test_poission_mode.py b/tests/unit/scheduler/test_poission_mode.py deleted file mode 100644 index 1f10b2a..0000000 --- a/tests/unit/scheduler/test_poission_mode.py +++ /dev/null @@ -1,63 +0,0 @@ -import time -from typing import Callable - -import pytest -from guidellm.backend import OpenAIBackend -from guidellm.core import TextGenerationBenchmark -from guidellm.scheduler import LoadGenerationMode, Scheduler - -from tests import dummy - - -@pytest.mark.sanity() -@pytest.mark.parametrize("max_requests", [1, 5]) -def test_scheduler_max_requests_limitation( - openai_backend_factory: Callable[..., OpenAIBackend], - backend_submit_patch, - max_requests: int, -): - request_genrator = dummy.services.TestRequestGenerator( - tokenizer="bert-base-uncased", - ) - - scheduler = Scheduler( - request_generator=request_genrator, - backend=openai_backend_factory(), - load_gen_mode=LoadGenerationMode.POISSON, - load_gen_rate=1.0, - max_requests=max_requests, - max_duration=None, - ) - - benchmark: TextGenerationBenchmark = scheduler.run() - - assert backend_submit_patch.call_count == max_requests - assert 
len(benchmark.results) == max_requests - assert benchmark.errors == [] - - -@pytest.mark.skip("Poission can't be limited with max duration. TBD") -@pytest.mark.sanity() -@pytest.mark.parametrize("max_duration", [1, 3]) -def test_scheduler_max_duration_limitation( - openai_backend_factory: Callable[..., OpenAIBackend], - max_duration: int, -): - request_genrator = dummy.services.TestRequestGenerator( - tokenizer="bert-base-uncased", - ) - - scheduler = Scheduler( - request_generator=request_genrator, - backend=openai_backend_factory(), - load_gen_mode=LoadGenerationMode.POISSON, - load_gen_rate=1.0, - max_requests=None, - max_duration=max_duration, - ) - - start_time = time.perf_counter() - scheduler.run() - end_time = time.perf_counter() - start_time - - assert round(end_time) == max_duration diff --git a/tests/unit/scheduler/test_sync_mode.py b/tests/unit/scheduler/test_sync_mode.py deleted file mode 100644 index 7d08e6d..0000000 --- a/tests/unit/scheduler/test_sync_mode.py +++ /dev/null @@ -1,62 +0,0 @@ -import time -from typing import Callable - -import pytest -from guidellm.backend import OpenAIBackend -from guidellm.core import TextGenerationBenchmark -from guidellm.scheduler import LoadGenerationMode, Scheduler - -from tests import dummy - - -@pytest.mark.sanity() -@pytest.mark.parametrize("max_requests", [1, 5]) -def test_scheduler_max_requests_limitation( - openai_backend_factory: Callable[..., OpenAIBackend], - backend_submit_patch, - max_requests: int, -): - request_genrator = dummy.services.TestRequestGenerator( - tokenizer="bert-base-uncased", - ) - - scheduler = Scheduler( - request_generator=request_genrator, - backend=openai_backend_factory(), - load_gen_mode=LoadGenerationMode.SYNCHRONOUS, - load_gen_rate=None, - max_requests=max_requests, - max_duration=None, - ) - - benchmark: TextGenerationBenchmark = scheduler.run() - - assert backend_submit_patch.call_count == max_requests - assert len(benchmark.results) == max_requests - assert benchmark.errors == [] - - -@pytest.mark.sanity() -@pytest.mark.parametrize("max_duration", [1, 3]) -def test_scheduler_max_duration_limitation( - openai_backend_factory: Callable[..., OpenAIBackend], - max_duration: int, -): - request_genrator = dummy.services.TestRequestGenerator( - tokenizer="bert-base-uncased", - ) - - scheduler = Scheduler( - request_generator=request_genrator, - backend=openai_backend_factory(), - load_gen_mode=LoadGenerationMode.SYNCHRONOUS, - load_gen_rate=None, - max_requests=None, - max_duration=max_duration, - ) - - start_time = time.perf_counter() - scheduler.run() - end_time = time.perf_counter() - start_time - - assert round(end_time) == max_duration diff --git a/tests/unit/config/test_base.py b/tests/unit/test_config.py similarity index 98% rename from tests/unit/config/test_base.py rename to tests/unit/test_config.py index 0e9dadb..13e1699 100644 --- a/tests/unit/config/test_base.py +++ b/tests/unit/test_config.py @@ -1,5 +1,6 @@ import pytest -from guidellm.config.base import ( + +from guidellm.config import ( Environment, LoggingSettings, OpenAISettings, diff --git a/tests/unit/test_logger.py b/tests/unit/test_logger.py index 91e6649..943d6e1 100644 --- a/tests/unit/test_logger.py +++ b/tests/unit/test_logger.py @@ -1,12 +1,13 @@ from pathlib import Path import pytest + from guidellm import configure_logger, logger -from guidellm.config.base import LoggingSettings +from guidellm.config import LoggingSettings @pytest.fixture(autouse=True) -def reset_logger(): +def reset_logger(): # noqa: PT004 # Ensure logger 
is reset before each test logger.remove() yield @@ -105,5 +106,5 @@ def test_logging_disabled(capsys): logger.error("Error message") captured = capsys.readouterr() - assert captured.out == "" - assert captured.err == "" + assert not captured.out + assert not captured.err diff --git a/tests/unit/utils/test_injector.py b/tests/unit/utils/test_injector.py index 87484de..9a58575 100644 --- a/tests/unit/utils/test_injector.py +++ b/tests/unit/utils/test_injector.py @@ -1,16 +1,11 @@ from pathlib import Path -from unittest.mock import mock_open, patch import pytest -import requests -from guidellm.config import settings -from guidellm.utils.constants import ( - REPORT_HTML_MATCH, - REPORT_HTML_PLACEHOLDER, -) -from guidellm.utils.injector import create_report, inject_data, load_html_file from pydantic import BaseModel +from guidellm.config import settings +from guidellm.utils.injector import create_report, inject_data + class ExampleModel(BaseModel): name: str @@ -23,50 +18,15 @@ def test_inject_data(): html = "window.report_data = {};" expected_html = 'window.report_data = {"name":"Example App","version":"1.0.0"};' - result = inject_data(model, html, REPORT_HTML_MATCH, REPORT_HTML_PLACEHOLDER) + result = inject_data( + model, + html, + settings.report_generation.report_html_match, + settings.report_generation.report_html_placeholder, + ) assert result == expected_html -@pytest.mark.smoke() -def test_load_html_file_from_url(requests_mock): - url = "http://example.com/report.html" - mock_content = "Sample Report" - requests_mock.get(url, text=mock_content) - - result = load_html_file(url) - assert result == mock_content - - -@pytest.mark.sanity() -def test_load_html_file_from_invalid_url(requests_mock): - url = "http://example.com/404.html" - requests_mock.get(url, status_code=404) - - with pytest.raises(requests.exceptions.HTTPError): - load_html_file(url) - - -@pytest.mark.smoke() -def test_load_html_file_from_path(): - path = "sample_report.html" - mock_content = "Sample Report" - - with patch("pathlib.Path.open", mock_open(read_data=mock_content)), patch( - "pathlib.Path.exists", return_value=True - ): - result = load_html_file(path) - - assert result == mock_content - - -@pytest.mark.sanity() -def test_load_html_file_from_invalid_path(): - path = "invalid_report.html" - - with pytest.raises(FileNotFoundError): - load_html_file(path) - - @pytest.mark.smoke() def test_create_report_to_file(tmpdir): model = ExampleModel(name="Example App", version="1.0.0") diff --git a/tests/unit/utils/test_text.py b/tests/unit/utils/test_text.py new file mode 100644 index 0000000..1d89ee3 --- /dev/null +++ b/tests/unit/utils/test_text.py @@ -0,0 +1,394 @@ +from pathlib import Path +from unittest.mock import patch + +import pytest +import requests + +from guidellm.utils.text import ( + clean_text, + filter_text, + is_path, + is_path_like, + is_url, + load_text, + load_text_lines, + parse_text_objects, + split_lines_by_punctuation, + split_text, +) + + +@pytest.fixture() +def sample_text(): + return "This is a sample text.\nThis is another line!" 
+ + +@pytest.fixture() +def sample_dict_data(): + return [{"text": "line 1"}, {"text": "line 2"}, {"text": "line 3"}] + + +@pytest.fixture() +def sample_csv_data(): + return "text\nline 1\nline 2\nline 3" + + +@pytest.fixture() +def sample_jsonl_data(): + return '{"text": "line 1"}\n{"text": "line 2"}\n{"text": "line 3"}' + + +@pytest.fixture() +def sample_yaml_data(): + return """ + text: + - line 1 + - line 2 + - line 3 + """ + + +@pytest.fixture() +def mock_response(): + response = requests.Response() + response.status_code = 200 + response._content = b"Mock content" + return response + + +@pytest.mark.smoke() +@pytest.mark.parametrize( + ("text", "start", "end", "expected"), + [ + ("hello world", "hello", "world", "hello "), + ("hello world", "world", None, "world"), + ("hello world", None, "hello", ""), + ("hello world", None, None, "hello world"), + ], +) +def test_filter_text(text, start, end, expected): + assert filter_text(text, start, end) == expected + + +@pytest.mark.smoke() +@pytest.mark.parametrize( + ( + "text", + "fix_encoding", + "clean_whitespace", + "remove_empty_lines", + "force_new_line_punctuation", + "expected", + ), + [ + ( + "This is\ta test.\n New line.", + True, + True, + False, + False, + "This is a test.\nNew line.", + ), + ( + "This is\ta test.\n New line.", + True, + True, + True, + False, + "This is a test.\nNew line.", + ), + ( + "This is a test. New line.", + True, + False, + False, + True, + "This is a test.\nNew line.", + ), + ], +) +def test_clean_text( + text, + fix_encoding, + clean_whitespace, + remove_empty_lines, + force_new_line_punctuation, + expected, +): + assert ( + clean_text( + text, + fix_encoding, + clean_whitespace, + remove_empty_lines, + force_new_line_punctuation, + ) + == expected + ) + + +@pytest.mark.smoke() +def test_split_lines_by_punctuation(sample_text): + expected = ["This is a sample text.", "This is another line!"] + assert split_lines_by_punctuation(sample_text) == expected + + +@pytest.mark.smoke() +@pytest.mark.parametrize( + ("url", "expected"), + [ + ("https://example.com", True), + ("ftp://example.com", True), + ("not a url", False), + ], +) +def test_is_url(url, expected): + assert is_url(url) == expected + + +@pytest.mark.smoke() +@pytest.mark.parametrize( + ("path", "expected"), + [ + (str(Path(__file__)), True), + ("/non/existent/path", False), + ], +) +def test_is_path(path, expected): + assert is_path(path) == expected + + +@pytest.mark.smoke() +@pytest.mark.parametrize( + ("path", "enforce_file", "expected"), + [ + (str(Path(__file__)), True, True), + ("/non/existent/path", False, True), + ("https://example.com", False, False), + ], +) +def test_is_path_like(path, enforce_file, expected): + assert is_path_like(path, enforce_file) == expected + + +@pytest.mark.smoke() +def test_split_text(sample_text): + words, separators, new_lines = split_text(sample_text) + assert words == [ + "This", + "is", + "a", + "sample", + "text.", + "This", + "is", + "another", + "line!", + ] + assert separators == [" ", " ", " ", " ", "\n", " ", " ", " ", " "] + assert new_lines == [0, 5] + + +@pytest.mark.smoke() +@pytest.mark.parametrize( + ("data", "format_", "expected"), + [ + ("text\nline 1\nline 2", "csv", [{"text": "line 1"}, {"text": "line 2"}]), + ( + '{"text": "line 1"}\n{"text": "line 2"}', + "jsonl", + [{"text": "line 1"}, {"text": "line 2"}], + ), + ], +) +def test_parse_text_objects(data, format_, expected): + assert parse_text_objects(data, format_) == expected + + +@pytest.mark.smoke() +@pytest.mark.parametrize( + 
("data", "expected"), + [ + ("https://example.com", "Mock content"), + (str(Path(__file__)), Path(__file__).read_text()), + ], +) +def test_load_text(data, expected, mock_response): + with patch("requests.get", return_value=mock_response): + assert load_text(data) == expected + + +@pytest.mark.regression() +def test_load_text_file_not_found(): + with pytest.raises(FileNotFoundError): + load_text("/non/existent/file.txt") + + +@pytest.mark.smoke() +@pytest.mark.parametrize( + ("data", "format_", "filters", "expected"), + [ + ("text\nline 1\nline 2", "csv", None, ["line 1", "line 2"]), + ('{"text": "line 1"}\n{"text": "line 2"}', "jsonl", None, ["line 1", "line 2"]), + ("text\nline 1\nline 2", "txt", None, ["text", "line 1", "line 2"]), + ], +) +def test_load_text_lines(data, format_, filters, expected): + assert load_text_lines(data, format_=format_, filters=filters) == expected + + +@pytest.mark.regression() +def test_load_text_lines_invalid_data(): + with pytest.raises(ValueError): + load_text_lines(123) # type: ignore + + +@pytest.mark.regression() +def test_parse_text_objects_invalid_format(): + with pytest.raises(ValueError): + parse_text_objects("text", format_="unsupported") + + +@pytest.mark.regression() +def test_parse_text_objects_invalid_data(): + with pytest.raises(ValueError): + parse_text_objects(123) # type: ignore + + +@pytest.mark.regression() +@pytest.mark.parametrize( + ("data", "format_", "filters", "expected"), + [ + ( + "text\nline 1\nline 2\n", + "csv", + ["text"], + ["line 1", "line 2"], + ), + ], +) +def test_load_text_lines_with_filters(data, format_, filters, expected): + assert load_text_lines(data, format_=format_, filters=filters) == expected + + +@pytest.mark.regression() +def test_is_path_with_symlink(tmp_path): + # Create a symlink to a temporary file + target_file = tmp_path / "target_file.txt" + target_file.write_text("Sample content") + symlink_path = tmp_path / "symlink" + symlink_path.symlink_to(target_file) + + assert is_path(str(symlink_path)) is True + + +@pytest.mark.regression() +def test_is_path_like_with_symlink(tmp_path): + # Create a symlink to a temporary file + target_file = tmp_path / "target_file.txt" + target_file.write_text("Sample content") + symlink_path = tmp_path / "symlink.file" + symlink_path.symlink_to(target_file) + + assert is_path_like(str(symlink_path), enforce_file=True) is True + + +@pytest.mark.regression() +def test_load_text_lines_empty(): + # Test loading text lines from an empty string + assert load_text_lines("") == [] + + +@pytest.mark.regression() +def test_split_text_with_empty_string(): + words, separators, new_lines = split_text("") + assert words == [] + assert separators == [] + assert new_lines == [] + + +@pytest.mark.regression() +def test_split_lines_by_punctuation_with_no_punctuation(): + text = "This is a test without punctuation" + assert split_lines_by_punctuation(text) == [text] + + +@pytest.mark.regression() +def test_is_path_invalid_type(): + assert not is_path(None) + assert not is_path(123) + assert not is_path(["not", "a", "path"]) + + +@pytest.mark.regression() +def test_is_path_like_invalid_type(): + assert not is_path_like(None, enforce_file=False) + assert not is_path_like(123, enforce_file=True) + assert not is_path_like(["not", "a", "path"], enforce_file=False) + + +@pytest.mark.regression() +def test_load_text_invalid_url(): + with pytest.raises(requests.ConnectionError): + load_text("http://invalid.url") + + +@pytest.mark.regression() +def test_parse_text_objects_empty_csv(): + assert 
parse_text_objects("text\n", "csv") == [] + + +@pytest.mark.regression() +def test_parse_text_objects_empty_jsonl(): + assert parse_text_objects("", "jsonl") == [] + + +@pytest.mark.regression() +def test_parse_text_objects_invalid_jsonl(): + with pytest.raises(ValueError): + parse_text_objects("{invalid_json}", "jsonl") + + +@pytest.mark.regression() +def test_parse_text_objects_empty_yaml(): + assert parse_text_objects("", "yaml") == [] + + +@pytest.mark.regression() +def test_clean_text_with_unicode(): + text = "This is a test with unicode: \u2013 \u2014" + cleaned_text = clean_text(text, fix_encoding=True, clean_whitespace=True) + assert cleaned_text == "This is a test with unicode: – —" + + +@pytest.mark.regression() +def test_split_lines_by_punctuation_with_multiple_punctuations(): + text = "First sentence. Second sentence? Third sentence!" + expected = ["First sentence.", "Second sentence?", "Third sentence!"] + assert split_lines_by_punctuation(text) == expected + + +@pytest.mark.regression() +def test_is_url_empty_string(): + assert not is_url("") + + +@pytest.mark.regression() +def test_load_text_invalid_data(): + with pytest.raises(TypeError): + load_text(123) # type: ignore + + +@pytest.mark.regression() +def test_load_text_lines_empty_format(): + data = "text\nline 1\nline 2" + assert load_text_lines(data, format_="") == ["text", "line 1", "line 2"] + + +@pytest.mark.regression() +def test_split_text_with_mixed_separators(): + text = "This\tis a test\nwith mixed separators." + words, separators, new_lines = split_text(text) + assert words == ["This", "is", "a", "test", "with", "mixed", "separators."] + assert separators == ["\t", " ", " ", "\n", " ", " ", " "] + assert new_lines == [0, 4] diff --git a/tests/unit/utils/test_transformers.py b/tests/unit/utils/test_transformers.py new file mode 100644 index 0000000..5153da3 --- /dev/null +++ b/tests/unit/utils/test_transformers.py @@ -0,0 +1,236 @@ +from unittest.mock import patch + +import pytest +from datasets import ( # type: ignore + Dataset, + DatasetDict, + IterableDataset, + IterableDatasetDict, +) + +from guidellm.utils.transformers import ( + load_transformers_dataset, + resolve_transformers_dataset, + resolve_transformers_dataset_column, + resolve_transformers_dataset_split, +) +from tests.dummy.data.transformers import ( + create_sample_dataset, + create_sample_dataset_dict, + create_sample_iterable_dataset, + create_sample_iterable_dataset_dict, +) + + +@pytest.mark.smoke() +@pytest.mark.parametrize( + ("dataset_arg", "dataset", "split", "preferred_splits", "expected_type"), + [ + ( + "mock/directory/file.csv", + create_sample_dataset_dict(splits=["train"]), + "train", + None, + Dataset, + ), + ( + "mock/directory/file.json", + create_sample_dataset_dict(splits=["test"]), + None, + ("train", "test"), + Dataset, + ), + ( + "mock/directory/file.py", + create_sample_dataset_dict(splits=["test"], column="output"), + None, + None, + Dataset, + ), + ( + create_sample_dataset_dict(splits=["val", "train"], column="custom"), + None, + "val", + None, + Dataset, + ), + ( + create_sample_dataset(), + None, + None, + None, + Dataset, + ), + ( + create_sample_iterable_dataset_dict(splits=["validation"]), + None, + None, + None, + IterableDataset, + ), + ( + create_sample_iterable_dataset(), + None, + "validation", + None, + IterableDataset, + ), + ], +) +def test_load_transformers_dataset( + dataset_arg, dataset, split, preferred_splits, expected_type +): + with patch( + "guidellm.utils.transformers.load_dataset", + 
return_value=dataset, + ): + loaded_dataset = load_transformers_dataset( + dataset_arg, split=split, preferred_splits=preferred_splits + ) + assert isinstance(loaded_dataset, expected_type) + + +@pytest.mark.smoke() +@pytest.mark.parametrize( + ("dataset_arg", "dataset", "split", "preferred_splits", "expected_type"), + [ + ( + "mock/directory/file.csv", + create_sample_dataset(), + "train", + None, + Dataset, + ), + ( + "mock/directory/file.json", + create_sample_dataset_dict(splits=["test"]), + None, + ("train", "test"), + DatasetDict, + ), + ( + "mock/directory/file.py", + create_sample_dataset_dict(splits=["test"], column="output"), + None, + None, + DatasetDict, + ), + ( + "mock/directory/file.unk", + create_sample_dataset_dict(splits=["test"], column="output"), + None, + None, + DatasetDict, + ), + ( + create_sample_dataset_dict(splits=["val", "train"], column="custom"), + None, + "val", + None, + DatasetDict, + ), + ( + create_sample_dataset(), + None, + None, + None, + Dataset, + ), + ( + create_sample_iterable_dataset_dict(splits=["validation"]), + None, + None, + None, + IterableDatasetDict, + ), + ( + create_sample_iterable_dataset(), + None, + "validation", + None, + IterableDataset, + ), + ], +) +def test_resolve_transformers_dataset( + dataset_arg, dataset, split, preferred_splits, expected_type +): + with patch( + "guidellm.utils.transformers.load_dataset", + return_value=dataset, + ): + loaded_dataset = resolve_transformers_dataset( + dataset_arg, split=split, preferred_splits=preferred_splits + ) + assert isinstance(loaded_dataset, expected_type) + + +@pytest.mark.sanity() +def test_resolve_transformers_dataset_invalid(): + with pytest.raises(ValueError): + resolve_transformers_dataset(123) + + +@pytest.mark.smoke() +@pytest.mark.parametrize( + ("dataset", "split", "preferred_splits", "expected_type"), + [ + ( + create_sample_dataset(), + None, + None, + Dataset, + ), + ( + create_sample_iterable_dataset_dict(splits=["validation"]), + None, + None, + IterableDataset, + ), + ( + create_sample_iterable_dataset(), + "validation", + None, + IterableDataset, + ), + ], +) +def test_resolve_transformers_dataset_split( + dataset, split, preferred_splits, expected_type +): + loaded_dataset = resolve_transformers_dataset_split( + dataset, split=split, preferred_splits=preferred_splits + ) + assert isinstance(loaded_dataset, expected_type) + + +def test_resolve_transformers_dataset_split_missing(): + dataset = create_sample_dataset_dict() + with pytest.raises(ValueError): + resolve_transformers_dataset_split(dataset, split="missing") + + +@pytest.mark.smoke() +@pytest.mark.parametrize( + ("dataset", "column", "preferred_columns", "expected_column"), + [ + (create_sample_dataset(), None, None, "text"), + (create_sample_dataset(), "text", None, "text"), + (create_sample_dataset(), None, ["text"], "text"), + (create_sample_dataset(), None, ["data"], "text"), + (create_sample_iterable_dataset(), None, None, "text"), + ], +) +def test_resolve_transformers_dataset_column( + dataset, column, preferred_columns, expected_column +): + resolved_column = resolve_transformers_dataset_column( + dataset, column=column, preferred_columns=preferred_columns + ) + assert resolved_column == expected_column + + +def test_resolve_transformers_dataset_column_missing(): + dataset = create_sample_dataset() + with pytest.raises(ValueError): + resolve_transformers_dataset_column(dataset, column="missing") diff --git a/tox.ini b/tox.ini index 076f7b8..d2fa64c 100644 --- a/tox.ini +++ b/tox.ini @@ -48,8 +48,8 @@ 
description = Run style checks and fixes deps = .[dev] commands = - ruff check --fix ruff format + ruff check --fix [testenv:types] From 2c340ec7b54b4f8c8d489fe17e83588b9aa26b4c Mon Sep 17 00:00:00 2001 From: Dmytro Parfeniuk Date: Wed, 21 Aug 2024 22:00:55 +0300 Subject: [PATCH 2/4] =?UTF-8?q?=F0=9F=91=B7=20utils/inject=5Fbuild=5Fprops?= =?UTF-8?q?.py=20controls=20the=20dist=20names?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .github/workflows/development.yml | 4 +++- .github/workflows/nightly.yml | 4 +++- .github/workflows/release.yml | 4 +++- .github/workflows/staging.yml | 4 +++- 4 files changed, 12 insertions(+), 4 deletions(-) diff --git a/.github/workflows/development.yml b/.github/workflows/development.yml index 914df24..3f51ee9 100644 --- a/.github/workflows/development.yml +++ b/.github/workflows/development.yml @@ -51,6 +51,9 @@ jobs: name: Build & Publish the distribution needs: [unit-tests, integration-tests, e2e-tests] runs-on: ubuntu-latest + env: + GUIDELLM_BUILD_TYPE: dev + GUIDELLM_BUILD_NUMBER: ${{ github.event.pull_request.number || github.run_number }} steps: - uses: actions/checkout@v4 - name: Set up Python @@ -62,4 +65,3 @@ jobs: with: publish_pypi: false publish_pypi_internal: true - build_type: "dev" diff --git a/.github/workflows/nightly.yml b/.github/workflows/nightly.yml index 86cbddf..1b53026 100644 --- a/.github/workflows/nightly.yml +++ b/.github/workflows/nightly.yml @@ -71,6 +71,9 @@ jobs: publish: needs: [unit-tests, integration-tests, e2e-tests] runs-on: ubuntu-latest + env: + GUIDELLM_BUILD_TYPE: nightly + GUIDELLM_BUILD_NUMBER: ${{ github.event.pull_request.number || github.run_number }} steps: - uses: actions/checkout@v4 - name: Set up Python @@ -82,4 +85,3 @@ jobs: with: publish_pypi: true publish_pypi_internal: true - build_type: "nightly" diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index fac9898..fd0975e 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -75,6 +75,9 @@ jobs: name: Build & Publish the distribution needs: [unit-tests, integration-tests, e2e-tests] runs-on: ubuntu-latest + env: + GUIDELLM_BUILD_TYPE: release + GUIDELLM_BUILD_NUMBER: ${{ github.event.pull_request.number || github.run_number }} steps: - uses: actions/checkout@v4 - name: Set up Python @@ -86,4 +89,3 @@ jobs: with: publish_pypi: true publish_pypi_internal: true - build_type: "release" diff --git a/.github/workflows/staging.yml b/.github/workflows/staging.yml index 44ec439..572e2d9 100644 --- a/.github/workflows/staging.yml +++ b/.github/workflows/staging.yml @@ -73,6 +73,9 @@ jobs: name: Build & Publish the distribution needs: [unit-tests, integration-tests, e2e-tests] runs-on: ubuntu-latest + env: + GUIDELLM_BUILD_TYPE: release + GUIDELLM_BUILD_NUMBER: ${{ github.event.pull_request.number || github.run_number }} steps: - uses: actions/checkout@v4 - name: Set up Python @@ -84,4 +87,3 @@ jobs: with: publish_pypi: false publish_pypi_internal: true - build_type: "release" From a174a8e72203bf249bd7803097c70294c4ba2f31 Mon Sep 17 00:00:00 2001 From: Dmytro Parfeniuk Date: Thu, 22 Aug 2024 18:57:23 +0300 Subject: [PATCH 3/4] =?UTF-8?q?=F0=9F=94=A5=20utils/inject=5Fbuild=5Fprops?= =?UTF-8?q?=20->=20/nm-actions=20repository?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- tox.ini | 1 - utils/__init__.py | 0 utils/inject_build_props.py | 79 ------------------------------------- 3 files changed, 80 deletions(-) delete mode 
100644 utils/__init__.py delete mode 100644 utils/inject_build_props.py diff --git a/tox.ini b/tox.ini index 076f7b8..5387b53 100644 --- a/tox.ini +++ b/tox.ini @@ -69,7 +69,6 @@ deps = loguru toml commands = - python utils/inject_build_props.py python -m build diff --git a/utils/__init__.py b/utils/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/utils/inject_build_props.py b/utils/inject_build_props.py deleted file mode 100644 index 74000dd..0000000 --- a/utils/inject_build_props.py +++ /dev/null @@ -1,79 +0,0 @@ -import os -import re -from datetime import datetime -from pathlib import Path - -import toml -from loguru import logger - - -def get_build_type(): - return os.getenv("GUIDELLM_BUILD_TYPE", "dev") - - -def get_build_number(): - return os.getenv("GUIDELLM_BUILD_NUMBER", "0") - - -def construct_project_name_and_version(build_type, build_number, current_version): - if not re.match(r"^\d+\.\d+\.\d+$", current_version): - raise ValueError( - f"Version '{current_version}' does not match the " - f"semantic versioning pattern '#.#.#'", - ) - - if build_type == "dev": - project_name = "guidellm_dev" - version = f"{current_version}.dev{build_number}" - elif build_type == "nightly": - project_name = "guidellm_nightly" - date_str = datetime.now().strftime("%Y%m%d") - version = f"{current_version}.{date_str}" - elif build_type == "release": - project_name = "guidellm" - version = current_version - else: - raise ValueError(f"Unknown build type: {build_type}") - - return project_name, version - - -def update_pyproject_toml(project_name, version): - try: - with Path("pyproject.toml").open() as file: - data = toml.load(file) - - data["project"]["name"] = project_name - data["project"]["version"] = version - - with Path("pyproject.toml").open("w") as file: - toml.dump(data, file) - - logger.info( - f"Updated project name to: {project_name} and version to: {version}", - ) - except (FileNotFoundError, toml.TomlDecodeError) as e: - logger.error(f"Error reading or writing pyproject.toml: {e}") - raise - - -def main(): - build_type = get_build_type() - build_number = get_build_number() - - with Path("pyproject.toml").open() as file: - pyproject_data = toml.load(file) - - current_version = pyproject_data["project"]["version"] - project_name, version = construct_project_name_and_version( - build_type, - build_number, - current_version, - ) - - if build_type != "release": - update_pyproject_toml(project_name, version) - - -if __name__ == "__main__": - main() From 59ba130275f27a6eff6acc8515244517a1f1a22b Mon Sep 17 00:00:00 2001 From: Mark Kurtz Date: Fri, 23 Aug 2024 14:22:22 -0400 Subject: [PATCH 4/4] Add Detailed Benchmark Progress Tracking and Enhanced Report Generation Features (#28) ## Summary This PR introduces enhancements to the benchmarking and report generation functionalities in the GuideLLM project. Key updates include the integration of detailed progress tracking using the rich library, improvements to performance statistics, and adjustments to report output formats. ## Details - Added `BenchmarkReportProgress` class to manage real-time progress updates for benchmarks and report generation using `rich`. - Updated the report structure to include comprehensive performance statistics, such as request latency, time to first token, and inter-token latency. - Introduced new methods to generate detailed tables summarizing requests, data tokens, and performance metrics in reports. 
- Enhanced the executor logic to handle different benchmark modes (e.g., sweep, synchronous, throughput) more effectively. - Refined error handling and logging for better clarity and debugging. - Modified unit tests to cover new functionalities and ensure robustness across various scenarios. - Adjusted configuration and settings to optimize benchmarking capabilities and ensure consistent output. ## Test Plan - Added new unit tests to verify the functionality of `BenchmarkReportProgress` and other report generation methods. - Performed end-to-end testing of different benchmark modes to ensure accurate real-time progress tracking and report generation. - Verified that all existing tests pass without any regression failures. --- .pre-commit-config.yaml | 1 + pyproject.toml | 2 + src/guidellm/__init__.py | 8 + src/guidellm/backend/base.py | 41 ++ src/guidellm/backend/openai.py | 24 +- src/guidellm/config.py | 8 +- src/guidellm/core/report.py | 310 ++++++++++++- src/guidellm/core/result.py | 179 +++++++- src/guidellm/executor/__init__.py | 3 +- src/guidellm/executor/base.py | 38 +- src/guidellm/executor/profile_generator.py | 83 ++-- src/guidellm/main.py | 41 +- src/guidellm/request/__init__.py | 3 +- src/guidellm/request/base.py | 28 ++ src/guidellm/request/emulated.py | 8 +- src/guidellm/request/file.py | 8 +- src/guidellm/request/transformers.py | 8 +- src/guidellm/scheduler/base.py | 121 ++--- src/guidellm/utils/__init__.py | 2 + src/guidellm/utils/progress.py | 196 ++++++++ tests/dummy/services/requests.py | 18 +- tests/unit/backend/test_base.py | 24 + tests/unit/backend/test_openai_backend.py | 6 +- tests/unit/conftest.py | 4 +- tests/unit/core/test_report.py | 47 +- tests/unit/core/test_result.py | 4 +- tests/unit/executor/test_base.py | 428 ++++++++++++++---- tests/unit/executor/test_profile_generator.py | 14 +- tests/unit/scheduler/test_base.py | 64 ++- tests/unit/test_logger.py | 5 +- tests/unit/utils/test_progress.py | 116 +++++ 31 files changed, 1592 insertions(+), 250 deletions(-) create mode 100644 src/guidellm/utils/progress.py create mode 100644 tests/unit/utils/test_progress.py diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 4bd9dc5..2a085bb 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -26,6 +26,7 @@ repos: pydantic_settings, pyyaml, requests, + rich, transformers, # dev dependencies diff --git a/pyproject.toml b/pyproject.toml index 4d54edc..5854c77 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -35,6 +35,7 @@ dependencies = [ "pydantic-settings>=2.0.0", "pyyaml>=6.0.0", "requests", + "rich", "transformers", ] @@ -182,6 +183,7 @@ select = [ "N806", # allow uppercase variable names in tests "PGH003", # allow general ignores in tests "S106", # allow hardcoded passwords in tests + "PLR0915", # allow complex statements in tests ] [tool.ruff.lint.isort] diff --git a/src/guidellm/__init__.py b/src/guidellm/__init__.py index de2a220..fc660fc 100644 --- a/src/guidellm/__init__.py +++ b/src/guidellm/__init__.py @@ -3,7 +3,15 @@ evaluating and benchmarking large language models (LLMs).
""" +import os + +import transformers # type: ignore # noqa: PGH003 + from .config import settings from .logger import configure_logger, logger __all__ = ["configure_logger", "logger", "settings"] + + +os.environ["TOKENIZERS_PARALLELISM"] = "false" # Silence warnings for tokenizers +transformers.logging.set_verbosity_error() # Silence warnings for transformers diff --git a/src/guidellm/backend/base.py b/src/guidellm/backend/base.py index a5fc35e..c22b477 100644 --- a/src/guidellm/backend/base.py +++ b/src/guidellm/backend/base.py @@ -51,6 +51,12 @@ class Backend(ABC): :cvar _registry: A dictionary that maps BackendEngine types to backend classes. :type _registry: Dict[BackendEngine, Type[Backend]] + :param type_: The type of the backend. + :type type_: BackendEngine + :param target: The target URL for the backend. + :type target: str + :param model: The model used by the backend. + :type model: str """ _registry: Dict[BackendEngine, "Type[Backend]"] = {} @@ -96,6 +102,11 @@ def create(cls, backend_type: BackendEngine, **kwargs) -> "Backend": return Backend._registry[backend_type](**kwargs) + def __init__(self, type_: BackendEngine, target: str, model: str): + self._type = type_ + self._target = target + self._model = model + @property def default_model(self) -> str: """ @@ -107,6 +118,36 @@ def default_model(self) -> str: """ return _cachable_default_model(self) + @property + def type_(self) -> BackendEngine: + """ + Get the type of the backend. + + :return: The type of the backend. + :rtype: BackendEngine + """ + return self._type + + @property + def target(self) -> str: + """ + Get the target URL for the backend. + + :return: The target URL. + :rtype: str + """ + return self._target + + @property + def model(self) -> str: + """ + Get the model used by the backend. + + :return: The model name. + :rtype: str + """ + return self._model + async def submit(self, request: TextGenerationRequest) -> TextGenerationResult: """ Submit a text generation request and return the result. diff --git a/src/guidellm/backend/openai.py b/src/guidellm/backend/openai.py index 3bebb7e..2a12882 100644 --- a/src/guidellm/backend/openai.py +++ b/src/guidellm/backend/openai.py @@ -45,10 +45,19 @@ def __init__( self._request_args: Dict = request_args api_key: str = openai_api_key or settings.openai.api_key + if not api_key: + err = ValueError( + "`GUIDELLM__OPENAI__API_KEY` environment variable or " + "--openai-api-key CLI parameter must be specified for the " + "OpenAI backend." + ) + logger.error("{}", err) + raise err + if target: base_url = target elif host and port: - base_url = f"{host}:{port}" + base_url = f"{host}:{port}/v1" elif settings.openai.base_url: base_url = settings.openai.base_url else: @@ -61,22 +70,11 @@ def __init__( self._async_client = AsyncOpenAI(api_key=api_key, base_url=base_url) self._client = OpenAI(api_key=api_key, base_url=base_url) - - self.validate_connection() self._model = model or self.default_model + super().__init__(type_="openai_server", target=base_url, model=self._model) logger.info("OpenAI {} Backend listening on {}", self._model, base_url) - @property - def model(self) -> str: - """ - Get the model used by this backend. - - :return: The model name. 
- :rtype: str - """ - return self._model - async def make_request( self, request: TextGenerationRequest, diff --git a/src/guidellm/config.py b/src/guidellm/config.py index fc15eff..d6acfd4 100644 --- a/src/guidellm/config.py +++ b/src/guidellm/config.py @@ -43,7 +43,7 @@ class LoggingSettings(BaseModel): disabled: bool = False clear_loggers: bool = True - console_log_level: str = "INFO" + console_log_level: str = "WARNING" log_file: Optional[str] = None log_file_level: Optional[str] = None @@ -98,7 +98,7 @@ class OpenAISettings(BaseModel): """ # OpenAI API key. - api_key: str = "" + api_key: str = "invalid_token" # OpenAI-compatible server URL # NOTE: The default value is default address of llama.cpp web server @@ -141,8 +141,8 @@ class Settings(BaseSettings): # general settings env: Environment = Environment.PROD request_timeout: int = 30 - max_concurrency: int = 128 - num_sweep_profiles: int = 10 + max_concurrency: int = 512 + num_sweep_profiles: int = 9 logging: LoggingSettings = LoggingSettings() # Data settings diff --git a/src/guidellm/core/report.py b/src/guidellm/core/report.py index 12375fc..b6791e4 100644 --- a/src/guidellm/core/report.py +++ b/src/guidellm/core/report.py @@ -1,21 +1,321 @@ -from typing import List +import time +from datetime import datetime +from typing import List, Optional +from loguru import logger from pydantic import Field +from rich.console import Console, Group +from rich.live import Live +from rich.panel import Panel +from rich.table import Table -from guidellm.core.result import TextGenerationBenchmarkReport +from guidellm.core.result import TextGenerationBenchmark, TextGenerationBenchmarkReport from guidellm.core.serializable import Serializable -__all__ = [ - "GuidanceReport", -] +__all__ = ["GuidanceReport"] + + +def _create_benchmark_report_details(report: TextGenerationBenchmarkReport) -> str: + """ + Create a detailed string representation of a benchmark report. + + :param report: The benchmark report to generate details for. + :type report: TextGenerationBenchmarkReport + :return: A string containing the backend, data, rate, and limits of + the benchmark report. + :rtype: str + """ + backend = ( + f"Backend(type={report.args.get('backend_type', 'N/A')}, " + f"target={report.args.get('target', 'N/A')}, " + f"model={report.args.get('model', 'N/A')})" + ) + data = ( + f"Data(type={report.args.get('data_type', 'N/A')}, " + f"source={report.args.get('data', 'N/A')}, " + f"tokenizer={report.args.get('tokenizer', 'N/A')})" + ) + rate = ( + f"Rate(type={report.args.get('mode', 'N/A')}, " + f"rate={report.args.get('rate', 'N/A')})" + ) + limits = ( + f"Limits(max_number={report.args.get('max_number', 'N/A')} requests, " + f"max_duration={report.args.get('max_duration', 'N/A')} sec)" + ) + + logger.debug( + "Created benchmark report details for backend={}, data={}, rate={}, limits={}", + backend, + data, + rate, + limits, + ) + + return backend + "\n" + data + "\n" + rate + "\n" + limits + "\n" + + +def _benchmark_rate_id(benchmark: TextGenerationBenchmark) -> str: + """ + Generate a string identifier for a benchmark rate. + + :param benchmark: The benchmark for which to generate the rate ID. + :type benchmark: TextGenerationBenchmark + :return: A string representing the benchmark rate ID. 
+ :rtype: str + """ + rate_id = ( + f"{benchmark.mode}@{benchmark.rate:.2f} req/sec" + if benchmark.rate + else f"{benchmark.mode}" + ) + logger.debug("Generated benchmark rate ID: {}", rate_id) + return rate_id + + +def _create_benchmark_report_requests_summary( + report: TextGenerationBenchmarkReport, +) -> Table: + """ + Create a table summarizing the requests of a benchmark report. + + :param report: The benchmark report to summarize. + :type report: TextGenerationBenchmarkReport + :return: A rich Table object summarizing the requests. + :rtype: Table + """ + table = Table( + "Benchmark", + "Requests Completed", + "Request Failed", + "Duration", + "Start Time", + "End Time", + title="[magenta]Requests Data by Benchmark[/magenta]", + title_style="bold", + title_justify="left", + show_header=True, + ) + + for benchmark in report.benchmarks_sorted: + start_time_str = ( + datetime.fromtimestamp(benchmark.start_time).strftime("%H:%M:%S") + if benchmark.start_time + else "N/A" + ) + end_time_str = ( + datetime.fromtimestamp(benchmark.end_time).strftime("%H:%M:%S") + if benchmark.end_time + else "N/A" + ) + + table.add_row( + _benchmark_rate_id(benchmark), + f"{benchmark.request_count}/{benchmark.total_count}", + f"{benchmark.error_count}/{benchmark.total_count}", + f"{benchmark.duration:.2f} sec", + f"{start_time_str}", + f"{end_time_str}", + ) + logger.debug("Created requests summary table for the report.") + return table + + +def _create_benchmark_report_data_tokens_summary( + report: TextGenerationBenchmarkReport, +) -> Table: + """ + Create a table summarizing data tokens of a benchmark report. + + :param report: The benchmark report to summarize. + :type report: TextGenerationBenchmarkReport + :return: A rich Table object summarizing the data tokens. + :rtype: Table + """ + table = Table( + "Benchmark", + "Prompt", + "Prompt (1%, 5%, 50%, 95%, 99%)", + "Output", + "Output (1%, 5%, 50%, 95%, 99%)", + title="[magenta]Tokens Data by Benchmark[/magenta]", + title_style="bold", + title_justify="left", + show_header=True, + ) + + for benchmark in report.benchmarks_sorted: + table.add_row( + _benchmark_rate_id(benchmark), + f"{benchmark.prompt_token_distribution.mean:.2f}", + ", ".join( + f"{percentile:.1f}" + for percentile in benchmark.prompt_token_distribution.percentiles( + [1, 5, 50, 95, 99] + ) + ), + f"{benchmark.output_token_distribution.mean:.2f}", + ", ".join( + f"{percentile:.1f}" + for percentile in benchmark.output_token_distribution.percentiles( + [1, 5, 50, 95, 99] + ) + ), + ) + logger.debug("Created data tokens summary table for the report.") + return table + + +def _create_benchmark_report_dist_perf_summary( + report: TextGenerationBenchmarkReport, +) -> Table: + """ + Create a table summarizing distribution performance of a benchmark report. + + :param report: The benchmark report to summarize. + :type report: TextGenerationBenchmarkReport + :return: A rich Table object summarizing the performance statistics. 
+ :rtype: Table + """ + table = Table( + "Benchmark", + "Request Latency [1%, 5%, 10%, 50%, 90%, 95%, 99%] (sec)", + "Time to First Token [1%, 5%, 10%, 50%, 90%, 95%, 99%] (ms)", + "Inter Token Latency [1%, 5%, 10%, 50%, 90% 95%, 99%] (ms)", + title="[magenta]Performance Stats by Benchmark[/magenta]", + title_style="bold", + title_justify="left", + show_header=True, + ) + + for benchmark in report.benchmarks_sorted: + table.add_row( + _benchmark_rate_id(benchmark), + ", ".join( + f"{percentile:.2f}" + for percentile in benchmark.request_latency_distribution.percentiles( + [1, 5, 10, 50, 90, 95, 99] + ) + ), + ", ".join( + f"{percentile * 1000:.1f}" + for percentile in benchmark.ttft_distribution.percentiles( + [1, 5, 10, 50, 90, 95, 99] + ) + ), + ", ".join( + f"{percentile * 1000:.1f}" + for percentile in benchmark.itl_distribution.percentiles( + [1, 5, 10, 50, 90, 95, 99] + ) + ), + ) + logger.debug("Created distribution performance summary table for the report.") + return table + + +def _create_benchmark_report_summary(report: TextGenerationBenchmarkReport) -> Table: + """ + Create a summary table for a benchmark report. + + :param report: The benchmark report to summarize. + :type report: TextGenerationBenchmarkReport + :return: A rich Table object summarizing overall performance. + :rtype: Table + """ + table = Table( + "Benchmark", + "Requests per Second", + "Request Latency", + "Time to First Token", + "Inter Token Latency", + "Output Token Throughput", + title="[magenta]Performance Summary by Benchmark[/magenta]", + title_style="bold", + title_justify="left", + show_header=True, + ) + + for benchmark in report.benchmarks_sorted: + table.add_row( + _benchmark_rate_id(benchmark), + f"{benchmark.completed_request_rate:.2f} req/sec", + f"{benchmark.request_latency:.2f} sec", + f"{benchmark.time_to_first_token:.2f} ms", + f"{benchmark.inter_token_latency:.2f} ms", + f"{benchmark.output_token_throughput:.2f} tokens/sec", + ) + logger.debug("Created overall performance summary table for the report.") + return table class GuidanceReport(Serializable): """ A class to manage the guidance reports that include the benchmarking details, potentially across multiple runs, for saving and loading from disk. + + :param benchmarks: The list of benchmarking reports. + :type benchmarks: List[TextGenerationBenchmarkReport] """ benchmarks: List[TextGenerationBenchmarkReport] = Field( default_factory=list, description="The list of benchmark reports." ) + + def print( + self, save_path: Optional[str] = None, continual_refresh: bool = False + ) -> None: + """ + Print the guidance report to the console. + + :param save_path: Optional path to save the report to disk. + :type save_path: Optional[str] + :param continual_refresh: Whether to continually refresh the report. 
+ :type continual_refresh: bool + :return: None + """ + logger.info("Printing guidance report to console with save_path={}", save_path) + report_viz = Panel( + Group( + *[ + Panel( + Group( + _create_benchmark_report_details(benchmark), + "", + _create_benchmark_report_requests_summary(benchmark), + "", + _create_benchmark_report_data_tokens_summary(benchmark), + "", + _create_benchmark_report_dist_perf_summary(benchmark), + "", + _create_benchmark_report_summary(benchmark), + ), + title=( + f"[bold magenta]Benchmark Report " + f"{index + 1}[/bold magenta]" + ), + expand=True, + title_align="left", + ) + for index, benchmark in enumerate(self.benchmarks) + ], + ), + title=( + "[bold cyan]GuideLLM Benchmarks Report[/bold cyan] [italic]" + f"({save_path})[/italic]" + ), + expand=True, + title_align="left", + ) + console = Console() + + if continual_refresh: + logger.info("Starting live report with continual refresh.") + with Live(report_viz, refresh_per_second=1, console=console) as live: + while True: + live.update(report_viz) + time.sleep(1) + else: + console.print(report_viz) + + logger.info("Guidance report printing completed.") diff --git a/src/guidellm/core/result.py b/src/guidellm/core/result.py index 4b04551..f218784 100644 --- a/src/guidellm/core/result.py +++ b/src/guidellm/core/result.py @@ -174,9 +174,6 @@ class TextGenerationError(Serializable): description="The error message that occurred during text generation.", ) - def model_post_init(self, _: Any): - logger.error(f"Text generation error occurred: {self.message}") - class RequestConcurrencyMeasurement(Serializable): """ @@ -191,7 +188,7 @@ class RequestConcurrencyMeasurement(Serializable): class TextGenerationBenchmark(Serializable): """ - A class to represent a benchmark of text generation requests + A class to represent a report of text generation requests (results and errors) for generative AI workloads. This is a set of results and errors for a specific mode and rate. """ @@ -244,6 +241,55 @@ def error_count(self) -> int: """ return len(self.errors) + @property + def total_count(self) -> int: + """ + Get the total number of requests in the result. + + :return: The total number of requests. + :rtype: int + """ + return self.request_count + self.error_count + + @property + def start_time(self) -> Optional[float]: + """ + Get the start time of the first request in the result. + + :return: The start time of the first request. + :rtype: Optional[float] + """ + if not self.results: + return None + + return self.results[0].start_time + + @property + def end_time(self) -> Optional[float]: + """ + Get the end time of the last request in the result. + + :return: The end time of the last request. + :rtype: Optional[float] + """ + if not self.results: + return None + + return self.results[-1].end_time + + @property + def duration(self) -> float: + """ + Get the duration of the result in seconds. + + :return: The duration of the result. + :rtype: float + """ + if not self.results or not self.start_time or not self.end_time: + return 0.0 + + return self.end_time - self.start_time + @property def completed_request_rate(self) -> float: """ @@ -252,15 +298,130 @@ def completed_request_rate(self) -> float: :return: The rate of requests per second. :rtype: float """ + if not self.results or not self.duration: + return 0.0 + + return len(self.results) / self.duration + + @property + def request_latency(self) -> float: + """ + Get the average request latency in seconds. + + :return: The average request latency in seconds. 
+ :rtype: float + """ if not self.results: return 0.0 - if self.results[0].start_time is None or self.results[-1].end_time is None: - raise ValueError("Start time and End time are not defined") + return self.request_latency_distribution.mean - time_diff = self.results[-1].end_time - self.results[0].start_time + @property + def request_latency_distribution(self) -> Distribution: + """ + Get the distribution of request latencies. - return len(self.results) / time_diff + :return: The distribution of request latencies. + :rtype: Distribution + """ + return Distribution( + data=[ + result.end_time - result.start_time + for result in self.results + if result.end_time is not None and result.start_time is not None + ] + ) + + @property + def time_to_first_token(self) -> float: + """ + Get the time taken to decode the first token in milliseconds. + + :return: The time taken to decode the first token in milliseconds. + :rtype: float + """ + if not self.results: + return 0.0 + + return 1000 * self.ttft_distribution.mean + + @property + def ttft_distribution(self) -> Distribution: + """ + Get the distribution of time taken to decode the first token. + + :return: The distribution of time taken to decode the first token. + :rtype: Distribution + """ + return Distribution( + data=[ + result.first_token_time + for result in self.results + if result.first_token_time is not None + ] + ) + + @property + def inter_token_latency(self) -> float: + """ + Get the average time between tokens in milliseconds. + + :return: The average time between tokens. + :rtype: float + """ + if not self.results: + return 0.0 + + return 1000 * self.itl_distribution.mean + + @property + def itl_distribution(self) -> Distribution: + """ + Get the distribution of time between tokens. + + :return: The distribution of time between tokens. + :rtype: Distribution + """ + return Distribution( + data=[ + decode for result in self.results for decode in result.decode_times.data + ] + ) + + @property + def output_token_throughput(self) -> float: + """ + Get the average token throughput in tokens per second. + + :return: The average token throughput. + :rtype: float + """ + if not self.results or not self.duration: + return 0.0 + + total_tokens = sum(result.output_token_count for result in self.results) + + return total_tokens / self.duration + + @property + def prompt_token_distribution(self) -> Distribution: + """ + Get the distribution of prompt token counts. + + :return: The distribution of prompt token counts. + :rtype: Distribution + """ + return Distribution(data=[result.prompt_token_count for result in self.results]) + + @property + def output_token_distribution(self) -> Distribution: + """ + Get the distribution of output token counts. + + :return: The distribution of output token counts. 
+ :rtype: Distribution + """ + return Distribution(data=[result.output_token_count for result in self.results]) @property def overloaded(self) -> bool: @@ -321,7 +482,7 @@ def request_completed( if isinstance(result, TextGenerationError): is_error = True self.errors.append(result) - logger.warning( + logger.info( "Text generation request resulted in error: {}", result.message, ) diff --git a/src/guidellm/executor/__init__.py b/src/guidellm/executor/__init__.py index 715363c..d5858d0 100644 --- a/src/guidellm/executor/__init__.py +++ b/src/guidellm/executor/__init__.py @@ -1,8 +1,9 @@ -from .base import Executor +from .base import Executor, ExecutorResult from .profile_generator import Profile, ProfileGenerationMode, ProfileGenerator __all__ = [ "Executor", + "ExecutorResult", "Profile", "ProfileGenerationMode", "ProfileGenerator", diff --git a/src/guidellm/executor/base.py b/src/guidellm/executor/base.py index d6979a0..865ab30 100644 --- a/src/guidellm/executor/base.py +++ b/src/guidellm/executor/base.py @@ -1,11 +1,15 @@ from dataclasses import dataclass -from typing import AsyncGenerator, List, Optional, Union +from typing import AsyncGenerator, Optional, Sequence, Union from loguru import logger from guidellm.backend import Backend from guidellm.core import TextGenerationBenchmarkReport -from guidellm.executor.profile_generator import ProfileGenerationMode, ProfileGenerator +from guidellm.executor.profile_generator import ( + Profile, + ProfileGenerationMode, + ProfileGenerator, +) from guidellm.request import RequestGenerator from guidellm.scheduler import Scheduler, SchedulerResult @@ -23,7 +27,7 @@ class ExecutorResult: :type count_total: int :param count_completed: Number of completed profiles. :type count_completed: int - :param report: A benchmark report for text generation. + :param report: A report report for text generation. :type report: TextGenerationBenchmarkReport :param scheduler_result: Optional scheduler result for the last task. :type scheduler_result: Optional[SchedulerResult] @@ -32,8 +36,11 @@ class ExecutorResult: completed: bool count_total: int count_completed: int + generation_modes: Sequence[ProfileGenerationMode] report: TextGenerationBenchmarkReport scheduler_result: Optional[SchedulerResult] = None + current_index: Optional[int] = None + current_profile: Optional[Profile] = None class Executor: @@ -51,10 +58,10 @@ class Executor: :param rate: The list of rates for load generation, or None. :type rate: Optional[List[float]] :param max_number: Maximum number of requests to generate for the scheduler - (a single benchmark run), or None. + (a single report run), or None. :type max_number: Optional[int] :param max_duration: Maximum duration for generating requests for the scheduler, - (a single benchmark run), or None. + (a single report run), or None. 
:type max_duration: Optional[float] """ @@ -63,7 +70,7 @@ def __init__( backend: Backend, request_generator: RequestGenerator, mode: ProfileGenerationMode = "sweep", - rate: Optional[Union[float, List[float]]] = None, + rate: Optional[Union[float, Sequence[float]]] = None, max_number: Optional[int] = None, max_duration: Optional[float] = None, ): @@ -133,17 +140,29 @@ async def run(self) -> AsyncGenerator[ExecutorResult, None]: """ report = TextGenerationBenchmarkReport() report.args = { + # backend args + "backend_type": self.backend.type_, + "target": self.backend.target, + "model": self.backend.model, + # data args + "data_type": self.request_generator.type_, + "data": self.request_generator.source, + "tokenizer": self.request_generator.tokenizer.name_or_path, + # rate args "mode": self.profile_generator.mode, "rate": self.profile_generator.rates, + # limits args "max_number": self.max_number, "max_duration": self.max_duration, } + profile_index = -1 logger.info("Starting Executor run") yield ExecutorResult( completed=False, count_total=len(self.profile_generator), count_completed=0, + generation_modes=self.profile_generator.profile_generation_modes, report=report, ) @@ -154,9 +173,10 @@ async def run(self) -> AsyncGenerator[ExecutorResult, None]: worker=self.backend, mode=profile.load_gen_mode, rate=profile.load_gen_rate, - max_number=self.max_number, + max_number=self.max_number or profile.args.get("max_number", None), max_duration=self.max_duration, ) + profile_index += 1 logger.info( "Scheduling tasks with mode: {}, rate: {}", @@ -176,8 +196,11 @@ async def run(self) -> AsyncGenerator[ExecutorResult, None]: completed=False, count_total=len(self.profile_generator), count_completed=len(report.benchmarks), + generation_modes=self.profile_generator.profile_generation_modes, report=report, scheduler_result=scheduler_result, + current_index=profile_index, + current_profile=profile, ) logger.info("Executor run completed") @@ -185,5 +208,6 @@ async def run(self) -> AsyncGenerator[ExecutorResult, None]: completed=True, count_total=len(self.profile_generator), count_completed=len(report.benchmarks), + generation_modes=self.profile_generator.profile_generation_modes, report=report, ) diff --git a/src/guidellm/executor/profile_generator.py b/src/guidellm/executor/profile_generator.py index 8a36db0..703ea05 100644 --- a/src/guidellm/executor/profile_generator.py +++ b/src/guidellm/executor/profile_generator.py @@ -1,7 +1,8 @@ -from typing import Dict, List, Literal, Optional, Union, get_args +from typing import Any, Dict, Literal, Optional, Sequence, Union, get_args import numpy as np from loguru import logger +from pydantic import Field from guidellm.config import settings from guidellm.core import TextGenerationBenchmark, TextGenerationBenchmarkReport @@ -27,10 +28,13 @@ class Profile(Serializable): :type load_gen_mode: LoadGenerationMode :param load_gen_rate: The rate of load generation, if applicable. :type load_gen_rate: Optional[float] + :param args: Additional arguments for the profile. + :type args: Optional[Dict[str, Any]] """ load_gen_mode: LoadGenerationMode load_gen_rate: Optional[float] = None + args: Dict[str, Any] = Field(default_factory=dict) class ProfileGenerator: @@ -40,13 +44,13 @@ class ProfileGenerator: :param mode: The mode for profile generation (e.g., sweep, synchronous). :type mode: ProfileGenerationMode :param rate: The rate(s) for load generation; could be a float or list of floats. 
- :type rate: Optional[Union[float, List[float]]] + :type rate: Optional[Union[float, Sequence[float]]] """ def __init__( self, mode: ProfileGenerationMode, - rate: Optional[Union[float, List[float]]] = None, + rate: Optional[Union[float, Sequence[float]]] = None, ): if mode not in get_args(ProfileGenerationMode): err = ValueError( @@ -69,7 +73,7 @@ def __init__( err = ValueError(f"Rates are required for {self._mode} mode") logger.error(err) raise err - self._rates = rate if isinstance(rate, list) else [rate] + self._rates = rate if isinstance(rate, Sequence) else [rate] for rt in self._rates: if rt <= 0: @@ -89,7 +93,7 @@ def __len__(self) -> int: :rtype: int """ if self._mode == "sweep": - return settings.num_sweep_profiles + return settings.num_sweep_profiles + 2 if self._mode in ("throughput", "synchronous"): return 1 @@ -110,12 +114,12 @@ def mode(self) -> ProfileGenerationMode: return self._mode @property - def rates(self) -> Optional[List[float]]: + def rates(self) -> Optional[Sequence[float]]: """ Returns the list of rates for load generation, if any. - :return: List of rates or None if not applicable. - :rtype: Optional[List[float]] + :return: Sequence of rates or None if not applicable. + :rtype: Optional[Sequence[float]] """ return self._rates @@ -129,11 +133,35 @@ def generated_count(self) -> int: """ return self._generated_count + @property + def profile_generation_modes(self) -> Sequence[ProfileGenerationMode]: + """ + Return the list of profile modes to be run in the report. + + :return: Sequence of profile modes to be run in the report. + :rtype: Sequence[ProfileGenerationMode] + """ + if self._mode == "sweep": + return ["synchronous", "throughput"] + ["constant"] * ( # type: ignore # noqa: PGH003 + settings.num_sweep_profiles + ) + + if self._mode in ["throughput", "synchronous"]: + return [self._mode] + + if self._rates is None: + raise ValueError(f"Rates are required for {self._mode} mode") + + if self._mode in ["constant", "poisson"]: + return [self._mode] * len(self._rates) + + raise ValueError(f"Invalid mode: {self._mode}") + def next(self, current_report: TextGenerationBenchmarkReport) -> Optional[Profile]: """ Generates the next profile based on the current mode and report. - :param current_report: The current benchmark report. + :param current_report: The current report report. :type current_report: TextGenerationBenchmarkReport :return: The generated profile or None if no more profiles. :rtype: Optional[Profile] @@ -184,7 +212,7 @@ def next(self, current_report: TextGenerationBenchmarkReport) -> Optional[Profil @staticmethod def create_fixed_rate_profile( - index: int, mode: ProfileGenerationMode, rates: List[float] + index: int, mode: ProfileGenerationMode, rates: Sequence[float] ) -> Optional[Profile]: """ Creates a profile with a fixed rate. @@ -194,7 +222,7 @@ def create_fixed_rate_profile( :param mode: The mode for profile generation (e.g., constant, poisson). :type mode: ProfileGenerationMode :param rates: The list of rates for load generation. - :type rates: List[float] + :type rates: Sequence[float] :return: The generated profile or None if index is out of range. :rtype: Optional[Profile] """ @@ -273,42 +301,43 @@ def create_sweep_profile( :param index: The index of the profile to create. :type index: int - :param sync_benchmark: The synchronous benchmark data. + :param sync_benchmark: The synchronous report data. :type sync_benchmark: Optional[TextGenerationBenchmark] - :param throughput_benchmark: The throughput benchmark data. 
+ :param throughput_benchmark: The throughput report data. :type throughput_benchmark: Optional[TextGenerationBenchmark] :return: The generated profile or None if index is out of range. :rtype: Optional[Profile] """ + if index < 0 or index >= settings.num_sweep_profiles + 2: + return None + if index == 0: return ProfileGenerator.create_synchronous_profile(0) if not sync_benchmark: - err = ValueError("Synchronous benchmark is required for sweep mode") + err = ValueError("Synchronous report is required for sweep mode") logger.error(err) raise err if index == 1: - return ProfileGenerator.create_throughput_profile(0) + throughput_profile: Profile = ProfileGenerator.create_throughput_profile(0) # type: ignore # noqa: PGH003 + # set the max number of requests to 5 times the number of requests + # incase it is not set for the sweep to limit the number of requests + throughput_profile.args = {"max_number": sync_benchmark.request_count * 5} + return throughput_profile if not throughput_benchmark: - err = ValueError("Throughput benchmark is required for sweep mode") + err = ValueError("Throughput report is required for sweep mode") logger.error(err) raise err min_rate = sync_benchmark.completed_request_rate max_rate = throughput_benchmark.completed_request_rate intermediate_rates = list( - np.linspace(min_rate, max_rate, settings.num_sweep_profiles) - ) + np.linspace(min_rate, max_rate, settings.num_sweep_profiles + 1) + )[1:] - profile = ( - Profile( - load_gen_mode="constant", - load_gen_rate=intermediate_rates[index - 1], - ) - if index < len(intermediate_rates) - else None + return Profile( + load_gen_mode="constant", + load_gen_rate=intermediate_rates[index - 2], ) - logger.debug("Created sweep profile: {}", profile) - return profile diff --git a/src/guidellm/main.py b/src/guidellm/main.py index d754d6a..ddb9a4c 100644 --- a/src/guidellm/main.py +++ b/src/guidellm/main.py @@ -1,11 +1,12 @@ import asyncio +from typing import get_args import click from loguru import logger from guidellm.backend import Backend from guidellm.core import GuidanceReport, TextGenerationBenchmarkReport -from guidellm.executor import Executor +from guidellm.executor import Executor, ProfileGenerationMode from guidellm.logger import configure_logger from guidellm.request import ( EmulatedRequestGenerator, @@ -13,6 +14,7 @@ TransformersDatasetRequestGenerator, ) from guidellm.request.base import RequestGenerator +from guidellm.utils import BenchmarkReportProgress @click.command() @@ -26,7 +28,7 @@ @click.option("--port", type=str, default=None, help="Port for benchmarking") @click.option( "--backend", - type=click.Choice(["test", "openai_server"]), + type=click.Choice(["openai_server"]), default="openai_server", help="Backend type for benchmarking", ) @@ -46,8 +48,8 @@ ) @click.option( "--rate-type", - type=click.Choice(["sweep", "synchronous", "throughput", "constant", "poisson"]), - default="synchronous", + type=click.Choice(get_args(ProfileGenerationMode)), + default="sweep", help="Type of rate generation for benchmarking", ) @click.option( @@ -73,7 +75,7 @@ "--output-path", type=str, default="benchmark_report.json", - help="Path to save benchmark report to", + help="Path to save report report to", ) def main( target, @@ -123,7 +125,7 @@ def main( backend=backend, request_generator=request_generator, mode=rate_type, - rate=rate, + rate=rate if rate_type in ("constant", "poisson") else None, max_number=max_requests, max_duration=max_seconds, ) @@ -144,19 +146,40 @@ def main( guidance_report = GuidanceReport() 
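
The sweep index mapping in `create_sweep_profile` above is: index 0 is the synchronous profile, index 1 is the throughput profile (with `max_number` capped at five times the synchronous request count), and indices 2 through `num_sweep_profiles + 1` are constant-rate profiles interpolated between the measured synchronous and throughput request rates. A small sketch of the rate math, using made-up example rates rather than real benchmark output:

```python
import numpy as np

# Example measured rates; in the executor these come from the completed
# synchronous and throughput benchmarks.
sync_rate = 2.0         # requests/sec from the synchronous profile
throughput_rate = 20.0  # requests/sec from the throughput profile
num_sweep_profiles = 8  # stand-in for settings.num_sweep_profiles

# Drop the first point so the sweep does not repeat the synchronous rate.
intermediate_rates = list(
    np.linspace(sync_rate, throughput_rate, num_sweep_profiles + 1)
)[1:]

# Sweep profile index i (for i >= 2) maps to intermediate_rates[i - 2].
for index in range(2, num_sweep_profiles + 2):
    print(index, round(intermediate_rates[index - 2], 2))
```
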
guidance_report.benchmarks.append(report) guidance_report.save_file(output_path) - - print("Guidance Report Complete:") # noqa: T201 - print(guidance_report) # noqa: T201 + guidance_report.print(output_path, continual_refresh=True) async def _run_executor_for_result(executor: Executor) -> TextGenerationBenchmarkReport: report = None + progress = BenchmarkReportProgress() + started = False async for result in executor.run(): + if not started: + progress.start(result.generation_modes) # type: ignore # noqa: PGH003 + started = True + + if result.current_index is not None: + description = f"{result.current_profile.load_gen_mode}" # type: ignore # noqa: PGH003 + if result.current_profile.load_gen_mode in ("constant", "poisson"): # type: ignore # noqa: PGH003 + description += f"@{result.current_profile.load_gen_rate:.2f} req/s" # type: ignore # noqa: PGH003 + + progress.update_benchmark( + index=result.current_index, + description=description, + completed=result.scheduler_result.completed, # type: ignore # noqa: PGH003 + completed_count=result.scheduler_result.count_completed, # type: ignore # noqa: PGH003 + completed_total=result.scheduler_result.count_total, # type: ignore # noqa: PGH003 + start_time=result.scheduler_result.benchmark.start_time, # type: ignore # noqa: PGH003 + req_per_sec=result.scheduler_result.benchmark.completed_request_rate, # type: ignore # noqa: PGH003 + ) + if result.completed: report = result.report break + progress.finish() + if not report: raise ValueError("No report generated by executor") diff --git a/src/guidellm/request/__init__.py b/src/guidellm/request/__init__.py index 95fe230..4feca91 100644 --- a/src/guidellm/request/__init__.py +++ b/src/guidellm/request/__init__.py @@ -1,4 +1,4 @@ -from .base import RequestGenerator +from .base import GenerationMode, RequestGenerator from .emulated import EmulatedConfig, EmulatedRequestGenerator from .file import FileRequestGenerator from .transformers import TransformersDatasetRequestGenerator @@ -7,6 +7,7 @@ "EmulatedConfig", "EmulatedRequestGenerator", "FileRequestGenerator", + "GenerationMode", "RequestGenerator", "TransformersDatasetRequestGenerator", ] diff --git a/src/guidellm/request/base.py b/src/guidellm/request/base.py index 9a897bb..52935b7 100644 --- a/src/guidellm/request/base.py +++ b/src/guidellm/request/base.py @@ -24,6 +24,10 @@ class RequestGenerator(ABC): """ A base class for request generators that generate result requests. + :param type_: The type of the request generator. + :type type_: str + :param source: The data source for the request generator. + :type source: str :param tokenizer: The tokenizer instance or the name/config to use for tokenizing prompts. :type tokenizer: Union[str, PreTrainedTokenizer] @@ -35,10 +39,14 @@ class RequestGenerator(ABC): def __init__( self, + type_: str, + source: str, tokenizer: Optional[Union[str, PreTrainedTokenizer]] = None, mode: GenerationMode = "async", async_queue_size: int = 50, ): + self._type = type_ + self._source = source self._async_queue_size: int = async_queue_size self._mode: str = mode self._queue: Queue = Queue(maxsize=async_queue_size) @@ -101,6 +109,26 @@ def __iter__(self) -> Iterator[TextGenerationRequest]: while not self._stop_event.is_set(): yield self.create_item() + @property + def type_(self) -> str: + """ + Get the type of the request generator. + + :return: The type of the request generator. + :rtype: str + """ + return self._type + + @property + def source(self) -> str: + """ + Get the data source for the request generator. 
+ + :return: The data source. + :rtype: str + """ + return self._source + @property def tokenizer(self) -> PreTrainedTokenizer: """ diff --git a/src/guidellm/request/emulated.py b/src/guidellm/request/emulated.py index b1d7f42..c85e9d7 100644 --- a/src/guidellm/request/emulated.py +++ b/src/guidellm/request/emulated.py @@ -327,7 +327,13 @@ def __init__( # NOTE: Must be after all the parameters since the queue population # function requires attributes above - super().__init__(tokenizer, mode, async_queue_size) + super().__init__( + type_="emulated", + source=str(config), + tokenizer=tokenizer, + mode=mode, + async_queue_size=async_queue_size, + ) def create_item(self) -> TextGenerationRequest: """ diff --git a/src/guidellm/request/file.py b/src/guidellm/request/file.py index c165619..8460bd2 100644 --- a/src/guidellm/request/file.py +++ b/src/guidellm/request/file.py @@ -43,7 +43,13 @@ def __init__( # NOTE: Must be after all the parameters since the queue population # function requires attributes above - super().__init__(tokenizer, mode, async_queue_size) + super().__init__( + type_="file", + source=str(path), + tokenizer=tokenizer, + mode=mode, + async_queue_size=async_queue_size, + ) def create_item(self) -> TextGenerationRequest: """ diff --git a/src/guidellm/request/transformers.py b/src/guidellm/request/transformers.py index 49d8388..eaab862 100644 --- a/src/guidellm/request/transformers.py +++ b/src/guidellm/request/transformers.py @@ -65,7 +65,13 @@ def __init__( # NOTE: Must be after all the parameters since the queue population # function requires attributes above - super().__init__(tokenizer, mode, async_queue_size) + super().__init__( + type_="transformers_dataset", + source=str(dataset), + tokenizer=tokenizer, + mode=mode, + async_queue_size=async_queue_size, + ) def create_item(self) -> TextGenerationRequest: """ diff --git a/src/guidellm/scheduler/base.py b/src/guidellm/scheduler/base.py index 1474c12..602166b 100644 --- a/src/guidellm/scheduler/base.py +++ b/src/guidellm/scheduler/base.py @@ -31,7 +31,7 @@ class SchedulerResult: :type count_total: int :param count_completed: Number of tasks that have been completed so far. :type count_completed: int - :param benchmark: Benchmark data for the task execution. + :param report: Benchmark data for the task execution. :type benchmark: TextGenerationBenchmark :param current_result: The result of the current request, if any. :type current_result: Optional[Union[TextGenerationResult, Exception]] @@ -94,17 +94,17 @@ def __init__( logger.error(err) raise err - if max_number is None and max_duration is None: + if not max_number and not max_duration: err = ValueError("Either max_number or max_duration must be specified") logger.error(err) raise err - if max_number is not None and max_number <= 0: + if max_number and max_number <= 0: err = ValueError(f"max_number must be > 0, given: {max_number}") logger.error(err) raise err - if max_duration is not None and max_duration <= 0: + if max_duration and max_duration <= 0: err = ValueError(f"max_duration must be > 0, given: {max_duration}") logger.error(err) raise err @@ -196,9 +196,9 @@ def load_generator(self) -> LoadGenerator: @property def benchmark_mode(self) -> Literal["asynchronous", "synchronous", "throughput"]: """ - The benchmark mode for the scheduler. + The report mode for the scheduler. - :return: The benchmark mode. + :return: The report mode. 
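
With the base-class change above, every `RequestGenerator` subclass now passes `type_` and `source` to `super().__init__`, which is how the executor can record `data_type` and `data` in the report args. A hedged sketch of a custom generator written against that contract (the class name and prompt are illustrative, not part of the library):

```python
from guidellm.core import TextGenerationRequest
from guidellm.request import RequestGenerator


class StaticPromptRequestGenerator(RequestGenerator):
    """Illustrative generator that always emits the same prompt."""

    def __init__(self, prompt: str = "Hello, world", **kwargs):
        self._prompt = prompt
        # type_ and source are now required so the report can describe the data
        super().__init__(type_="static", source="inline-prompt", **kwargs)

    def create_item(self) -> TextGenerationRequest:
        return TextGenerationRequest(prompt=self._prompt)
```
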
:rtype: Literal["asynchronous", "synchronous", "throughput"] """ if self._mode == "synchronous": @@ -223,38 +223,38 @@ async def run(self) -> AsyncGenerator[SchedulerResult, None]: start_time = time.time() end_time = start_time + self.max_duration if self.max_duration else math.inf max_number = float(self.max_number) if self.max_number else math.inf - runner = self._run_async if self._mode == "asynchronous" else self._run_sync - - def _get_count_total(): - return ( - self.max_number - if self.max_number - else round(self.max_duration) - if self.max_duration - else 0 - ) - - def _get_count_completed(): - return min( - benchmark.request_count + benchmark.error_count - if self.max_number - else round(time.time() - start_time), - _get_count_total(), - ) + runner = self._run_sync if self._mode == "synchronous" else self._run_async + count_total = ( + self.max_number + if self.max_number + else round(self.max_duration) + if self.max_duration + else 0 + ) # yield initial result for progress tracking yield SchedulerResult( completed=False, - count_total=_get_count_total(), - count_completed=_get_count_completed(), + count_total=count_total, + count_completed=0, benchmark=benchmark, ) + run_count = 0 async for res in runner(benchmark, end_time, max_number): + run_count += 1 + count_completed = ( + min(run_count, self.max_number) + if self.max_number + else round(time.time() - start_time) + if self.max_duration + else 0 + ) + yield SchedulerResult( completed=False, - count_total=_get_count_total(), - count_completed=_get_count_completed(), + count_total=count_total, + count_completed=count_completed, benchmark=benchmark, current_result=res, ) @@ -263,8 +263,14 @@ def _get_count_completed(): yield SchedulerResult( completed=True, - count_total=_get_count_total(), - count_completed=_get_count_completed(), + count_total=count_total, + count_completed=( + benchmark.request_count + benchmark.error_count + if self.max_number + else round(time.time() - start_time) + if self.max_duration + else 0 + ), benchmark=benchmark, ) @@ -284,25 +290,26 @@ async def _run_sync( ) benchmark.request_started() result = await self._submit_task_coroutine(request, submit_at, end_time) - benchmark.request_completed(result) - logger.debug("Request completed with output: {}", result) - yield result + if result is not None: + benchmark.request_completed(result) + logger.debug("Request completed with output: {}", result) + yield result async def _run_async( self, benchmark: TextGenerationBenchmark, end_time: float, max_number: float ) -> AsyncGenerator[Union[TextGenerationResult, TextGenerationError], None]: tasks = [] - pending = 0 + completed = 0 for index, (request, submit_at) in enumerate( zip(self.generator, self.load_generator.times()) ): + while (index + 1 - completed) >= settings.max_concurrency: + await asyncio.sleep(0.1) + if index >= max_number or time.time() >= end_time or submit_at >= end_time: break - while pending >= settings.max_concurrency: - await asyncio.sleep(0.1) - logger.debug( "Running asynchronous request={} at submit_at={}", request, @@ -310,11 +317,13 @@ async def _run_async( ) def _completed(_task: asyncio.Task) -> None: - nonlocal pending - pending -= 1 + nonlocal completed + completed += 1 _res = _task.result() - benchmark.request_completed(_res) - logger.debug("Request completed: {}", _res) + + if _res: + benchmark.request_completed(_res) + logger.debug("Request completed: {}", _res) benchmark.request_started() task = asyncio.create_task( @@ -322,21 +331,31 @@ def _completed(_task: asyncio.Task) -> 
None: ) task.add_done_callback(_completed) tasks.append(task) - pending += 1 # noqa: SIM113 + + # release control to the event loop for other tasks + await asyncio.sleep(0.001) for compl_task in asyncio.as_completed(tasks): - yield await compl_task + task_res = await compl_task + if task_res is not None: + yield task_res async def _submit_task_coroutine( self, request: TextGenerationRequest, submit_at: float, end_time: float - ) -> Union[TextGenerationResult, TextGenerationError]: - if submit_at > end_time: - raise asyncio.TimeoutError( - f"Request submission time {submit_at} is " - f"greater than end time {end_time}" - ) - + ) -> Optional[Union[TextGenerationResult, TextGenerationError]]: try: + if submit_at > end_time: + logger.info( + "Request {} submission time {} is greater than end time {}", + request, + submit_at, + end_time, + ) + raise asyncio.TimeoutError( + f"Request submission time {submit_at} " + f"is greater than end time {end_time}" + ) + if submit_at > time.time(): await asyncio.sleep(submit_at - time.time()) @@ -348,7 +367,7 @@ async def _submit_task_coroutine( except asyncio.TimeoutError as exc: logger.info("Request {} timed out: {}", request, exc) - return TextGenerationError(request=request, message=str(exc)) + return None except Exception as exc: # noqa: BLE001 logger.warning("Request {} failed: {}", request, exc) diff --git a/src/guidellm/utils/__init__.py b/src/guidellm/utils/__init__.py index 60668cf..2fdd8ca 100644 --- a/src/guidellm/utils/__init__.py +++ b/src/guidellm/utils/__init__.py @@ -1,4 +1,5 @@ from .injector import create_report, inject_data +from .progress import BenchmarkReportProgress from .text import ( clean_text, filter_text, @@ -19,6 +20,7 @@ ) __all__ = [ + "BenchmarkReportProgress", "clean_text", "create_report", "filter_text", diff --git a/src/guidellm/utils/progress.py b/src/guidellm/utils/progress.py new file mode 100644 index 0000000..5c7a845 --- /dev/null +++ b/src/guidellm/utils/progress.py @@ -0,0 +1,196 @@ +from datetime import datetime +from typing import List + +from loguru import logger +from rich.console import Group +from rich.live import Live +from rich.panel import Panel +from rich.progress import ( + BarColumn, + Progress, + SpinnerColumn, + TaskID, + TaskProgressColumn, + TextColumn, + TimeElapsedColumn, + TimeRemainingColumn, +) + +__all__ = ["BenchmarkReportProgress"] + + +class BenchmarkReportProgress: + """ + Manages the progress display for benchmarks and report generation using Rich. + + This class provides a visual representation of the benchmarking process + and report generation using Rich's progress bars and panels. + """ + + def __init__(self): + """ + Initialize the BenchmarkReportProgress with default settings. + + This method sets up the progress displays for both individual benchmarks + and the overall report, as well as initializing internal task management + structures. 
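
Returning to the scheduler changes above: the rewritten `_run_async` throttles submission by comparing how many tasks have been launched against a `completed` counter bumped in the done callback, and the progress counts now follow a fixed convention (request counts when `max_number` is set, elapsed whole seconds when only `max_duration` is set). A self-contained sketch of that back-pressure pattern with a toy coroutine and limit, not the scheduler itself:

```python
import asyncio
import random


async def fake_request(i: int) -> int:
    # Stand-in for a backend submission.
    await asyncio.sleep(random.uniform(0.01, 0.05))
    return i


async def run_bounded(num_requests: int = 20, max_concurrency: int = 4) -> None:
    completed = 0
    tasks = []

    def _done(_task: asyncio.Task) -> None:
        nonlocal completed
        completed += 1  # mirrors the scheduler's completion counter

    for index in range(num_requests):
        # Wait while the number of in-flight tasks is at the limit.
        while (index - completed) >= max_concurrency:
            await asyncio.sleep(0.01)

        task = asyncio.create_task(fake_request(index))
        task.add_done_callback(_done)
        tasks.append(task)

        # Yield control so callbacks and running tasks can make progress.
        await asyncio.sleep(0)

    results = await asyncio.gather(*tasks)
    print(len(results), "requests finished")


asyncio.run(run_bounded())
```
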
+ """ + logger.info("Initializing BenchmarkReportProgress instance") + + self.benchmarks_progress = Progress( + TextColumn("[{task.fields[start_time_str]}]"), + SpinnerColumn(), + TaskProgressColumn(), + TextColumn("{task.description}"), + TextColumn(" "), + TextColumn( + "[bold cyan]({task.fields[req_per_sec]} req/sec avg)[/bold cyan]" + ), + ) + self.benchmarks_panel = Panel( + self.benchmarks_progress, + title="Benchmarks", + title_align="left", + expand=True, + ) + self.report_progress = Progress( + SpinnerColumn(), + TextColumn("Generating report..."), + BarColumn(bar_width=None), + TextColumn( + "({task.fields[completed_benchmarks]}/{task.fields[total_benchmarks]})" + ), + TextColumn("["), + TimeElapsedColumn(), + TextColumn("<"), + TimeRemainingColumn(), + TextColumn("]"), + ) + self.render_group = Group(self.benchmarks_panel, self.report_progress) + self.live = Live(self.render_group, redirect_stdout=True, redirect_stderr=True) + + self.report_task: TaskID = None # type: ignore # noqa: PGH003 + self.benchmark_tasks: List[TaskID] = [] + self.benchmark_tasks_started: List[bool] = [] + self.benchmark_tasks_completed: List[bool] = [] + self.benchmark_tasks_progress: List[float] = [] + + def start(self, task_descriptions: List[str]) -> None: + """ + Starts the live progress display and initializes benchmark tasks. + + :param task_descriptions: List of descriptions for each benchmark task. + :type task_descriptions: List[str] + """ + logger.info( + "Starting BenchmarkReportProgress with task descriptions: {}", + task_descriptions, + ) + self.live.start() + + for task_description in task_descriptions: + logger.debug("Adding task with description: {}", task_description) + task_id = self.benchmarks_progress.add_task( + task_description, + start=False, + total=None, + start_time_str="--:--:--", + req_per_sec="#.##", + ) + self.benchmark_tasks.append(task_id) + self.benchmark_tasks_started.append(False) + self.benchmark_tasks_completed.append(False) + self.benchmark_tasks_progress.append(0) + + self.report_task = self.report_progress.add_task( + "", + total=len(self.benchmark_tasks) * 100, # 100 points per report + completed_benchmarks=0, + total_benchmarks=len(task_descriptions), + ) + logger.info("Initialized {} benchmark tasks", len(task_descriptions)) + + def update_benchmark( + self, + index: int, + description: str, + completed: bool, + completed_count: int, + completed_total: int, + start_time: float, + req_per_sec: float, + ) -> None: + """ + Updates the progress of a specific benchmark task. + + :param index: Index of the benchmark task to update. + :type index: int + :param description: Description of the current benchmark task. + :type description: str + :param completed: Flag indicating if the benchmark is completed. + :type completed: bool + :param completed_count: Number of completed operations for the task. + :type completed_count: int + :param completed_total: Total number of operations for the task. + :type completed_total: int + :param start_time: Start time of the benchmark in timestamp format. + :type start_time: float + :param req_per_sec: Average requests per second. + :type req_per_sec: float + :raises ValueError: If trying to update a completed benchmark. 
+ """ + if self.benchmark_tasks_completed[index]: + err = ValueError(f"Benchmark {index} already completed") + logger.error("Error updating benchmark: {}", err) + raise err + + if not self.benchmark_tasks_started[index]: + self.benchmark_tasks_started[index] = True + self.benchmarks_progress.start_task(self.benchmark_tasks[index]) + logger.info("Starting benchmark task at index {}", index) + + if completed: + self.benchmark_tasks_completed[index] = True + self.benchmark_tasks_progress[index] = 100 + self.benchmarks_progress.stop_task(self.benchmark_tasks[index]) + logger.info("Completed benchmark task at index {}", index) + + self.benchmark_tasks_progress[index] = completed_count / completed_total * 100 + self.benchmarks_progress.update( + self.benchmark_tasks[index], + description=description, + total=completed_total, + completed=completed_count if not completed else completed_total, + req_per_sec=(f"{req_per_sec:.2f}" if req_per_sec else "#.##"), + start_time_str=datetime.fromtimestamp(start_time).strftime("%H:%M:%S") + if start_time + else "--:--:--", + ) + logger.debug( + "Updated benchmark task at index {}: {}% complete", + index, + self.benchmark_tasks_progress[index], + ) + self.report_progress.update( + self.report_task, + total=len(self.benchmark_tasks) * 100, + completed=sum(self.benchmark_tasks_progress), + completed_benchmarks=sum(self.benchmark_tasks_completed), + total_benchmarks=len(self.benchmark_tasks), + ) + + def finish(self) -> None: + """ + Marks the overall report task as finished and stops the live display. + """ + logger.info("Finishing BenchmarkReportProgress") + self.report_progress.update( + self.report_task, + total=len(self.benchmark_tasks) * 100, + completed=len(self.benchmark_tasks) * 100, + completed_benchmarks=len(self.benchmark_tasks), + total_benchmarks=len(self.benchmark_tasks), + ) + self.report_progress.stop_task(self.report_task) + self.live.stop() + logger.info("BenchmarkReportProgress finished and live display stopped") diff --git a/tests/dummy/services/requests.py b/tests/dummy/services/requests.py index 3bb8152..c502318 100644 --- a/tests/dummy/services/requests.py +++ b/tests/dummy/services/requests.py @@ -1,5 +1,7 @@ +from typing import Optional + from guidellm.core import TextGenerationRequest -from guidellm.request import RequestGenerator +from guidellm.request import GenerationMode, RequestGenerator class TestRequestGenerator(RequestGenerator): @@ -8,5 +10,19 @@ class TestRequestGenerator(RequestGenerator): The purpose - to be used for testing. 
""" + def __init__( + self, + tokenizer: Optional[str] = None, + mode: GenerationMode = "async", + async_queue_size: int = 50, + ): + super().__init__( + type_="test", + source="test", + tokenizer=tokenizer, + mode=mode, + async_queue_size=async_queue_size, + ) + def create_item(self) -> TextGenerationRequest: return TextGenerationRequest(prompt="Test prompt") diff --git a/tests/unit/backend/test_base.py b/tests/unit/backend/test_base.py index c518f82..9247eb1 100644 --- a/tests/unit/backend/test_base.py +++ b/tests/unit/backend/test_base.py @@ -7,6 +7,9 @@ @pytest.mark.smoke() def test_backend_registry(): class MockBackend(Backend): + def __init__(self): + super().__init__("test", "http://localhost:8000", "mock-model") + async def make_request(self, request): yield GenerativeResponse(type_="final", output="Test") @@ -42,6 +45,9 @@ def test_generative_response_creation(): @pytest.mark.asyncio() async def test_backend_make_request(): class MockBackend(Backend): + def __init__(self): + super().__init__("test", "http://localhost:8000", "mock-model") + async def make_request(self, request): yield GenerativeResponse( type_="token_iter", @@ -82,6 +88,9 @@ def available_models(self): @pytest.mark.asyncio() async def test_backend_submit_final(): class MockBackend(Backend): + def __init__(self): + super().__init__("test", "http://localhost:8000", "mock-model") + async def make_request(self, request): yield GenerativeResponse(type_="final", output="Test") @@ -98,6 +107,9 @@ def available_models(self): @pytest.mark.asyncio() async def test_backend_submit_multi(): class MockBackend(Backend): + def __init__(self): + super().__init__("test", "http://localhost:8000", "mock-model") + async def make_request(self, request): yield GenerativeResponse(type_="token_iter", add_token="Token") yield GenerativeResponse(type_="token_iter", add_token=" ") @@ -117,6 +129,9 @@ def available_models(self): @pytest.mark.asyncio() async def test_backend_submit_no_response(): class MockBackend(Backend): + def __init__(self): + super().__init__("test", "http://localhost:8000", "mock-model") + async def make_request(self, request): if False: # simulate no yield yield @@ -134,6 +149,9 @@ def available_models(self): @pytest.mark.asyncio() async def test_backend_submit_multi_final(): class MockBackend(Backend): + def __init__(self): + super().__init__("test", "http://localhost:8000", "mock-model") + async def make_request(self, request): yield GenerativeResponse(type_="token_iter", add_token="Token") yield GenerativeResponse(type_="token_iter", add_token=" ") @@ -153,6 +171,9 @@ def available_models(self): @pytest.mark.smoke() def test_backend_models(): class MockBackend(Backend): + def __init__(self): + super().__init__("test", "http://localhost:8000", "mock-model") + def available_models(self): return ["mock-model", "mock-model-2"] @@ -170,6 +191,9 @@ def test_backend_abstract_methods(): Backend() # type: ignore class IncompleteBackend(Backend): + def __init__(self): + super().__init__("test", "http://localhost:8000", "mock-model") + async def make_request(self, request): yield GenerativeResponse(type_="final", output="Test") diff --git a/tests/unit/backend/test_openai_backend.py b/tests/unit/backend/test_openai_backend.py index 80cab45..73afa94 100644 --- a/tests/unit/backend/test_openai_backend.py +++ b/tests/unit/backend/test_openai_backend.py @@ -87,7 +87,7 @@ def sync_constructor(*args, **kwargs): {"arg1": "value1"}, "http://test-target", ), - ("test_key", None, "localhost", 8000, "test-model", {}, "localhost:8000"), + 
("test_key", None, "localhost", 8000, "test-model", {}, "localhost:8000/v1"), (None, None, None, None, None, {}, settings.openai.base_url), ], ) @@ -256,8 +256,8 @@ def test_openai_backend_target(mock_openai_client): assert backend._client.kwargs["base_url"] == "http://test-target" # type: ignore backend = OpenAIBackend(host="localhost", port=8000) - assert backend._async_client.kwargs["base_url"] == "localhost:8000" # type: ignore - assert backend._client.kwargs["base_url"] == "localhost:8000" # type: ignore + assert backend._async_client.kwargs["base_url"] == "localhost:8000/v1" # type: ignore + assert backend._client.kwargs["base_url"] == "localhost:8000/v1" # type: ignore backend = OpenAIBackend() assert backend._async_client.kwargs["base_url"] == settings.openai.base_url # type: ignore diff --git a/tests/unit/conftest.py b/tests/unit/conftest.py index ae2cc34..3257a8d 100644 --- a/tests/unit/conftest.py +++ b/tests/unit/conftest.py @@ -22,7 +22,9 @@ def _fake_tokenize(text: str) -> List[int]: @pytest.fixture() def mock_requests_pride_and_prejudice(): - text_path = Path(__file__).parent / "dummy" / "data" / "pride_and_prejudice.txt" + text_path = ( + Path(__file__).parent.parent / "dummy" / "data" / "pride_and_prejudice.txt" + ) text_content = text_path.read_text() with requests_mock.Mocker() as mock: diff --git a/tests/unit/core/test_report.py b/tests/unit/core/test_report.py index 610879e..5d56c25 100644 --- a/tests/unit/core/test_report.py +++ b/tests/unit/core/test_report.py @@ -8,7 +8,6 @@ GuidanceReport, TextGenerationBenchmark, TextGenerationBenchmarkReport, - TextGenerationError, TextGenerationRequest, TextGenerationResult, ) @@ -33,12 +32,11 @@ def sample_benchmark_report() -> TextGenerationBenchmarkReport: first_token_time=None, decode_times=sample_distribution, ) - sample_error = TextGenerationError(request=sample_request, message="sample error") sample_benchmark = TextGenerationBenchmark( mode="asynchronous", rate=1.0, results=[sample_result], - errors=[sample_error], + errors=[], concurrencies=[], ) return TextGenerationBenchmarkReport( @@ -47,7 +45,7 @@ def sample_benchmark_report() -> TextGenerationBenchmarkReport: def compare_guidance_reports(report1: GuidanceReport, report2: GuidanceReport) -> bool: - return report1 == report2 + return report1.benchmarks == report2.benchmarks @pytest.mark.smoke() @@ -62,14 +60,10 @@ def test_guidance_report_initialization_with_params(sample_benchmark_report): assert report.benchmarks == [sample_benchmark_report] -@pytest.mark.smoke() -def test_guidance_report_file(sample_benchmark_report): +@pytest.mark.sanity() +def test_guidance_report_print(sample_benchmark_report): report = GuidanceReport(benchmarks=[sample_benchmark_report]) - with tempfile.TemporaryDirectory() as temp_dir: - file_path = Path(temp_dir) / "report.yaml" - report.save_file(file_path) - loaded_report = GuidanceReport.load_file(file_path) - assert compare_guidance_reports(report, loaded_report) + report.print() # This will output to the console @pytest.mark.regression() @@ -86,3 +80,34 @@ def test_guidance_report_yaml(sample_benchmark_report): yaml_str = report.to_yaml() loaded_report = GuidanceReport.from_yaml(yaml_str) assert compare_guidance_reports(report, loaded_report) + + +@pytest.mark.regression() +def test_guidance_report_save_load_file(sample_benchmark_report): + report = GuidanceReport(benchmarks=[sample_benchmark_report]) + with tempfile.TemporaryDirectory() as temp_dir: + file_path = Path(temp_dir) / "report.yaml" + report.save_file(file_path) + 
loaded_report = GuidanceReport.load_file(file_path) + assert compare_guidance_reports(report, loaded_report) + + +@pytest.mark.regression() +def test_empty_guidance_report(): + report = GuidanceReport() + assert len(report.benchmarks) == 0 + report.print() # Ensure it doesn't raise error with no benchmarks + + +@pytest.mark.regression() +def test_compare_guidance_reports(sample_benchmark_report): + report1 = GuidanceReport(benchmarks=[sample_benchmark_report]) + report2 = GuidanceReport(benchmarks=[sample_benchmark_report]) + assert compare_guidance_reports(report1, report2) + + +@pytest.mark.regression() +def test_compare_guidance_reports_inequality(sample_benchmark_report): + report1 = GuidanceReport(benchmarks=[sample_benchmark_report]) + report2 = GuidanceReport(benchmarks=[]) + assert not compare_guidance_reports(report1, report2) diff --git a/tests/unit/core/test_result.py b/tests/unit/core/test_result.py index 39aecc7..02232ba 100644 --- a/tests/unit/core/test_result.py +++ b/tests/unit/core/test_result.py @@ -365,7 +365,7 @@ def test_text_generation_benchmark_report_iter(): elif index == 1: assert benchmark == slow_benchmark else: - raise AssertionError("Unexpected benchmark in report") + raise AssertionError("Unexpected report in report") for index, benchmark in enumerate(report.benchmarks_sorted): if index == 0: @@ -373,7 +373,7 @@ def test_text_generation_benchmark_report_iter(): elif index == 1: assert benchmark == fast_benchmark else: - raise AssertionError("Unexpected benchmark in report") + raise AssertionError("Unexpected report in report") @pytest.mark.regression() diff --git a/tests/unit/executor/test_base.py b/tests/unit/executor/test_base.py index 2cfa670..844cf7f 100644 --- a/tests/unit/executor/test_base.py +++ b/tests/unit/executor/test_base.py @@ -8,8 +8,13 @@ from guidellm.core import ( TextGenerationBenchmarkReport, ) -from guidellm.executor.base import Executor, ExecutorResult -from guidellm.executor.profile_generator import ProfileGenerator +from guidellm.executor import ( + Executor, + ExecutorResult, + Profile, + ProfileGenerationMode, + ProfileGenerator, +) from guidellm.request import RequestGenerator from guidellm.scheduler import Scheduler, SchedulerResult @@ -72,6 +77,7 @@ def test_executor_result_instantiation(): completed=True, count_total=10, count_completed=5, + generation_modes=["synchronous", "throughput", "constant"], report=report, scheduler_result=scheduler_result, ) @@ -122,75 +128,86 @@ def test_executor_instantiation(mode, rate): assert executor.max_duration == 60.0 -async def _run_executor_tests( - executor: Executor, - num_profiles: int, - num_requests: int, - mode: str, - rate: Optional[Union[float, List[float]]], +def _check_executor_result_base( + result: ExecutorResult, + expected_completed: bool, + expected_count_total: int, + expected_count_completed: int, + expected_generation_modes: List[ProfileGenerationMode], ): - iterator = executor.run() + assert result.completed == expected_completed + assert result.count_total == expected_count_total + assert result.count_completed == expected_count_completed + assert result.generation_modes == expected_generation_modes - result = await iterator.__anext__() - assert result.completed is False - assert result.count_total == num_profiles - assert result.count_completed == 0 + +def _check_executor_result_report( + result: ExecutorResult, + mode: ProfileGenerationMode, + rate: Optional[Union[float, List[float]]], + max_number: Optional[int], + max_duration: Optional[float], + benchmarks_count: int, 
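
Beyond the round-trip tests above, the same `GuidanceReport` API is what a consumer would use to inspect the file the CLI writes (default `benchmark_report.json`). A short sketch, assuming such a report file already exists on disk:

```python
from guidellm.core import GuidanceReport

# Load the file written by guidance_report.save_file(output_path) in main.py.
report = GuidanceReport.load_file("benchmark_report.json")

for benchmark_report in report.benchmarks:
    # args carries the backend/data/rate metadata recorded by Executor.run()
    print(benchmark_report.args["mode"], benchmark_report.args["rate"])
```
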
+): assert result.report is not None assert isinstance(result.report, TextGenerationBenchmarkReport) - assert len(result.report.benchmarks) == 0 - assert "mode" in result.report.args + + # check args + for expected in ( + "backend_type", + "target", + "model", + "data_type", + "data", + "tokenizer", + "mode", + "rate", + "max_number", + "max_duration", + ): + assert expected in result.report.args + assert result.report.args["mode"] == mode - assert "rate" in result.report.args assert ( result.report.args["rate"] == rate - if rate is None or isinstance(rate, list) + if rate is None or not isinstance(rate, (float, int)) else [rate] ) - assert "max_number" in result.report.args - assert result.report.args["max_number"] == num_requests - assert "max_duration" in result.report.args - assert result.report.args["max_duration"] is None - assert result.scheduler_result is None - - for benchmark_index in range(num_profiles): - result = await iterator.__anext__() - assert result.completed is False - assert result.count_total == num_profiles - assert result.count_completed == benchmark_index - assert result.report is not None - assert len(result.report.benchmarks) == benchmark_index - assert result.scheduler_result is not None - assert isinstance(result.scheduler_result, SchedulerResult) - - for _ in range(num_requests): - result = await iterator.__anext__() - assert result.completed is False - assert result.count_total == num_profiles - assert result.count_completed == benchmark_index - assert result.report is not None - assert len(result.report.benchmarks) == benchmark_index - assert result.scheduler_result is not None - assert isinstance(result.scheduler_result, SchedulerResult) - - result = await iterator.__anext__() - assert result.completed is False - assert result.count_total == num_profiles - assert result.count_completed == benchmark_index + 1 - assert result.report is not None - assert len(result.report.benchmarks) == benchmark_index + 1 - assert result.scheduler_result is not None - assert isinstance(result.scheduler_result, SchedulerResult) - result.scheduler_result.benchmark.completed_request_rate = ( # type: ignore - benchmark_index + 1 - ) - - result = await iterator.__anext__() - assert result.completed is True - assert result.count_total == num_profiles - assert result.count_completed == num_profiles - assert result.report is not None - assert len(result.report.benchmarks) == num_profiles - assert result.scheduler_result is None + assert result.report.args["max_number"] == max_number + assert result.report.args["max_duration"] == max_duration + + # check benchmarks + assert len(result.report.benchmarks) == benchmarks_count + for benchmark in result.report.benchmarks: + assert isinstance(benchmark, TextGenerationBenchmarkReport) + + +def _check_executor_result_scheduler( + result: ExecutorResult, + expected_scheduler_result: bool, + expected_generation_modes: List[ProfileGenerationMode], + expected_index: Optional[int], + expected_profile_mode: Optional[ProfileGenerationMode], + expected_profile_rate: Optional[float], +): + if not expected_scheduler_result: + assert result.scheduler_result is None + assert result.current_index is None + assert result.current_profile is None + + return + + assert result.scheduler_result is not None + assert isinstance(result.scheduler_result, SchedulerResult) + assert result.current_index == expected_index + assert result.current_profile is not None + assert isinstance(result.current_profile, Profile) + assert result.current_profile.load_gen_mode == 
expected_profile_mode + assert result.current_profile.load_gen_rate == expected_profile_rate + assert ( + result.current_profile.load_gen_mode + == expected_generation_modes[expected_index] # type: ignore + ) @pytest.mark.smoke() @@ -208,32 +225,116 @@ async def test_executor_run_sweep(mock_scheduler): max_number=num_requests, ) - await _run_executor_tests( - executor, settings.num_sweep_profiles, num_requests, "sweep", None + num_profiles = 2 + settings.num_sweep_profiles + generation_modes = ["synchronous", "throughput"] + [ + "constant" + ] * settings.num_sweep_profiles + generation_rates = [None, None] + list(range(2, settings.num_sweep_profiles + 2)) + output_rates = [1, settings.num_sweep_profiles + 1] + list( + range(2, settings.num_sweep_profiles + 2) ) + iterator = executor.run() -@pytest.mark.smoke() -@pytest.mark.asyncio() -async def test_executor_run_synchronous(mock_scheduler): - num_requests = 15 - - backend = create_autospec(Backend, instance=True) - request_generator = create_autospec(RequestGenerator, instance=True) - executor = Executor( - backend=backend, - request_generator=request_generator, - mode="synchronous", + # Check start result + result = await iterator.__anext__() + _check_executor_result_base( + result=result, + expected_completed=False, + expected_count_total=num_profiles, + expected_count_completed=0, + expected_generation_modes=generation_modes, # type: ignore + ) + _check_executor_result_report( + result=result, + mode="sweep", rate=None, max_number=num_requests, + max_duration=None, + benchmarks_count=0, + ) + _check_executor_result_scheduler( + result=result, + expected_scheduler_result=False, + expected_generation_modes=generation_modes, # type: ignore + expected_index=None, + expected_profile_mode=None, + expected_profile_rate=None, ) - await _run_executor_tests(executor, 1, num_requests, "synchronous", None) + for scheduler_index in range(num_profiles): + for request_index in range(num_requests + 2): + result = await iterator.__anext__() + _check_executor_result_base( + result=result, + expected_completed=False, + expected_count_total=num_profiles, + expected_count_completed=scheduler_index + if request_index < num_requests + 1 + else scheduler_index + 1, + expected_generation_modes=generation_modes, # type: ignore + ) + _check_executor_result_report( + result=result, + mode="sweep", + rate=None, + max_number=num_requests, + max_duration=None, + benchmarks_count=scheduler_index + if request_index < num_requests + 1 + else scheduler_index + 1, + ) + _check_executor_result_scheduler( + result=result, + expected_scheduler_result=True, + expected_generation_modes=generation_modes, # type: ignore + expected_index=scheduler_index, + expected_profile_mode=generation_modes[scheduler_index], # type: ignore + expected_profile_rate=generation_rates[scheduler_index], + ) + # set the rate for the benchmark for sweep profile generation + result.report.benchmarks[-1].completed_request_rate = output_rates[ # type: ignore + scheduler_index + ] + result.report.benchmarks[-1].request_count = num_requests # type: ignore + + # Check end result + result = await iterator.__anext__() + _check_executor_result_base( + result=result, + expected_completed=True, + expected_count_total=num_profiles, + expected_count_completed=num_profiles, + expected_generation_modes=generation_modes, # type: ignore + ) + _check_executor_result_report( + result=result, + mode="sweep", + rate=None, + max_number=num_requests, + max_duration=None, + benchmarks_count=num_profiles, + ) + 
_check_executor_result_scheduler( + result=result, + expected_scheduler_result=False, + expected_generation_modes=generation_modes, # type: ignore + expected_index=None, + expected_profile_mode=None, + expected_profile_rate=None, + ) @pytest.mark.smoke() @pytest.mark.asyncio() -async def test_executor_run_throughput(mock_scheduler): +@pytest.mark.parametrize( + "mode", + [ + "synchronous", + "throughput", + ], +) +async def test_executor_run_non_rate_modes(mock_scheduler, mode): num_requests = 15 backend = create_autospec(Backend, instance=True) @@ -241,12 +342,90 @@ async def test_executor_run_throughput(mock_scheduler): executor = Executor( backend=backend, request_generator=request_generator, - mode="throughput", + mode=mode, + rate=None, + max_number=num_requests, + ) + + iterator = executor.run() + + # Check start result + result = await iterator.__anext__() + _check_executor_result_base( + result=result, + expected_completed=False, + expected_count_total=1, + expected_count_completed=0, + expected_generation_modes=[mode], + ) + _check_executor_result_report( + result=result, + mode=mode, rate=None, max_number=num_requests, + max_duration=None, + benchmarks_count=0, + ) + _check_executor_result_scheduler( + result=result, + expected_scheduler_result=False, + expected_generation_modes=[mode], + expected_index=None, + expected_profile_mode=None, + expected_profile_rate=None, ) - await _run_executor_tests(executor, 1, num_requests, "throughput", None) + for request_index in range(num_requests + 2): + result = await iterator.__anext__() + _check_executor_result_base( + result=result, + expected_completed=False, + expected_count_total=1, + expected_count_completed=0 if request_index < num_requests + 1 else 1, + expected_generation_modes=[mode], + ) + _check_executor_result_report( + result=result, + mode=mode, + rate=None, + max_number=num_requests, + max_duration=None, + benchmarks_count=0 if request_index < num_requests + 1 else 1, + ) + _check_executor_result_scheduler( + result=result, + expected_scheduler_result=True, + expected_generation_modes=[mode], + expected_index=0, + expected_profile_mode=mode, + expected_profile_rate=None, + ) + + # Check end result + result = await iterator.__anext__() + _check_executor_result_base( + result=result, + expected_completed=True, + expected_count_total=1, + expected_count_completed=1, + expected_generation_modes=[mode], + ) + _check_executor_result_report( + result=result, + mode=mode, + rate=None, + max_number=num_requests, + max_duration=None, + benchmarks_count=1, + ) + _check_executor_result_scheduler( + result=result, + expected_scheduler_result=False, + expected_generation_modes=[mode], + expected_index=None, + expected_profile_mode=None, + expected_profile_rate=None, + ) @pytest.mark.smoke() @@ -260,7 +439,7 @@ async def test_executor_run_throughput(mock_scheduler): ("poisson", [10, 20, 30]), ], ) -async def test_executor_run_constant_poisson(mock_scheduler, mode, rate): +async def test_executor_run_rate_modes(mock_scheduler, mode, rate): num_requests = 15 backend = create_autospec(Backend, instance=True) @@ -273,6 +452,91 @@ async def test_executor_run_constant_poisson(mock_scheduler, mode, rate): max_number=num_requests, ) - await _run_executor_tests( - executor, len(rate) if isinstance(rate, list) else 1, num_requests, mode, rate + num_profiles = len(rate) if isinstance(rate, list) else 1 + generation_modes = [mode] * num_profiles + generation_rates = rate if isinstance(rate, list) else [rate] + + iterator = executor.run() + + # Check 
start result + result = await iterator.__anext__() + _check_executor_result_base( + result=result, + expected_completed=False, + expected_count_total=num_profiles, + expected_count_completed=0, + expected_generation_modes=generation_modes, + ) + _check_executor_result_report( + result=result, + mode=mode, + rate=rate, + max_number=num_requests, + max_duration=None, + benchmarks_count=0, + ) + _check_executor_result_scheduler( + result=result, + expected_scheduler_result=False, + expected_generation_modes=generation_modes, + expected_index=None, + expected_profile_mode=None, + expected_profile_rate=None, + ) + + for scheduler_index in range(num_profiles): + for request_index in range(num_requests + 2): + result = await iterator.__anext__() + _check_executor_result_base( + result=result, + expected_completed=False, + expected_count_total=num_profiles, + expected_count_completed=scheduler_index + if request_index < num_requests + 1 + else scheduler_index + 1, + expected_generation_modes=generation_modes, + ) + _check_executor_result_report( + result=result, + mode=mode, + rate=rate, + max_number=num_requests, + max_duration=None, + benchmarks_count=scheduler_index + if request_index < num_requests + 1 + else scheduler_index + 1, + ) + _check_executor_result_scheduler( + result=result, + expected_scheduler_result=True, + expected_generation_modes=generation_modes, + expected_index=scheduler_index, + expected_profile_mode=generation_modes[scheduler_index], + expected_profile_rate=generation_rates[scheduler_index], + ) + + # Check end result + result = await iterator.__anext__() + _check_executor_result_base( + result=result, + expected_completed=True, + expected_count_total=num_profiles, + expected_count_completed=num_profiles, + expected_generation_modes=generation_modes, + ) + _check_executor_result_report( + result=result, + mode=mode, + rate=rate, + max_number=num_requests, + max_duration=None, + benchmarks_count=num_profiles, + ) + _check_executor_result_scheduler( + result=result, + expected_scheduler_result=False, + expected_generation_modes=generation_modes, + expected_index=None, + expected_profile_mode=None, + expected_profile_rate=None, ) diff --git a/tests/unit/executor/test_profile_generator.py b/tests/unit/executor/test_profile_generator.py index 1389459..9c91d57 100644 --- a/tests/unit/executor/test_profile_generator.py +++ b/tests/unit/executor/test_profile_generator.py @@ -27,6 +27,7 @@ def test_profile_instantiation(): profile = Profile(load_gen_mode="constant", load_gen_rate=10) assert profile.load_gen_mode == "constant" assert profile.load_gen_rate == 10 + assert profile.args == {} @pytest.mark.smoke() @@ -54,11 +55,20 @@ def test_profile_generator_instantiation(mode, rate): assert generator.rates == [rate] if mode == "sweep": - assert len(generator) == settings.num_sweep_profiles + assert len(generator) == settings.num_sweep_profiles + 2 + assert ( + generator.profile_generation_modes + == ["synchronous", "throughput"] + + ["constant"] * settings.num_sweep_profiles + ) elif mode in ("throughput", "synchronous"): assert len(generator) == 1 + assert generator.profile_generation_modes == [mode] else: assert len(generator) == len(rate) if isinstance(rate, list) else 1 + assert generator.profile_generation_modes == [mode] * ( + len(rate) if isinstance(rate, list) else 1 + ) assert generator.generated_count == 0 @@ -95,7 +105,7 @@ def test_profile_generator_next_sweep(): generator = ProfileGenerator(mode="sweep") current_report = TextGenerationBenchmarkReport() - for index in 
range(settings.num_sweep_profiles): + for index in range(settings.num_sweep_profiles + 2): profile: Profile = generator.next(current_report) # type: ignore if index == 0: diff --git a/tests/unit/scheduler/test_base.py b/tests/unit/scheduler/test_base.py index 093b57a..b485e59 100644 --- a/tests/unit/scheduler/test_base.py +++ b/tests/unit/scheduler/test_base.py @@ -7,7 +7,6 @@ from guidellm.backend import Backend from guidellm.core import ( TextGenerationBenchmark, - TextGenerationError, TextGenerationRequest, TextGenerationResult, ) @@ -149,30 +148,48 @@ def _submit(req): max_number=max_number, ) + run_count = 0 count_completed = 0 received_init = False received_final = False async for result in scheduler.run(): + run_count += 1 + + assert run_count <= max_number + 2 assert result.count_total == max_number assert result.benchmark is not None assert isinstance(result.benchmark, TextGenerationBenchmark) - if result.current_result: - assert isinstance(result.current_result, TextGenerationResult) + if result.current_result is not None: count_completed += 1 - assert result.count_completed == count_completed - assert not result.completed - elif not received_init: + + if run_count == 1: + assert not received_init + assert not received_final assert count_completed == 0 assert result.count_completed == 0 assert not result.completed + assert result.current_result is None received_init = True - elif not received_final: + elif run_count - 2 == max_number: + assert received_init + assert not received_final assert count_completed == max_number assert result.count_completed == max_number assert result.completed + assert result.current_result is None received_final = True + else: + assert received_init + assert not received_final + assert count_completed == run_count - 1 + assert result.count_completed == run_count - 1 + assert not result.completed + assert result.current_result is not None + assert isinstance(result.current_result, TextGenerationResult) + assert received_init + assert received_final assert count_completed == max_number @@ -182,8 +199,6 @@ def _submit(req): "mode", [ "synchronous", - "throughput", - "poisson", "constant", ], ) @@ -218,32 +233,47 @@ async def _submit(req): max_duration=max_duration, ) + run_count = 0 count_completed = 0 received_init = False received_final = False start_time = time.time() async for result in scheduler.run(): + run_count += 1 + + assert run_count <= max_duration * rate + 2 assert result.count_total == max_duration assert result.benchmark is not None assert isinstance(result.benchmark, TextGenerationBenchmark) - if result.current_result: - assert isinstance( - result.current_result, (TextGenerationResult, TextGenerationError) - ) + if result.current_result is not None: count_completed += 1 - assert result.count_completed == round(time.time() - start_time) - assert not result.completed - elif not received_init: + + if run_count == 1: + assert not received_init + assert not received_final assert count_completed == 0 assert result.count_completed == 0 assert not result.completed + assert result.current_result is None received_init = True - elif not received_final: + elif time.time() - start_time >= max_duration: + assert received_init + assert not received_final assert result.count_completed == max_duration assert result.completed + assert result.current_result is None received_final = True + else: + assert received_init + assert not received_final + assert result.count_completed == round(time.time() - start_time) + assert not result.completed + assert 
result.current_result is not None + assert isinstance(result.current_result, TextGenerationResult) + assert received_init + assert received_final end_time = time.time() assert pytest.approx(end_time - start_time, abs=0.1) == max_duration assert pytest.approx(count_completed, abs=5) == max_duration * rate diff --git a/tests/unit/test_logger.py b/tests/unit/test_logger.py index 943d6e1..53e8b66 100644 --- a/tests/unit/test_logger.py +++ b/tests/unit/test_logger.py @@ -22,9 +22,12 @@ def test_default_logger_settings(capsys): # Default settings should log to console with INFO level and no file logging logger.info("Info message") logger.debug("Debug message") + logger.warning("Warning message") + logger.error("Error message") captured = capsys.readouterr() - assert captured.out.count("Info message") == 1 + assert captured.out.count("Warning message") == 1 + assert captured.out.count("Error message") == 1 assert "Debug message" not in captured.out diff --git a/tests/unit/utils/test_progress.py b/tests/unit/utils/test_progress.py new file mode 100644 index 0000000..637b2be --- /dev/null +++ b/tests/unit/utils/test_progress.py @@ -0,0 +1,116 @@ +import pytest + +from guidellm.utils import BenchmarkReportProgress + + +@pytest.fixture() +def benchmark_progress(): + return BenchmarkReportProgress() + + +@pytest.mark.smoke() +def test_initialization(benchmark_progress): + assert benchmark_progress.report_task is None + assert benchmark_progress.benchmark_tasks == [] + assert benchmark_progress.benchmark_tasks_started == [] + assert benchmark_progress.benchmark_tasks_completed == [] + assert benchmark_progress.benchmark_tasks_progress == [] + + +@pytest.mark.smoke() +def test_start_method(benchmark_progress): + descriptions = ["Benchmark 1", "Benchmark 2"] + benchmark_progress.start(descriptions) + + assert len(benchmark_progress.benchmark_tasks) == 2 + assert benchmark_progress.report_task is not None + + benchmark_progress.finish() + + +@pytest.mark.sanity() +def test_update_benchmark(benchmark_progress): + descriptions = ["Benchmark 1"] + benchmark_progress.start(descriptions) + + benchmark_progress.update_benchmark( + index=0, + description="Updating Benchmark 1", + completed=False, + completed_count=50, + completed_total=100, + start_time=0, + req_per_sec=10.5, + ) + assert benchmark_progress.benchmark_tasks_progress[0] == 50.0 + + benchmark_progress.finish() + + +@pytest.mark.sanity() +def test_finish_method(benchmark_progress): + descriptions = ["Benchmark 1", "Benchmark 2"] + benchmark_progress.start(descriptions) + benchmark_progress.finish() + + assert benchmark_progress.report_progress.finished + + +@pytest.mark.regression() +def test_error_on_update_completed_benchmark(benchmark_progress): + descriptions = ["Benchmark 1"] + benchmark_progress.start(descriptions) + benchmark_progress.update_benchmark( + index=0, + description="Benchmark 1", + completed=True, + completed_count=100, + completed_total=100, + start_time=0, + req_per_sec=10.5, + ) + + with pytest.raises(ValueError, match="already completed"): + benchmark_progress.update_benchmark( + index=0, + description="Benchmark 1", + completed=False, + completed_count=50, + completed_total=100, + start_time=0, + req_per_sec=10.5, + ) + + benchmark_progress.finish() + + +@pytest.mark.regression() +def test_multiple_updates(benchmark_progress): + descriptions = ["Benchmark 1", "Benchmark 2"] + benchmark_progress.start(descriptions) + + # First update + benchmark_progress.update_benchmark( + index=0, + description="Updating Benchmark 1", + 
completed=False, + completed_count=50, + completed_total=100, + start_time=0, + req_per_sec=5.0, + ) + assert benchmark_progress.benchmark_tasks_progress[0] == 50.0 + + # Second update, same task + benchmark_progress.update_benchmark( + index=0, + description="Updating Benchmark 1", + completed=True, + completed_count=100, + completed_total=100, + start_time=0, + req_per_sec=5.0, + ) + assert benchmark_progress.benchmark_tasks_progress[0] == 100.0 + + benchmark_progress.finish()
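
As a compact companion to the new tests above, the full `BenchmarkReportProgress` lifecycle is `start`, repeated `update_benchmark` calls, then `finish`. A condensed, runnable sketch with made-up numbers:

```python
import time

from guidellm.utils import BenchmarkReportProgress

# Condensed walk-through of the start -> update -> finish flow that the
# tests above exercise; counts and rates are illustrative only.
progress = BenchmarkReportProgress()
progress.start(["synchronous", "constant@5.00 req/s"])

start = time.time()
for step in range(1, 11):
    time.sleep(0.05)
    progress.update_benchmark(
        index=0,
        description="synchronous",
        completed=(step == 10),
        completed_count=step,
        completed_total=10,
        start_time=start,
        req_per_sec=step / (time.time() - start),
    )

progress.finish()
```
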