Add mean and percentile info as computed_field properties such that they become serializable
anmarques committed Sep 4, 2024
1 parent edba84d commit 4312cb4
Showing 2 changed files with 99 additions and 19 deletions.
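
The change relies on Pydantic v2's @computed_field decorator: a plain @property is usable in Python code but is skipped during serialization, while a property wrapped in @computed_field is evaluated and included in model_dump() / model_dump_json(). A minimal sketch of that pattern follows, using a hypothetical Numbers model rather than the guidellm classes:

from typing import List

from pydantic import BaseModel, computed_field


class Numbers(BaseModel):
    # Hypothetical stand-in model; not part of guidellm.
    values: List[float] = []

    @property
    def maximum(self) -> float:
        # A plain property: available in code, but absent from model_dump().
        return max(self.values) if self.values else 0.0

    @computed_field
    @property
    def mean(self) -> float:
        # A computed field: evaluated and serialized alongside stored fields.
        return sum(self.values) / len(self.values) if self.values else 0.0


print(Numbers(values=[1.0, 2.0, 3.0]).model_dump())
# {'values': [1.0, 2.0, 3.0], 'mean': 2.0} -- 'maximum' does not appear

This is why the report code below can read benchmark.prompt_token_percentiles and friends directly, and why those values survive being written out with the rest of the report.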
26 changes: 8 additions & 18 deletions src/guidellm/core/report.py
@@ -147,19 +147,15 @@ def _create_benchmark_report_data_tokens_summary(
     for benchmark in report.benchmarks_sorted:
         table.add_row(
             _benchmark_rate_id(benchmark),
-            f"{benchmark.prompt_token_distribution.mean:.2f}",
+            f"{benchmark.prompt_token:.2f}",
             ", ".join(
                 f"{percentile:.1f}"
-                for percentile in benchmark.prompt_token_distribution.percentiles(
-                    [1, 5, 50, 95, 99]
-                )
+                for percentile in benchmark.prompt_token_percentiles
             ),
-            f"{benchmark.output_token_distribution.mean:.2f}",
+            f"{benchmark.output_token:.2f}",
             ", ".join(
                 f"{percentile:.1f}"
-                for percentile in benchmark.output_token_distribution.percentiles(
-                    [1, 5, 50, 95, 99]
-                )
+                for percentile in benchmark.output_token_percentiles
             ),
         )
     logger.debug("Created data tokens summary table for the report.")
@@ -181,7 +177,7 @@ def _create_benchmark_report_dist_perf_summary(
         "Benchmark",
         "Request Latency [1%, 5%, 10%, 50%, 90%, 95%, 99%] (sec)",
         "Time to First Token [1%, 5%, 10%, 50%, 90%, 95%, 99%] (ms)",
-        "Inter Token Latency [1%, 5%, 10%, 50%, 90% 95%, 99%] (ms)",
+        "Inter Token Latency [1%, 5%, 10%, 50%, 90%, 95%, 99%] (ms)",
         title="[magenta]Performance Stats by Benchmark[/magenta]",
         title_style="bold",
         title_justify="left",
@@ -193,21 +189,15 @@
             _benchmark_rate_id(benchmark),
             ", ".join(
                 f"{percentile:.2f}"
-                for percentile in benchmark.request_latency_distribution.percentiles(
-                    [1, 5, 10, 50, 90, 95, 99]
-                )
+                for percentile in benchmark.request_latency_percentiles
             ),
             ", ".join(
                 f"{percentile * 1000:.1f}"
-                for percentile in benchmark.ttft_distribution.percentiles(
-                    [1, 5, 10, 50, 90, 95, 99]
-                )
+                for percentile in benchmark.time_to_first_token_percentiles
             ),
             ", ".join(
                 f"{percentile * 1000:.1f}"
-                for percentile in benchmark.itl_distribution.percentiles(
-                    [1, 5, 10, 50, 90, 95, 99]
-                )
+                for percentile in benchmark.inter_token_latency_percentiles
             ),
         )
     logger.debug("Created distribution performance summary table for the report.")
92 changes: 91 additions & 1 deletion src/guidellm/core/result.py
@@ -2,7 +2,7 @@
 from typing import Any, Dict, List, Literal, Optional, Union

 from loguru import logger
-from pydantic import Field
+from pydantic import Field, computed_field

 from guidellm.core.distribution import Distribution
 from guidellm.core.request import TextGenerationRequest
@@ -221,6 +221,7 @@ def __iter__(self):
         """
         return iter(self.results)

+    @computed_field
     @property
     def request_count(self) -> int:
         """
@@ -231,6 +232,7 @@ def request_count(self) -> int:
         """
         return len(self.results)

+    @computed_field
     @property
     def error_count(self) -> int:
         """
@@ -241,6 +243,7 @@ def error_count(self) -> int:
         """
         return len(self.errors)

+    @computed_field
     @property
     def total_count(self) -> int:
         """
@@ -251,6 +254,7 @@ def total_count(self) -> int:
         """
         return self.request_count + self.error_count

+    @computed_field
     @property
     def start_time(self) -> Optional[float]:
         """
@@ -264,6 +268,7 @@ def start_time(self) -> Optional[float]:

         return self.results[0].start_time

+    @computed_field
     @property
     def end_time(self) -> Optional[float]:
         """
@@ -277,6 +282,7 @@ def end_time(self) -> Optional[float]:

         return self.results[-1].end_time

+    @computed_field
     @property
     def duration(self) -> float:
         """
@@ -290,6 +296,7 @@ def duration(self) -> float:

         return self.end_time - self.start_time

+    @computed_field
     @property
     def completed_request_rate(self) -> float:
         """
@@ -303,6 +310,7 @@ def completed_request_rate(self) -> float:

         return len(self.results) / self.duration

+    @computed_field
     @property
     def request_latency(self) -> float:
         """
@@ -332,6 +340,19 @@ def request_latency_distribution(self) -> Distribution:
             ]
         )

+    @computed_field
+    @property
+    def request_latency_percentiles(self) -> List[float]:
+        """
+        Get standard percentiles of request latency in seconds.
+
+        :return: List of percentile request latency in seconds
+        :rtype: List[float]
+        """
+        return self.request_latency_distribution.percentiles([1, 5, 10, 50, 90, 95, 99])
+
+
+    @computed_field
     @property
     def time_to_first_token(self) -> float:
         """
@@ -360,7 +381,19 @@ def ttft_distribution(self) -> Distribution:
                 if result.first_token_time is not None
             ]
         )

+    @computed_field
+    @property
+    def time_to_first_token_percentiles(self) -> List[float]:
+        """
+        Get standard percentiles for time taken to decode the first token in milliseconds.
+
+        :return: List of percentile time taken to decode the first token in milliseconds.
+        :rtype: List[float]
+        """
+        return self.ttft_distribution.percentiles([1, 5, 10, 50, 90, 95, 99])
+
+    @computed_field
     @property
     def inter_token_latency(self) -> float:
         """
@@ -387,7 +420,19 @@ def itl_distribution(self) -> Distribution:
                 decode for result in self.results for decode in result.decode_times.data
             ]
         )

+    @computed_field
+    @property
+    def inter_token_latency_percentiles(self) -> List[float]:
+        """
+        Get standard percentiles for the time between tokens in milliseconds.
+
+        :return: List of percentiles for the average time between tokens.
+        :rtype: List[float]
+        """
+        return self.itl_distribution.percentiles([1, 5, 10, 50, 90, 95, 99])
+
+    @computed_field
     @property
     def output_token_throughput(self) -> float:
         """
@@ -403,6 +448,17 @@ def output_token_throughput(self) -> float:

         return total_tokens / self.duration

+    @computed_field
+    @property
+    def prompt_token(self) -> float:
+        """
+        Get the average number of prompt tokens.
+
+        :return: The average number of prompt tokens.
+        :rtype: float
+        """
+        return self.prompt_token_distribution.mean
+
     @property
     def prompt_token_distribution(self) -> Distribution:
         """
@@ -413,6 +469,28 @@ def prompt_token_distribution(self) -> Distribution:
         """
         return Distribution(data=[result.prompt_token_count for result in self.results])

+    @computed_field
+    @property
+    def prompt_token_percentiles(self) -> List[float]:
+        """
+        Get standard percentiles for number of prompt tokens.
+
+        :return: List of percentiles of number of prompt tokens.
+        :rtype: List[float]
+        """
+        return self.prompt_token_distribution.percentiles([1, 5, 50, 95, 99])
+
+    @computed_field
+    @property
+    def output_token(self) -> float:
+        """
+        Get the average number of output tokens.
+
+        :return: The average number of output tokens.
+        :rtype: float
+        """
+        return self.output_token_distribution.mean
+
     @property
     def output_token_distribution(self) -> Distribution:
         """
@@ -423,6 +501,18 @@ def output_token_distribution(self) -> Distribution:
         """
         return Distribution(data=[result.output_token_count for result in self.results])

+    @computed_field
+    @property
+    def output_token_percentiles(self) -> List[float]:
+        """
+        Get standard percentiles for number of output tokens.
+
+        :return: List of percentiles of number of output tokens.
+        :rtype: List[float]
+        """
+        return self.output_token_distribution.percentiles([1, 5, 50, 95, 99])
+
+    @computed_field
     @property
     def overloaded(self) -> bool:
         if (
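Taken together, the new computed fields mean the summary statistics are available on the serialized benchmark, not only via the in-memory Distribution objects. A rough usage sketch follows; the class name TextGenerationBenchmark and the surrounding setup are assumptions inferred from the file paths, not confirmed by this diff:

import json

from guidellm.core.result import TextGenerationBenchmark  # class name assumed


def print_summary(benchmark: TextGenerationBenchmark) -> None:
    # With the computed_field decorators above, derived statistics are part of
    # the dumped payload and survive a JSON round trip.
    payload = json.loads(benchmark.model_dump_json())
    print(payload["request_latency_percentiles"])  # [p1, p5, p10, p50, p90, p95, p99]
    print(payload["time_to_first_token_percentiles"])
    print(payload["prompt_token"], payload["output_token"])  # mean token counts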
