Add mean and percentile info as computed_field properties such that they become serializable
anmarques committed Sep 4, 2024
1 parent edba84d commit 4312cb4
Showing 2 changed files with 99 additions and 19 deletions.
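
The change relies on Pydantic v2's @computed_field decorator: a plain @property is usable in Python code but is skipped during serialization, while a property wrapped in @computed_field is evaluated and included in model_dump() / model_dump_json(). A minimal sketch of that pattern follows, using a hypothetical Numbers model rather than the guidellm classes:

from typing import List

from pydantic import BaseModel, computed_field


class Numbers(BaseModel):
    # Hypothetical stand-in model; not part of guidellm.
    values: List[float] = []

    @property
    def maximum(self) -> float:
        # A plain property: available in code, but absent from model_dump().
        return max(self.values) if self.values else 0.0

    @computed_field
    @property
    def mean(self) -> float:
        # A computed field: evaluated and serialized alongside stored fields.
        return sum(self.values) / len(self.values) if self.values else 0.0


print(Numbers(values=[1.0, 2.0, 3.0]).model_dump())
# {'values': [1.0, 2.0, 3.0], 'mean': 2.0} -- 'maximum' does not appear

This is why the report code below can read benchmark.prompt_token_percentiles and friends directly, and why those values survive being written out with the rest of the report.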
26 changes: 8 additions & 18 deletions src/guidellm/core/report.py
@@ -147,19 +147,15 @@ def _create_benchmark_report_data_tokens_summary(
     for benchmark in report.benchmarks_sorted:
         table.add_row(
             _benchmark_rate_id(benchmark),
-            f"{benchmark.prompt_token_distribution.mean:.2f}",
+            f"{benchmark.prompt_token:.2f}",
             ", ".join(
                 f"{percentile:.1f}"
-                for percentile in benchmark.prompt_token_distribution.percentiles(
-                    [1, 5, 50, 95, 99]
-                )
+                for percentile in benchmark.prompt_token_percentiles
             ),
-            f"{benchmark.output_token_distribution.mean:.2f}",
+            f"{benchmark.output_token:.2f}",
             ", ".join(
                 f"{percentile:.1f}"
-                for percentile in benchmark.output_token_distribution.percentiles(
-                    [1, 5, 50, 95, 99]
-                )
+                for percentile in benchmark.output_token_percentiles
             ),
         )
     logger.debug("Created data tokens summary table for the report.")
@@ -181,7 +177,7 @@ def _create_benchmark_report_dist_perf_summary(
         "Benchmark",
         "Request Latency [1%, 5%, 10%, 50%, 90%, 95%, 99%] (sec)",
         "Time to First Token [1%, 5%, 10%, 50%, 90%, 95%, 99%] (ms)",
-        "Inter Token Latency [1%, 5%, 10%, 50%, 90% 95%, 99%] (ms)",
+        "Inter Token Latency [1%, 5%, 10%, 50%, 90%, 95%, 99%] (ms)",
         title="[magenta]Performance Stats by Benchmark[/magenta]",
         title_style="bold",
         title_justify="left",
@@ -193,21 +189,15 @@
             _benchmark_rate_id(benchmark),
             ", ".join(
                 f"{percentile:.2f}"
-                for percentile in benchmark.request_latency_distribution.percentiles(
-                    [1, 5, 10, 50, 90, 95, 99]
-                )
+                for percentile in benchmark.request_latency_percentiles
             ),
             ", ".join(
                 f"{percentile * 1000:.1f}"
-                for percentile in benchmark.ttft_distribution.percentiles(
-                    [1, 5, 10, 50, 90, 95, 99]
-                )
+                for percentile in benchmark.time_to_first_token_percentiles
             ),
             ", ".join(
                 f"{percentile * 1000:.1f}"
-                for percentile in benchmark.itl_distribution.percentiles(
-                    [1, 5, 10, 50, 90, 95, 99]
-                )
+                for percentile in benchmark.inter_token_latency_percentiles
             ),
         )
     logger.debug("Created distribution performance summary table for the report.")
92 changes: 91 additions & 1 deletion src/guidellm/core/result.py
@@ -2,7 +2,7 @@
 from typing import Any, Dict, List, Literal, Optional, Union

 from loguru import logger
-from pydantic import Field
+from pydantic import Field, computed_field

 from guidellm.core.distribution import Distribution
 from guidellm.core.request import TextGenerationRequest
@@ -221,6 +221,7 @@ def __iter__(self):
         """
         return iter(self.results)

+    @computed_field
     @property
     def request_count(self) -> int:
         """
@@ -231,6 +232,7 @@ def request_count(self) -> int:
         """
         return len(self.results)

+    @computed_field
     @property
     def error_count(self) -> int:
         """
@@ -241,6 +243,7 @@ def error_count(self) -> int:
         """
         return len(self.errors)

+    @computed_field
     @property
     def total_count(self) -> int:
         """
@@ -251,6 +254,7 @@ def total_count(self) -> int:
         """
         return self.request_count + self.error_count

+    @computed_field
     @property
     def start_time(self) -> Optional[float]:
         """
@@ -264,6 +268,7 @@ def start_time(self) -> Optional[float]:

         return self.results[0].start_time

+    @computed_field
     @property
     def end_time(self) -> Optional[float]:
         """
@@ -277,6 +282,7 @@ def end_time(self) -> Optional[float]:

         return self.results[-1].end_time

+    @computed_field
     @property
     def duration(self) -> float:
         """
@@ -290,6 +296,7 @@ def duration(self) -> float:

         return self.end_time - self.start_time

+    @computed_field
     @property
     def completed_request_rate(self) -> float:
         """
@@ -303,6 +310,7 @@ def completed_request_rate(self) -> float:

         return len(self.results) / self.duration

+    @computed_field
     @property
     def request_latency(self) -> float:
         """
@@ -332,6 +340,19 @@ def request_latency_distribution(self) -> Distribution:
             ]
         )

+    @computed_field
+    @property
+    def request_latency_percentiles(self) -> List[float]:
+        """
+        Get standard percentiles of request latency in seconds.
+
+        :return: List of percentile request latency in seconds
+        :rtype: List[float]
+        """
+        return self.request_latency_distribution.percentiles([1, 5, 10, 50, 90, 95, 99])
+
+
+    @computed_field
     @property
     def time_to_first_token(self) -> float:
         """
@@ -360,7 +381,19 @@ def ttft_distribution(self) -> Distribution:
                 if result.first_token_time is not None
             ]
         )

+    @computed_field
+    @property
+    def time_to_first_token_percentiles(self) -> List[float]:
+        """
+        Get standard percentiles for time taken to decode the first token in milliseconds.
+
+        :return: List of percentile time taken to decode the first token in milliseconds.
+        :rtype: List[float]
+        """
+        return self.ttft_distribution.percentiles([1, 5, 10, 50, 90, 95, 99])
+
+    @computed_field
     @property
     def inter_token_latency(self) -> float:
         """
@@ -387,7 +420,19 @@ def itl_distribution(self) -> Distribution:
                 decode for result in self.results for decode in result.decode_times.data
             ]
         )

+    @computed_field
+    @property
+    def inter_token_latency_percentiles(self) -> List[float]:
+        """
+        Get standard percentiles for the time between tokens in milliseconds.
+
+        :return: List of percentiles for the average time between tokens.
+        :rtype: List[float]
+        """
+        return self.itl_distribution.percentiles([1, 5, 10, 50, 90, 95, 99])
+
+    @computed_field
     @property
     def output_token_throughput(self) -> float:
         """
@@ -403,6 +448,17 @@ def output_token_throughput(self) -> float:

         return total_tokens / self.duration

+    @computed_field
+    @property
+    def prompt_token(self) -> float:
+        """
+        Get the average number of prompt tokens.
+
+        :return: The average number of prompt tokens.
+        :rtype: float
+        """
+        return self.prompt_token_distribution.mean
+
     @property
     def prompt_token_distribution(self) -> Distribution:
         """
@@ -413,6 +469,28 @@ def prompt_token_distribution(self) -> Distribution:
         """
         return Distribution(data=[result.prompt_token_count for result in self.results])

+    @computed_field
+    @property
+    def prompt_token_percentiles(self) -> List[float]:
+        """
+        Get standard percentiles for number of prompt tokens.
+
+        :return: List of percentiles of number of prompt tokens.
+        :rtype: List[float]
+        """
+        return self.prompt_token_distribution.percentiles([1, 5, 50, 95, 99])
+
+    @computed_field
+    @property
+    def output_token(self) -> float:
+        """
+        Get the average number of output tokens.
+
+        :return: The average number of output tokens.
+        :rtype: float
+        """
+        return self.output_token_distribution.mean
+
     @property
     def output_token_distribution(self) -> Distribution:
         """
@@ -423,6 +501,18 @@ def output_token_distribution(self) -> Distribution:
         """
         return Distribution(data=[result.output_token_count for result in self.results])

+    @computed_field
+    @property
+    def output_token_percentiles(self) -> List[float]:
+        """
+        Get standard percentiles for number of output tokens.
+
+        :return: List of percentiles of number of output tokens.
+        :rtype: List[float]
+        """
+        return self.output_token_distribution.percentiles([1, 5, 50, 95, 99])
+
+    @computed_field
     @property
     def overloaded(self) -> bool:
         if (
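Taken together, the new computed fields mean the summary statistics are available on the serialized benchmark, not only via the in-memory Distribution objects. A rough usage sketch follows; the class name TextGenerationBenchmark and the surrounding setup are assumptions inferred from the file paths, not confirmed by this diff:

import json

from guidellm.core.result import TextGenerationBenchmark  # class name assumed


def print_summary(benchmark: TextGenerationBenchmark) -> None:
    # With the computed_field decorators above, derived statistics are part of
    # the dumped payload and survive a JSON round trip.
    payload = json.loads(benchmark.model_dump_json())
    print(payload["request_latency_percentiles"])  # [p1, p5, p10, p50, p90, p95, p99]
    print(payload["time_to_first_token_percentiles"])
    print(payload["prompt_token"], payload["output_token"])  # mean token counts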
