diff --git a/src/bentoml/_internal/marshal/dispatcher.py b/src/bentoml/_internal/marshal/dispatcher.py index 0a9a8b43138..8abe6c52461 100644 --- a/src/bentoml/_internal/marshal/dispatcher.py +++ b/src/bentoml/_internal/marshal/dispatcher.py @@ -354,6 +354,7 @@ async def controller(self): a = self.optimizer.o_a b = self.optimizer.o_b + # the estimated latency of the first request if we began processing now latency_0 = w0 + a * n + b if n > 1 and latency_0 >= self.max_latency_in_ms: