Skip to content

Commit

Permalink
Fixing wrong token count issue in streaming client
Browse files Browse the repository at this point in the history
Signed-off-by: Gangmuk <[email protected]>
  • Loading branch information
gangmuk committed Feb 25, 2025
1 parent 1b8805f commit 3abf67a
Showing 1 changed file with 9 additions and 4 deletions.
13 changes: 9 additions & 4 deletions benchmarks/client/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,15 +47,18 @@ async def send_request_streaming(client: openai.AsyncOpenAI,
first_response_time = asyncio.get_event_loop().time()
output_text = chunk.choices[0].delta.content
text_chunks.append(output_text)
if hasattr(chunk, 'usage') and chunk.usage is not None:
# For OpenAI, we expect to get complete usage stats, not partial ones to accumulate
# So we can safely overwrite previous values if they exist
if chunk.usage.prompt_tokens is not None:
prompt_tokens = chunk.usage.prompt_tokens
if chunk.usage.completion_tokens is not None:
output_tokens = chunk.usage.completion_tokens
if chunk.usage.total_tokens is not None:
total_tokens = chunk.usage.total_tokens
except Exception as stream_error:
# Handle errors during streaming
logging.error(f"Request {request_id}: Stream interrupted: {type(stream_error).__name__}: {str(stream_error)}")
# Still try to use what we've received so far
if not text_chunks:
raise # Re-raise if we got nothing at all

response_text = "".join(text_chunks)
response_time = asyncio.get_event_loop().time()
Expand Down Expand Up @@ -216,7 +219,9 @@ def main(args):
base_url=args.endpoint + "/v1",
)
if args.routing_strategy is not None:
client.default_headers["routing-strategy"] = args.routing_strategy
client = client.with_options(
default_headers={"routing-strategy": args.routing_strategy}
)
if not args.streaming:
logging.info("Using batch client")
start_time = time.time()
Expand Down

0 comments on commit 3abf67a

Please sign in to comment.