Skip to content

Commit

Permalink
Clean up benchmarks files (#39)
Browse files: browse the repository at this point in the history
format benchmarks files
  • Loading branch information
FanhaiLu1 authored Apr 25, 2024
1 parent d267bd2 commit b0400f8
Show file tree
Hide file tree
Showing 2 changed files with 10 additions and 10 deletions.
10 changes: 5 additions & 5 deletions benchmarks/analyze_sharegpt.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@
SYSTEM_TIME_PER_DECODE_TOKEN_MS = 30 / 160


#pylint: disable-next=all
# pylint: disable-next=all
def do_simulation(prefill_bucket_size_to_ms, system_time_per_decode_token_ms):
def next_power_of_2(x):
    """Round a non-negative integer up to a power of two.

    Args:
        x: Integer to round up. Zero is special-cased.

    Returns:
        1 when ``x`` is 0; otherwise ``2 ** (x - 1).bit_length()``, which for
        positive ``x`` is the smallest power of two that is >= ``x`` (exact
        powers of two map to themselves).
    """
    # (x - 1).bit_length() is the number of bits needed to represent x - 1,
    # so raising 2 to that count rounds x up without overshooting when x is
    # already a power of two.
    return 1 if x == 0 else 2 ** (x - 1).bit_length()
Expand All @@ -83,7 +83,7 @@ def tokens_in_input_str(s):
convo_numbers = []
# Please update with your own data file path
loaded_share_gpt = json.load(
#pylint: disable-next=all
# pylint: disable-next=all
open("~/data/ShareGPT_V3_unfiltered_cleaned_split.json", "r")
)
for example in loaded_share_gpt:
Expand Down Expand Up @@ -151,20 +151,20 @@ def tokens_in_input_str(s):
generate_savings_sec = total_generate_sec - idealized_generate_sec

print(
f"""we think prefill will take {total_prefill_sec=:.2f},
f"""we think prefill will take {total_prefill_sec=:.2f},
we could get it to {idealized_prefill_sec=:.2f} so we'd
save {prefill_savings_sec=:.2f} seconds """
)
print(
f"""with sparsity we could go from {total_generate_sec=:.2f},
f"""with sparsity we could go from {total_generate_sec=:.2f},
we could get it to {idealized_generate_sec=:.2f} so we'd save
{generate_savings_sec=:.2f} seconds """
)

idealized_overall_time = idealized_generate_sec + idealized_prefill_sec

print(
f"""Idealized out tokens {output_tokens} in {idealized_overall_time:.2f} seconds,
f"""Idealized out tokens {output_tokens} in {idealized_overall_time:.2f} seconds,
for {output_tokens/idealized_overall_time:.2f} out tok/s"""
)
print("prfill", prefill_bucket_size_to_ms)
Expand Down
10 changes: 5 additions & 5 deletions benchmarks/run_offline.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,6 @@
from benchmarks import analyze_sharegpt



logging.getLogger().setLevel(logging.ERROR)


Expand Down Expand Up @@ -159,15 +158,17 @@ def main():
sampled_tokens_list = []

for i in range(3): # warm up
#pylint: disable-next=all
decode_state, sampled_tokens = engine.generate(params=params, decode_state=decode_state)
# pylint: disable-next=all
decode_state, sampled_tokens = engine.generate(
params=params, decode_state=decode_state
)
sampled_tokens_list.append(sampled_tokens)

print("======= decode starting ===")
dec_times = []
for i in range(10):
start = time.perf_counter()
#pylint: disable-next=all
# pylint: disable-next=all
decode_state, sampled_tokens = engine.generate(params, decode_state)
jax.block_until_ready(decode_state)
sampled_tokens_list.append(sampled_tokens)
Expand All @@ -184,7 +185,6 @@ def main():
prefill_times_ms = {k: v * 1000 for k, v in prefill_times.items()}
decode_time_ms = sum(dec_times) * 1000 / 10 / _BATCH_SIZE.value


analyze_sharegpt.do_simulation(prefill_times_ms, decode_time_ms)


Expand Down

0 comments on commit b0400f8

Please sign in to comment.