From 0189ec282bb18a88c8fdfd79efe340ddebbde295 Mon Sep 17 00:00:00 2001
From: Mostafa Elhoushi
Date: Thu, 18 Jul 2024 15:41:25 -0400
Subject: [PATCH] Set args.max_length_generation to HF's max_new_tokens
 generation configuration

HuggingFace's `max_length` configuration corresponds to the total length of
the prompt plus the generated output, while `max_new_tokens` corresponds to
the length of the generated output only.

Using `args.max_length_generation` to set `max_length` led to runtime errors
because the total length of prompt+generation would exceed the intended
value. Using `args.max_length_generation` to set `max_new_tokens` instead
fixed those runtime errors for me.
---
 bigcode_eval/generation.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/bigcode_eval/generation.py b/bigcode_eval/generation.py
index 98e15a7be..e44e69e79 100644
--- a/bigcode_eval/generation.py
+++ b/bigcode_eval/generation.py
@@ -70,7 +70,7 @@ def parallel_generations(
         "temperature": args.temperature,
         "top_p": args.top_p,
         "top_k": args.top_k,
-        "max_length": args.max_length_generation,
+        "max_new_tokens": args.max_length_generation,
     }
     stopping_criteria = []
     # The input_length / start_length set to 0 for now will be adjusted later
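
For context, here is a minimal sketch of the `max_length` vs. `max_new_tokens`
distinction the commit message describes, using the standard `transformers`
`generate()` API. The model name and prompt are arbitrary placeholders, not
part of the patched code:

from transformers import AutoModelForCausalLM, AutoTokenizer

# Illustrative model choice; any causal LM behaves the same way here.
tokenizer = AutoTokenizer.from_pretrained("gpt2")
model = AutoModelForCausalLM.from_pretrained("gpt2")

inputs = tokenizer("def fibonacci(n):", return_tensors="pt")
prompt_len = inputs["input_ids"].shape[1]

# max_length caps prompt + generation: at most (32 - prompt_len) new tokens,
# and it misbehaves if the prompt alone already exceeds 32 tokens.
out_total = model.generate(**inputs, max_length=32)

# max_new_tokens caps only the generated continuation, independent of
# how long the prompt is.
out_new = model.generate(**inputs, max_new_tokens=32)

assert out_total.shape[1] <= 32
assert out_new.shape[1] <= prompt_len + 32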