From 0189ec282bb18a88c8fdfd79efe340ddebbde295 Mon Sep 17 00:00:00 2001
From: Mostafa Elhoushi
Date: Thu, 18 Jul 2024 15:41:25 -0400
Subject: [PATCH] Set args.max_length_generation to HF's max_new_tokens
 generation configuration

HuggingFace's `max_length` configuration corresponds to the total length of
the prompt plus the generated output, while `max_new_tokens` corresponds to
the length of the generated output only.

Using `args.max_length_generation` to set `max_length` led to runtime errors
because the total length of prompt+generation would exceed the intended
value. Using `args.max_length_generation` to set `max_new_tokens` instead
fixed those runtime errors for me.
---
 bigcode_eval/generation.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/bigcode_eval/generation.py b/bigcode_eval/generation.py
index 98e15a7be..e44e69e79 100644
--- a/bigcode_eval/generation.py
+++ b/bigcode_eval/generation.py
@@ -70,7 +70,7 @@ def parallel_generations(
         "temperature": args.temperature,
         "top_p": args.top_p,
         "top_k": args.top_k,
-        "max_length": args.max_length_generation,
+        "max_new_tokens": args.max_length_generation,
     }
     stopping_criteria = []
     # The input_length / start_length set to 0 for now will be adjusted later
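
For context, here is a minimal sketch of the `max_length` vs. `max_new_tokens`
distinction the commit message describes, using the standard `transformers`
`generate()` API. The model name and prompt are arbitrary placeholders, not
part of the patched code:

from transformers import AutoModelForCausalLM, AutoTokenizer

# Illustrative model choice; any causal LM behaves the same way here.
tokenizer = AutoTokenizer.from_pretrained("gpt2")
model = AutoModelForCausalLM.from_pretrained("gpt2")

inputs = tokenizer("def fibonacci(n):", return_tensors="pt")
prompt_len = inputs["input_ids"].shape[1]

# max_length caps prompt + generation: at most (32 - prompt_len) new tokens,
# and it misbehaves if the prompt alone already exceeds 32 tokens.
out_total = model.generate(**inputs, max_length=32)

# max_new_tokens caps only the generated continuation, independent of
# how long the prompt is.
out_new = model.generate(**inputs, max_new_tokens=32)

assert out_total.shape[1] <= 32
assert out_new.shape[1] <= prompt_len + 32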