From f8cda2e35ccc1f93940f8a1f5992a89bbba03cf1 Mon Sep 17 00:00:00 2001 From: Omni-FinAgent Date: Thu, 6 Feb 2025 04:39:00 +0800 Subject: [PATCH] Replace deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B with Qwen2.5-1.5B-Instruct --- recipes/qwen/Qwen2.5-1.5B-Instruct/grpo/confg_full.yaml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/recipes/qwen/Qwen2.5-1.5B-Instruct/grpo/confg_full.yaml b/recipes/qwen/Qwen2.5-1.5B-Instruct/grpo/confg_full.yaml index 3624c95a..19faabf5 100644 --- a/recipes/qwen/Qwen2.5-1.5B-Instruct/grpo/confg_full.yaml +++ b/recipes/qwen/Qwen2.5-1.5B-Instruct/grpo/confg_full.yaml @@ -1,5 +1,5 @@ # Model arguments -model_name_or_path: deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B +model_name_or_path: Qwen/Qwen2.5-1.5B-Instruct model_revision: main torch_dtype: bfloat16 @@ -22,7 +22,7 @@ gradient_accumulation_steps: 16 gradient_checkpointing: true gradient_checkpointing_kwargs: use_reentrant: false -hub_model_id: Qwen2.5-1.5B-Open-R1-GRPO +hub_model_id: Qwen2.5-1.5B-Instruct-Open-R1-GRPO hub_strategy: every_save learning_rate: 2.0e-05 log_level: info @@ -33,7 +33,7 @@ max_prompt_length: 512 max_completion_length: 1024 max_steps: -1 num_train_epochs: 1 -output_dir: data/Qwen2.5-1.5B-Open-R1-GRPO +output_dir: data/Qwen2.5-1.5B-Instruct-Open-R1-GRPO overwrite_output_dir: true per_device_eval_batch_size: 4 per_device_train_batch_size: 1