From 6394293a502d12f82225eefb4fec44ef7986dbe9 Mon Sep 17 00:00:00 2001
From: "U. Artie Eoff" <ullysses.a.eoff@intel.com>
Date: Thu, 27 Feb 2025 16:43:34 -0500
Subject: [PATCH] Clone gaudi2 refs to gaudi3

Start with the same references on gaudi3 as gaudi2.

Then, we can incrementally update them as needed.

Signed-off-by: U. Artie Eoff <ullysses.a.eoff@intel.com>
---
 .../fixture/tests/test_bnb_qlora.json         |   3 +
 .../fixture/tests/test_diffusers.json         |  48 ++
 .../fixture/tests/test_encoder_decoder.json   |  12 +
 .../fixture/tests/test_examples.json          | 252 ++++++
 .../tests/test_feature_extraction.json        |   3 +
 .../fixture/tests/test_fp8_examples.json      |   4 +
 .../fixture/tests/test_fsdp_examples.json     |   8 +
 .../tests/test_image_classification.json      |   3 +
 .../tests/test_image_segmentation.json        |   3 +
 .../tests/test_image_to_text_example.json     |  48 ++
 .../fixture/tests/test_object_detection.json  |   6 +
 .../tests/test_object_segmentation.json       |   3 +
 .../fixture/tests/test_openclip_vqa.json      |   6 +
 .../tests/test_sentence_transformers.json     |  39 +
 .../fixture/tests/test_table_transformer.json |   3 +
 .../tests/test_text_generation_example.json   | 222 +++++
 .../fixture/tests/test_video_llava.json       |   3 +
 .../fixture/tests/test_video_mae.json         |   3 +
 .../test_zero_shot_object_detection.json      |   3 +
 .../examples/CodeLlama_13b_Instruct_hf.json   |  33 +-
 tests/configs/examples/LlamaGuard_7b.json     |  33 +-
 tests/configs/examples/Llama_3_1_8B.json      |  45 +-
 .../Llama_3_2_11B_Vision_Instruct.json        |  48 +-
 tests/configs/examples/Qwen2_72B.json         |  57 +-
 tests/configs/examples/Qwen2_7B.json          |  92 ++-
 tests/configs/examples/albert_large_v2.json   |  68 +-
 tests/configs/examples/albert_xxlarge_v1.json |  68 +-
 .../ast_finetuned_speech_commands_v2.json     |  43 +-
 tests/configs/examples/bert_base_uncased.json |  34 +-
 ...bert_large_uncased_whole_word_masking.json | 132 ++-
 tests/configs/examples/bloom_7b1.json         |   9 +-
 .../bridgetower_large_itm_mlm_itc.json        |  37 +-
 tests/configs/examples/chatglm3_6b.json       |  41 +-
 tests/configs/examples/clip_roberta.json      |  46 +-
 .../examples/distilbert_base_uncased.json     |  68 +-
 tests/configs/examples/falcon_40b.json        |  95 ++-
 tests/configs/examples/flan_t5_xxl.json       |  40 +-
 tests/configs/examples/gemma_2b_it.json       |  74 +-
 tests/configs/examples/gemma_2b_it_eager.json |  30 +-
 tests/configs/examples/gpt2.json              |  68 +-
 tests/configs/examples/gpt2_xl.json           |  41 +-
 tests/configs/examples/gpt_neox_20b.json      |  33 +-
 tests/configs/examples/idefics2_8b.json       |  48 +-
 tests/configs/examples/llama_7b.json          | 776 +++++++++++++++++-
 tests/configs/examples/llava_1_5_7b_hf.json   |  48 +-
 ...t_esm1b_for_sequential_classification.json |  36 +-
 tests/configs/examples/roberta_base.json      | 104 ++-
 tests/configs/examples/roberta_large.json     | 104 ++-
 .../swin_base_patch4_window7_224_in22k.json   |  80 +-
 tests/configs/examples/t5_small.json          | 154 +++-
 .../examples/vit_base_patch16_224_in21k.json  |  79 +-
 tests/configs/examples/wav2vec2_base.json     |  51 +-
 .../configs/examples/wav2vec2_large_lv60.json |  57 +-
 tests/configs/examples/whisper_small.json     |  55 +-
 54 files changed, 3250 insertions(+), 249 deletions(-)

diff --git a/tests/baselines/fixture/tests/test_bnb_qlora.json b/tests/baselines/fixture/tests/test_bnb_qlora.json
index f917167fde..ddaaec170d 100644
--- a/tests/baselines/fixture/tests/test_bnb_qlora.json
+++ b/tests/baselines/fixture/tests/test_bnb_qlora.json
@@ -2,6 +2,9 @@
   "tests/test_bnb_qlora.py::test_nf4_quantization_inference": {
     "gaudi2": {
       "eval_loss": 1.638
+    },
+    "gaudi3": {
+      "eval_loss": 1.638
     }
   }
 }
\ No newline at end of file
diff --git a/tests/baselines/fixture/tests/test_diffusers.json b/tests/baselines/fixture/tests/test_diffusers.json
index b84e40dab8..cde044dfd2 100644
--- a/tests/baselines/fixture/tests/test_diffusers.json
+++ b/tests/baselines/fixture/tests/test_diffusers.json
@@ -5,21 +5,33 @@
     },
     "gaudi2": {
       "throughput": 0.145
+    },
+    "gaudi3": {
+      "throughput": 0.145
     }
   },
   "tests/test_diffusers.py::GaudiFluxImg2ImgPipelineTester::test_flux_img2img_inference": {
     "gaudi2": {
       "throughput": 0.12
+    },
+    "gaudi3": {
+      "throughput": 0.12
     }
   },
   "tests/test_diffusers.py::GaudiFluxPipelineTester::test_flux_inference": {
     "gaudi2": {
       "throughput": 0.03
+    },
+    "gaudi3": {
+      "throughput": 0.03
     }
   },
   "tests/test_diffusers.py::GaudiStableDiffusion3PipelineTester::test_sd3_inference": {
     "gaudi2": {
       "throughput": 0.006
+    },
+    "gaudi3": {
+      "throughput": 0.006
     }
   },
   "tests/test_diffusers.py::GaudiStableDiffusionPipelineTester::test_no_generation_regression_ldm3d": {
@@ -28,6 +40,9 @@
     },
     "gaudi2": {
       "throughput": 0.394
+    },
+    "gaudi3": {
+      "throughput": 0.394
     }
   },
   "tests/test_diffusers.py::GaudiStableDiffusionPipelineTester::test_no_throughput_regression_autocast": {
@@ -36,6 +51,9 @@
     },
     "gaudi2": {
       "throughput": 0.394
+    },
+    "gaudi3": {
+      "throughput": 0.394
     }
   },
   "tests/test_diffusers.py::GaudiStableDiffusionPipelineTester::test_no_throughput_regression_bf16": {
@@ -44,6 +62,9 @@
     },
     "gaudi2": {
       "throughput": 1.086
+    },
+    "gaudi3": {
+      "throughput": 1.086
     }
   },
   "tests/test_diffusers.py::GaudiStableDiffusionPipelineTester::test_sd_textual_inversion": {
@@ -54,6 +75,10 @@
     "gaudi2": {
       "train_runtime": 1.542460777796805,
       "train_samples_per_second": 131.7606336456344
+    },
+    "gaudi3": {
+      "train_runtime": 1.542460777796805,
+      "train_samples_per_second": 131.7606336456344
     }
   },
   "tests/test_diffusers.py::GaudiStableDiffusionXLPipelineTester::test_sdxl_textual_inversion": {
@@ -64,6 +89,10 @@
     "gaudi2": {
       "train_runtime": 74.92,
       "train_samples_per_second": 2.6694
+    },
+    "gaudi3": {
+      "train_runtime": 74.92,
+      "train_samples_per_second": 2.6694
     }
   },
   "tests/test_diffusers.py::GaudiStableDiffusionXLPipelineTester::test_stable_diffusion_xl_generation_throughput": {
@@ -72,6 +101,9 @@
     },
     "gaudi2": {
       "throughput": 0.301
+    },
+    "gaudi3": {
+      "throughput": 0.301
     }
   },
   "tests/test_diffusers.py::GaudiStableVideoDiffusionPipelineTester::test_stable_video_diffusion_no_throughput_regression_bf16": {
@@ -80,6 +112,9 @@
     },
     "gaudi2": {
       "throughput": 0.012
+    },
+    "gaudi3": {
+      "throughput": 0.012
     }
   },
   "tests/test_diffusers.py::I2VGenXLPipelineTests::test_i2vgen_xl_bf16": {
@@ -88,6 +123,9 @@
     },
     "gaudi2": {
       "throughput": 0.017
+    },
+    "gaudi3": {
+      "throughput": 0.017
     }
   },
   "tests/test_diffusers.py::StableDiffusionInpaintPipelineTests::test_stable_diffusion_inpaint_no_throughput_regression": {
@@ -96,6 +134,9 @@
     },
     "gaudi2": {
       "throughput": 1.025
+    },
+    "gaudi3": {
+      "throughput": 1.025
     }
   },
   "tests/test_diffusers.py::StableDiffusionXLInpaintPipelineTests::test_stable_diffusion_xl_inpaint_no_throughput_regression": {
@@ -104,6 +145,9 @@
     },
     "gaudi2": {
       "throughput": 0.175
+    },
+    "gaudi3": {
+      "throughput": 0.175
     }
   },
   "tests/test_diffusers.py::TrainControlNet::test_train_controlnet": {
@@ -114,6 +158,10 @@
     "gaudi2": {
       "train_runtime": 1.8647471838630736,
       "train_samples_per_second": 120.123522340414
+    },
+    "gaudi3": {
+      "train_runtime": 1.8647471838630736,
+      "train_samples_per_second": 120.123522340414
     }
   }
 }
\ No newline at end of file
diff --git a/tests/baselines/fixture/tests/test_encoder_decoder.json b/tests/baselines/fixture/tests/test_encoder_decoder.json
index 25c780e5dd..b4196a7766 100644
--- a/tests/baselines/fixture/tests/test_encoder_decoder.json
+++ b/tests/baselines/fixture/tests/test_encoder_decoder.json
@@ -7,6 +7,10 @@
     "gaudi2": {
       "predict_rougeLsum": 28.9801,
       "predict_samples_per_second": 4.339
+    },
+    "gaudi3": {
+      "predict_rougeLsum": 28.9801,
+      "predict_samples_per_second": 4.339
     }
   },
   "tests/test_encoder_decoder.py::TestEncoderDecoderModels::test_text_summarization_bf16[t5-3b-Habana/t5-2-1]": {
@@ -17,6 +21,10 @@
     "gaudi2": {
       "predict_rougeLsum": 21.8877,
       "predict_samples_per_second": 3.848
+    },
+    "gaudi3": {
+      "predict_rougeLsum": 21.8877,
+      "predict_samples_per_second": 3.848
     }
   },
   "tests/test_encoder_decoder.py::TestEncoderDecoderModels::test_text_translation_bf16[t5-small-Habana/t5-2-1]": {
@@ -27,6 +35,10 @@
     "gaudi2": {
       "predict_bleu": 11.7277,
       "predict_samples_per_second": 11.648
+    },
+    "gaudi3": {
+      "predict_bleu": 11.7277,
+      "predict_samples_per_second": 11.648
     }
   }
 }
\ No newline at end of file
diff --git a/tests/baselines/fixture/tests/test_examples.json b/tests/baselines/fixture/tests/test_examples.json
index d1820727f8..831b0e7dac 100644
--- a/tests/baselines/fixture/tests/test_examples.json
+++ b/tests/baselines/fixture/tests/test_examples.json
@@ -4,6 +4,11 @@
       "perplexity": 26.39,
       "train_runtime": 356.07,
       "train_samples_per_second": 14.06
+    },
+    "gaudi3": {
+      "perplexity": 26.39,
+      "train_runtime": 356.07,
+      "train_samples_per_second": 14.06
     }
   },
   "tests/test_examples.py::CausalLanguageModelingLORAExampleTester::test_run_lora_clm_llama-7b_single_card": {
@@ -16,6 +21,11 @@
       "perplexity": 3.8436,
       "train_runtime": 113.9713,
       "train_samples_per_second": 18.428
+    },
+    "gaudi3": {
+      "perplexity": 3.8436,
+      "train_runtime": 113.9713,
+      "train_samples_per_second": 18.428
     }
   },
   "tests/test_examples.py::DeepSpeedTextClassificationExampleTester::test_run_glue_LlamaGuard-7b_deepspeed": {
@@ -23,6 +33,11 @@
       "eval_f1": 0.8873483535528596,
       "train_runtime": 62.4539,
       "train_samples_per_second": 342.169
+    },
+    "gaudi3": {
+      "eval_f1": 0.8873483535528596,
+      "train_runtime": 62.4539,
+      "train_samples_per_second": 342.169
     }
   },
   "tests/test_examples.py::DeepspeedCausalLanguageModelingExampleTester::test_run_clm_CodeLlama-13b-Instruct-hf_deepspeed": {
@@ -30,6 +45,11 @@
       "perplexity": 6.877496628184696,
       "train_runtime": 542.2985,
       "train_samples_per_second": 18.789
+    },
+    "gaudi3": {
+      "perplexity": 6.877496628184696,
+      "train_runtime": 542.2985,
+      "train_samples_per_second": 18.789
     }
   },
   "tests/test_examples.py::DeepspeedCausalLanguageModelingExampleTester::test_run_clm_bloom-7b1_deepspeed": {
@@ -43,6 +63,11 @@
       "perplexity": 16.51629,
       "train_runtime": 445,
       "train_samples_per_second": 18.216
+    },
+    "gaudi3": {
+      "perplexity": 16.51629,
+      "train_runtime": 445,
+      "train_samples_per_second": 18.216
     }
   },
   "tests/test_examples.py::DeepspeedCausalLanguageModelingExampleTester::test_run_clm_gemma-2b-it_deepspeed": {
@@ -50,6 +75,11 @@
       "perplexity": 924.062,
       "train_runtime": 75.518,
       "train_samples_per_second": 81.097
+    },
+    "gaudi3": {
+      "perplexity": 924.062,
+      "train_runtime": 75.518,
+      "train_samples_per_second": 81.097
     }
   },
   "tests/test_examples.py::DeepspeedCausalLanguageModelingExampleTester::test_run_clm_gpt-neox-20b_deepspeed": {
@@ -57,6 +87,11 @@
       "perplexity": 8.169664686471043,
       "train_runtime": 445,
       "train_samples_per_second": 7.328
+    },
+    "gaudi3": {
+      "perplexity": 8.169664686471043,
+      "train_runtime": 445,
+      "train_samples_per_second": 7.328
     }
   },
   "tests/test_examples.py::DeepspeedCausalLanguageModelingExampleTester::test_run_clm_gpt2-xl_deepspeed": {
@@ -69,6 +104,11 @@
       "perplexity": 13.237754028004865,
       "train_runtime": 206.5775,
       "train_samples_per_second": 95.539
+    },
+    "gaudi3": {
+      "perplexity": 13.237754028004865,
+      "train_runtime": 206.5775,
+      "train_samples_per_second": 95.539
     }
   },
   "tests/test_examples.py::DeepspeedSFTExampleTester::test_sft_Qwen2-72B_deepspeed": {
@@ -76,6 +116,11 @@
       "perplexity": 3.7020898897918824,
       "train_runtime": 918.8018,
       "train_samples_per_second": 7.554
+    },
+    "gaudi3": {
+      "perplexity": 3.7020898897918824,
+      "train_runtime": 918.8018,
+      "train_samples_per_second": 7.554
     }
   },
   "tests/test_examples.py::DeepspeedSummarizationExampleTester::test_run_summarization_flan-t5-xxl_deepspeed": {
@@ -83,6 +128,11 @@
       "eval_rougeLsum": 29.308,
       "train_runtime": 155.86,
       "train_samples_per_second": 28.387
+    },
+    "gaudi3": {
+      "eval_rougeLsum": 29.308,
+      "train_runtime": 155.86,
+      "train_samples_per_second": 28.387
     }
   },
   "tests/test_examples.py::EagerModeCausalLanguageModelingExampleTester::test_run_clm_gemma-2b-it_single_card": {
@@ -90,6 +140,11 @@
       "perplexity": 26.69,
       "train_runtime": 560.8188,
       "train_samples_per_second": 8.597
+    },
+    "gaudi3": {
+      "perplexity": 26.69,
+      "train_runtime": 560.8188,
+      "train_samples_per_second": 8.597
     }
   },
   "tests/test_examples.py::ImageClassificationExampleTester::test_run_image_classification_swin-base-patch4-window7-224-in22k_single_card": {
@@ -102,6 +157,11 @@
       "eval_accuracy": 0.9850666666666666,
       "train_runtime": 77.8934,
       "train_samples_per_second": 826.766
+    },
+    "gaudi3": {
+      "eval_accuracy": 0.9850666666666666,
+      "train_runtime": 77.8934,
+      "train_samples_per_second": 826.766
     }
   },
   "tests/test_examples.py::ImageClassificationExampleTester::test_run_image_classification_vit-base-patch16-224-in21k_single_card": {
@@ -114,6 +174,11 @@
       "eval_accuracy": 0.9690666666666666,
       "train_runtime": 54.9734,
       "train_samples_per_second": 870.272
+    },
+    "gaudi3": {
+      "eval_accuracy": 0.9690666666666666,
+      "train_runtime": 54.9734,
+      "train_samples_per_second": 870.272
     }
   },
   "tests/test_examples.py::MultiCardAudioClassificationExampleTester::test_run_audio_classification_ast-finetuned-speech-commands-v2_multi_card": {
@@ -122,6 +187,12 @@
       "eval_samples_per_second": 2301.088,
       "train_runtime": 139.9477,
       "train_samples_per_second": 1955.74
+    },
+    "gaudi3": {
+      "eval_accuracy": 0.1871,
+      "eval_samples_per_second": 2301.088,
+      "train_runtime": 139.9477,
+      "train_samples_per_second": 1955.74
     }
   },
   "tests/test_examples.py::MultiCardAudioClassificationExampleTester::test_run_audio_classification_wav2vec2-base_multi_card": {
@@ -136,12 +207,22 @@
       "eval_samples_per_second": 3640.021,
       "train_runtime": 63.4079,
       "train_samples_per_second": 2975.844
+    },
+    "gaudi3": {
+      "eval_accuracy": 0.7228,
+      "eval_samples_per_second": 3640.021,
+      "train_runtime": 63.4079,
+      "train_samples_per_second": 2975.844
     }
   },
   "tests/test_examples.py::MultiCardBridgetowerExampleTester::test_run_bridgetower_bridgetower-large-itm-mlm-itc_multi_card": {
     "gaudi2": {
       "train_runtime": 224.42,
       "train_samples_per_second": 904.93
+    },
+    "gaudi3": {
+      "train_runtime": 224.42,
+      "train_samples_per_second": 904.93
     }
   },
   "tests/test_examples.py::MultiCardCausalLanguageModelingAdaloraExampleTester::test_run_lora_clm_llama-7b_multi_card": {
@@ -149,6 +230,11 @@
       "perplexity": 2.59,
       "train_runtime": 459,
       "train_samples_per_second": 107
+    },
+    "gaudi3": {
+      "perplexity": 2.59,
+      "train_runtime": 459,
+      "train_samples_per_second": 107
     }
   },
   "tests/test_examples.py::MultiCardCausalLanguageModelingExampleTester::test_run_clm_gemma-2b-it_multi_card": {
@@ -156,6 +242,11 @@
       "perplexity": 954.5995,
       "train_runtime": 82.6617,
       "train_samples_per_second": 94.524
+    },
+    "gaudi3": {
+      "perplexity": 954.5995,
+      "train_runtime": 82.6617,
+      "train_samples_per_second": 94.524
     }
   },
   "tests/test_examples.py::MultiCardCausalLanguageModelingIA3ExampleTester::test_run_lora_clm_llama-7b_multi_card": {
@@ -163,6 +254,11 @@
       "perplexity": 3.3,
       "train_runtime": 262.8,
       "train_samples_per_second": 161
+    },
+    "gaudi3": {
+      "perplexity": 3.3,
+      "train_runtime": 262.8,
+      "train_samples_per_second": 161
     }
   },
   "tests/test_examples.py::MultiCardCausalLanguageModelingLORAExampleTester2::test_run_lora_clm_falcon-40b_multi_card": {
@@ -170,6 +266,11 @@
       "perplexity": 1.6,
       "train_runtime": 710,
       "train_samples_per_second": 15.0
+    },
+    "gaudi3": {
+      "perplexity": 1.6,
+      "train_runtime": 710,
+      "train_samples_per_second": 15.0
     }
   },
   "tests/test_examples.py::MultiCardCausalLanguageModelingLORAExampleTester2::test_run_lora_clm_llama-7b_multi_card": {
@@ -177,6 +278,11 @@
       "perplexity": 2.3665,
       "train_runtime": 294.5707,
       "train_samples_per_second": 148.093
+    },
+    "gaudi3": {
+      "perplexity": 2.3665,
+      "train_runtime": 294.5707,
+      "train_samples_per_second": 148.093
     }
   },
   "tests/test_examples.py::MultiCardCausalLanguageModelingLORAExampleTester::test_run_lora_clm_falcon-40b_multi_card": {
@@ -184,6 +290,11 @@
       "perplexity": 4.0,
       "train_runtime": 550,
       "train_samples_per_second": 15.0
+    },
+    "gaudi3": {
+      "perplexity": 4.0,
+      "train_runtime": 550,
+      "train_samples_per_second": 15.0
     }
   },
   "tests/test_examples.py::MultiCardCausalLanguageModelingLORAExampleTester::test_run_lora_clm_llama-7b_multi_card": {
@@ -196,6 +307,11 @@
       "perplexity": 2.3665,
       "train_runtime": 294.5707,
       "train_samples_per_second": 148.093
+    },
+    "gaudi3": {
+      "perplexity": 2.3665,
+      "train_runtime": 294.5707,
+      "train_samples_per_second": 148.093
     }
   },
   "tests/test_examples.py::MultiCardCausalLanguageModelingLORAFSDPCompileExampleTester::test_run_lora_clm_llama-7b_multi_card": {
@@ -203,6 +319,11 @@
       "perplexity": 2.4259,
       "train_runtime": 186.2483,
       "train_samples_per_second": 93.5
+    },
+    "gaudi3": {
+      "perplexity": 2.4259,
+      "train_runtime": 186.2483,
+      "train_samples_per_second": 93.5
     }
   },
   "tests/test_examples.py::MultiCardCausalLanguageModelingLlamaAdapterExampleTester::test_run_lora_clm_llama-7b_multi_card": {
@@ -210,6 +331,11 @@
       "perplexity": 5.575,
       "train_runtime": 131.7,
       "train_samples_per_second": 294
+    },
+    "gaudi3": {
+      "perplexity": 5.575,
+      "train_runtime": 131.7,
+      "train_samples_per_second": 294
     }
   },
   "tests/test_examples.py::MultiCardCausalLanguageModelingLnExampleTester::test_run_lora_clm_llama-7b_multi_card": {
@@ -217,6 +343,11 @@
       "perplexity": 2.83,
       "train_runtime": 249,
       "train_samples_per_second": 165
+    },
+    "gaudi3": {
+      "perplexity": 2.83,
+      "train_runtime": 249,
+      "train_samples_per_second": 165
     }
   },
   "tests/test_examples.py::MultiCardCausalLanguageModelingLoRACPExampleTester::test_run_lora_clm_llama-7b_deepspeed": {
@@ -224,6 +355,11 @@
       "perplexity": 2.8889,
       "train_runtime": 147.3597,
       "train_samples_per_second": 34.41
+    },
+    "gaudi3": {
+      "perplexity": 2.8889,
+      "train_runtime": 147.3597,
+      "train_samples_per_second": 34.41
     }
   },
   "tests/test_examples.py::MultiCardCausalLanguageModelingLoRAFP8ExampleTester::test_run_lora_clm_llama-7b_multi_card": {
@@ -231,6 +367,11 @@
       "perplexity": 2.3692,
       "train_runtime": 411.9935,
       "train_samples_per_second": 232.439
+    },
+    "gaudi3": {
+      "perplexity": 2.3692,
+      "train_runtime": 411.9935,
+      "train_samples_per_second": 232.439
     }
   },
   "tests/test_examples.py::MultiCardCausalLanguageModelingPTuningExampleTester::test_run_prompt_tuning_clm_llama-7b_multi_card": {
@@ -238,6 +379,11 @@
       "perplexity": 1.047,
       "train_runtime": 18.7,
       "train_samples_per_second": 63.161
+    },
+    "gaudi3": {
+      "perplexity": 1.047,
+      "train_runtime": 18.7,
+      "train_samples_per_second": 63.161
     }
   },
   "tests/test_examples.py::MultiCardCausalLanguageModelingPrefixTuningExampleTester::test_run_prompt_tuning_clm_llama-7b_multi_card": {
@@ -245,6 +391,11 @@
       "perplexity": 1.172,
       "train_runtime": 16.1,
       "train_samples_per_second": 63.249
+    },
+    "gaudi3": {
+      "perplexity": 1.172,
+      "train_runtime": 16.1,
+      "train_samples_per_second": 63.249
     }
   },
   "tests/test_examples.py::MultiCardCausalLanguageModelingPromptTuningExampleTester::test_run_prompt_tuning_clm_llama-7b_multi_card": {
@@ -252,6 +403,11 @@
       "perplexity": 1.224,
       "train_runtime": 16.5,
       "train_samples_per_second": 63.161
+    },
+    "gaudi3": {
+      "perplexity": 1.224,
+      "train_runtime": 16.5,
+      "train_samples_per_second": 63.161
     }
   },
   "tests/test_examples.py::MultiCardCausalLanguageModelingVeraExampleTester::test_run_lora_clm_llama-7b_multi_card": {
@@ -259,12 +415,21 @@
       "perplexity": 9.064502567217577,
       "train_runtime": 312.9258,
       "train_samples_per_second": 127.305
+    },
+    "gaudi3": {
+      "perplexity": 9.064502567217577,
+      "train_runtime": 312.9258,
+      "train_samples_per_second": 127.305
     }
   },
   "tests/test_examples.py::MultiCardDPOExampleTester::test_dpo_llama-7b_multi_card": {
     "gaudi2": {
       "train_runtime": 234.6471,
       "train_samples_per_second": 13.499
+    },
+    "gaudi3": {
+      "train_runtime": 234.6471,
+      "train_samples_per_second": 13.499
     }
   },
   "tests/test_examples.py::MultiCardImageClassificationExampleTester::test_run_image_classification_swin-base-patch4-window7-224-in22k_multi_card": {
@@ -277,6 +442,11 @@
       "eval_accuracy": 0.9821,
       "train_runtime": 62.9986,
       "train_samples_per_second": 6202.525
+    },
+    "gaudi3": {
+      "eval_accuracy": 0.9821,
+      "train_runtime": 62.9986,
+      "train_samples_per_second": 6202.525
     }
   },
   "tests/test_examples.py::MultiCardImageClassificationExampleTester::test_run_image_classification_vit-base-patch16-224-in21k_multi_card": {
@@ -289,6 +459,11 @@
       "eval_accuracy": 0.9679,
       "train_runtime": 23.99,
       "train_samples_per_second": 6718.643
+    },
+    "gaudi3": {
+      "eval_accuracy": 0.9679,
+      "train_runtime": 23.99,
+      "train_samples_per_second": 6718.643
     }
   },
   "tests/test_examples.py::MultiCardImageToTextModelingLoRAExampleTester::test_run_image2text_lora_finetune_Llama-3.2-11B-Vision-Instruct_multi_card": {
@@ -296,6 +471,11 @@
       "eval_accuracy": 0.6,
       "train_runtime": 350,
       "train_samples_per_second": 20.48
+    },
+    "gaudi3": {
+      "eval_accuracy": 0.6,
+      "train_runtime": 350,
+      "train_samples_per_second": 20.48
     }
   },
   "tests/test_examples.py::MultiCardImageToTextModelingLoRAExampleTester::test_run_image2text_lora_finetune_idefics2-8b_multi_card": {
@@ -303,6 +483,11 @@
       "eval_accuracy": 0.6,
       "train_runtime": 286,
       "train_samples_per_second": 11.8
+    },
+    "gaudi3": {
+      "eval_accuracy": 0.6,
+      "train_runtime": 286,
+      "train_samples_per_second": 11.8
     }
   },
   "tests/test_examples.py::MultiCardImageToTextModelingLoRAExampleTester::test_run_image2text_lora_finetune_llava-1.5-7b-hf_multi_card": {
@@ -310,6 +495,11 @@
       "eval_accuracy": 0.2122,
       "train_runtime": 118.5782,
       "train_samples_per_second": 25.146
+    },
+    "gaudi3": {
+      "eval_accuracy": 0.2122,
+      "train_runtime": 118.5782,
+      "train_samples_per_second": 25.146
     }
   },
   "tests/test_examples.py::MultiCardMaskedLanguageModelingExampleTester::test_run_mlm_roberta-large_multi_card": {
@@ -322,12 +512,21 @@
       "perplexity": 2.829522488584474,
       "train_runtime": 22.7101,
       "train_samples_per_second": 1056.875
+    },
+    "gaudi3": {
+      "perplexity": 2.829522488584474,
+      "train_runtime": 22.7101,
+      "train_samples_per_second": 1056.875
     }
   },
   "tests/test_examples.py::MultiCardPPOExampleTester::test_ppo_llama-7b_multi_card": {
     "gaudi2": {
       "train_runtime": 62,
       "train_samples_per_second": 0.5
+    },
+    "gaudi3": {
+      "train_runtime": 62,
+      "train_samples_per_second": 0.5
     }
   },
   "tests/test_examples.py::MultiCardProteinFoldingClassificationTester::test_run_sequence_classification_protst-esm1b-for-sequential-classification_multi_card": {
@@ -335,6 +534,11 @@
       "eval_accuracy": 0.5436668594563332,
       "train_runtime": 38.9504,
       "train_samples_per_second": 768.648
+    },
+    "gaudi3": {
+      "eval_accuracy": 0.5436668594563332,
+      "train_runtime": 38.9504,
+      "train_samples_per_second": 768.648
     }
   },
   "tests/test_examples.py::MultiCardQuestionAnsweringExampleTester::test_run_qa_roberta-large_multi_card": {
@@ -347,30 +551,51 @@
       "eval_f1": 94.09,
       "train_runtime": 79.333,
       "train_samples_per_second": 2138.366
+    },
+    "gaudi3": {
+      "eval_f1": 94.09,
+      "train_runtime": 79.333,
+      "train_samples_per_second": 2138.366
     }
   },
   "tests/test_examples.py::MultiCardRewardExampleTester::test_reward_modeling_llama-7b_multi_card": {
     "gaudi2": {
       "train_runtime": 250,
       "train_samples_per_second": 1.6
+    },
+    "gaudi3": {
+      "train_runtime": 250,
+      "train_samples_per_second": 1.6
     }
   },
   "tests/test_examples.py::MultiCardSFTChatExampleTester::test_sft_Qwen2-7B_multi_card": {
     "gaudi2": {
       "train_runtime": 423.995,
       "train_samples_per_second": 7.342
+    },
+    "gaudi3": {
+      "train_runtime": 423.995,
+      "train_samples_per_second": 7.342
     }
   },
   "tests/test_examples.py::MultiCardSFTChatPeftExampleTester::test_sft_Qwen2-7B_multi_card": {
     "gaudi2": {
       "train_runtime": 410,
       "train_samples_per_second": 120
+    },
+    "gaudi3": {
+      "train_runtime": 410,
+      "train_samples_per_second": 120
     }
   },
   "tests/test_examples.py::MultiCardSFTExampleTester::test_sft_llama-7b_multi_card": {
     "gaudi2": {
       "train_runtime": 206,
       "train_samples_per_second": 51.54
+    },
+    "gaudi3": {
+      "train_runtime": 206,
+      "train_samples_per_second": 51.54
     }
   },
   "tests/test_examples.py::MultiCardSeq2SeqSpeechRecognitionExampleTester::test_run_speech_recognition_seq2seq_whisper-small_multi_card": {
@@ -385,6 +610,12 @@
       "eval_wer": 0.4693843594009983,
       "train_runtime": 380.0,
       "train_samples_per_second": 218.0
+    },
+    "gaudi3": {
+      "eval_samples_per_second": 31.0,
+      "eval_wer": 0.4693843594009983,
+      "train_runtime": 380.0,
+      "train_samples_per_second": 218.0
     }
   },
   "tests/test_examples.py::MultiCardSpeechRecognitionExampleTester::test_run_speech_recognition_ctc_wav2vec2-large-lv60_multi_card": {
@@ -399,6 +630,12 @@
       "eval_wer": 0.1109,
       "train_runtime": 308.8036,
       "train_samples_per_second": 225.572
+    },
+    "gaudi3": {
+      "eval_samples_per_second": 196.665,
+      "eval_wer": 0.1109,
+      "train_runtime": 308.8036,
+      "train_samples_per_second": 225.572
     }
   },
   "tests/test_examples.py::MultiCardTextClassificationExampleTester::test_run_glue_bert-large-uncased-whole-word-masking_multi_card": {
@@ -411,6 +648,11 @@
       "eval_f1": 0.8452579034941764,
       "train_runtime": 31.445,
       "train_samples_per_second": 2845.068
+    },
+    "gaudi3": {
+      "eval_f1": 0.8452579034941764,
+      "train_runtime": 31.445,
+      "train_samples_per_second": 2845.068
     }
   },
   "tests/test_examples.py::QuestionAnsweringExampleTester::test_run_qa_roberta-large_single_card": {
@@ -423,6 +665,11 @@
       "eval_f1": 94.5886,
       "train_runtime": 361.4789,
       "train_samples_per_second": 266.47
+    },
+    "gaudi3": {
+      "eval_f1": 94.5886,
+      "train_runtime": 361.4789,
+      "train_samples_per_second": 266.47
     }
   },
   "tests/test_examples.py::TextClassificationExampleTester::test_run_glue_bert-large-uncased-whole-word-masking_single_card": {
@@ -435,6 +682,11 @@
       "eval_f1": 0.867,
       "train_runtime": 33.2909,
       "train_samples_per_second": 1100.598
+    },
+    "gaudi3": {
+      "eval_f1": 0.867,
+      "train_runtime": 33.2909,
+      "train_samples_per_second": 1100.598
     }
   }
 }
\ No newline at end of file
diff --git a/tests/baselines/fixture/tests/test_feature_extraction.json b/tests/baselines/fixture/tests/test_feature_extraction.json
index bf336f6c17..8293e8fed7 100644
--- a/tests/baselines/fixture/tests/test_feature_extraction.json
+++ b/tests/baselines/fixture/tests/test_feature_extraction.json
@@ -5,6 +5,9 @@
     },
     "gaudi2": {
       "time_per_iter": 0.6812
+    },
+    "gaudi3": {
+      "time_per_iter": 0.6812
     }
   }
 }
\ No newline at end of file
diff --git a/tests/baselines/fixture/tests/test_fp8_examples.json b/tests/baselines/fixture/tests/test_fp8_examples.json
index 43aa371fa1..0487cbc1e8 100644
--- a/tests/baselines/fixture/tests/test_fp8_examples.json
+++ b/tests/baselines/fixture/tests/test_fp8_examples.json
@@ -3,6 +3,10 @@
     "gaudi2": {
       "eval_accuracy": 0.7538,
       "train_samples_per_second": 12.373
+    },
+    "gaudi3": {
+      "eval_accuracy": 0.7538,
+      "train_samples_per_second": 12.373
     }
   }
 }
\ No newline at end of file
diff --git a/tests/baselines/fixture/tests/test_fsdp_examples.json b/tests/baselines/fixture/tests/test_fsdp_examples.json
index 834ecba8a6..b9e17c7354 100644
--- a/tests/baselines/fixture/tests/test_fsdp_examples.json
+++ b/tests/baselines/fixture/tests/test_fsdp_examples.json
@@ -3,12 +3,20 @@
     "gaudi2": {
       "eval_f1": 85.7077,
       "train_samples_per_second": 2983.533
+    },
+    "gaudi3": {
+      "eval_f1": 85.7077,
+      "train_samples_per_second": 2983.533
     }
   },
   "tests/test_fsdp_examples.py::test_fsdp_bf16[meta-llama/Llama-2-7b-hf--language-modeling-8-8-run_lora_clm.py-auto_wrap]": {
     "gaudi2": {
       "train_loss": 0.9093,
       "train_samples_per_second": 85.016
+    },
+    "gaudi3": {
+      "train_loss": 0.9093,
+      "train_samples_per_second": 85.016
     }
   }
 }
\ No newline at end of file
diff --git a/tests/baselines/fixture/tests/test_image_classification.json b/tests/baselines/fixture/tests/test_image_classification.json
index 28868221e1..cc903834f8 100644
--- a/tests/baselines/fixture/tests/test_image_classification.json
+++ b/tests/baselines/fixture/tests/test_image_classification.json
@@ -2,6 +2,9 @@
   "tests/test_image_classification.py::GaudiFastViTTester::test_no_latency_regression_autocast": {
     "gaudi2": {
       "latency": 2.527062664031982
+    },
+    "gaudi3": {
+      "latency": 2.527062664031982
     }
   }
 }
\ No newline at end of file
diff --git a/tests/baselines/fixture/tests/test_image_segmentation.json b/tests/baselines/fixture/tests/test_image_segmentation.json
index 87a0523de8..dbec2bf555 100644
--- a/tests/baselines/fixture/tests/test_image_segmentation.json
+++ b/tests/baselines/fixture/tests/test_image_segmentation.json
@@ -2,6 +2,9 @@
   "tests/test_image_segmentation.py::GaudiSAMTester::test_no_latency_regression_bf16": {
     "gaudi2": {
       "latency": 98.92215728759766
+    },
+    "gaudi3": {
+      "latency": 98.92215728759766
     }
   }
 }
\ No newline at end of file
diff --git a/tests/baselines/fixture/tests/test_image_to_text_example.json b/tests/baselines/fixture/tests/test_image_to_text_example.json
index d9bab43d39..e95c6d88d8 100644
--- a/tests/baselines/fixture/tests/test_image_to_text_example.json
+++ b/tests/baselines/fixture/tests/test_image_to_text_example.json
@@ -2,21 +2,33 @@
   "tests/test_image_to_text_example.py::test_image_to_text_bf16[HuggingFaceM4/idefics2-8b-1]": {
     "gaudi2": {
       "throughput": 21.89944593215077
+    },
+    "gaudi3": {
+      "throughput": 21.89944593215077
     }
   },
   "tests/test_image_to_text_example.py::test_image_to_text_bf16[Qwen/Qwen2-VL-2B-Instruct-1]": {
     "gaudi2": {
       "throughput": 28.755882208438422
+    },
+    "gaudi3": {
+      "throughput": 28.755882208438422
     }
   },
   "tests/test_image_to_text_example.py::test_image_to_text_bf16[Qwen/Qwen2-VL-7B-Instruct-1]": {
     "gaudi2": {
       "throughput": 19.32562189532818
+    },
+    "gaudi3": {
+      "throughput": 19.32562189532818
     }
   },
   "tests/test_image_to_text_example.py::test_image_to_text_bf16[google/paligemma-3b-mix-224-1]": {
     "gaudi2": {
       "throughput": 132.8949150246155
+    },
+    "gaudi3": {
+      "throughput": 132.8949150246155
     }
   },
   "tests/test_image_to_text_example.py::test_image_to_text_bf16[llava-hf/llava-1.5-13b-hf-1]": {
@@ -25,6 +37,9 @@
     },
     "gaudi2": {
       "throughput": 48.54364937033955
+    },
+    "gaudi3": {
+      "throughput": 48.54364937033955
     }
   },
   "tests/test_image_to_text_example.py::test_image_to_text_bf16[llava-hf/llava-1.5-7b-hf-1]": {
@@ -33,6 +48,9 @@
     },
     "gaudi2": {
       "throughput": 77.98733740859008
+    },
+    "gaudi3": {
+      "throughput": 77.98733740859008
     }
   },
   "tests/test_image_to_text_example.py::test_image_to_text_bf16[llava-hf/llava-v1.6-mistral-7b-hf-1]": {
@@ -41,6 +59,9 @@
     },
     "gaudi2": {
       "throughput": 33.17984878151546
+    },
+    "gaudi3": {
+      "throughput": 33.17984878151546
     }
   },
   "tests/test_image_to_text_example.py::test_image_to_text_bf16[llava-hf/llava-v1.6-vicuna-13b-hf-1]": {
@@ -49,46 +70,73 @@
     },
     "gaudi2": {
       "throughput": 23.527610042925
+    },
+    "gaudi3": {
+      "throughput": 23.527610042925
     }
   },
   "tests/test_image_to_text_example.py::test_image_to_text_bf16[llava-hf/llava-v1.6-vicuna-7b-hf-1]": {
     "gaudi2": {
       "throughput": 35.00608681379742
+    },
+    "gaudi3": {
+      "throughput": 35.00608681379742
     }
   },
   "tests/test_image_to_text_example.py::test_image_to_text_bf16[meta-llama/Llama-3.2-11B-Vision-Instruct-1]": {
     "gaudi2": {
       "throughput": 18.974541922240313
+    },
+    "gaudi3": {
+      "throughput": 18.974541922240313
     }
   },
   "tests/test_image_to_text_example.py::test_image_to_text_bf16[tiiuae/falcon-11B-vlm-1]": {
     "gaudi2": {
       "throughput": 23.69260849957278
+    },
+    "gaudi3": {
+      "throughput": 23.69260849957278
     }
   },
   "tests/test_image_to_text_example.py::test_image_to_text_fp8[llava-hf/llava-1.5-13b-hf-1]": {
     "gaudi2": {
       "throughput": 67.20488222876344
+    },
+    "gaudi3": {
+      "throughput": 67.20488222876344
     }
   },
   "tests/test_image_to_text_example.py::test_image_to_text_fp8[llava-hf/llava-1.5-7b-hf-1]": {
     "gaudi2": {
       "throughput": 98.72578382705062
+    },
+    "gaudi3": {
+      "throughput": 98.72578382705062
     }
   },
   "tests/test_image_to_text_example.py::test_image_to_text_fp8[llava-hf/llava-v1.6-mistral-7b-hf-1]": {
     "gaudi2": {
       "throughput": 45.011551008367086
+    },
+    "gaudi3": {
+      "throughput": 45.011551008367086
     }
   },
   "tests/test_image_to_text_example.py::test_image_to_text_fp8[llava-hf/llava-v1.6-vicuna-13b-hf-1]": {
     "gaudi2": {
       "throughput": 30.9535718774675
+    },
+    "gaudi3": {
+      "throughput": 30.9535718774675
     }
   },
   "tests/test_image_to_text_example.py::test_image_to_text_fp8[llava-hf/llava-v1.6-vicuna-7b-hf-1]": {
     "gaudi2": {
       "throughput": 45.18544502949674
+    },
+    "gaudi3": {
+      "throughput": 45.18544502949674
     }
   }
 }
\ No newline at end of file
diff --git a/tests/baselines/fixture/tests/test_object_detection.json b/tests/baselines/fixture/tests/test_object_detection.json
index 176a27036a..c1c93b6c52 100644
--- a/tests/baselines/fixture/tests/test_object_detection.json
+++ b/tests/baselines/fixture/tests/test_object_detection.json
@@ -5,6 +5,9 @@
     },
     "gaudi2": {
       "latency": 7.0
+    },
+    "gaudi3": {
+      "latency": 7.0
     }
   },
   "tests/test_object_detection.py::GaudiDetrResnet50_Tester::test_no_latency_regression_autocast": {
@@ -13,6 +16,9 @@
     },
     "gaudi2": {
       "latency": 7.0
+    },
+    "gaudi3": {
+      "latency": 7.0
     }
   }
 }
\ No newline at end of file
diff --git a/tests/baselines/fixture/tests/test_object_segmentation.json b/tests/baselines/fixture/tests/test_object_segmentation.json
index 87b9ac28dc..65ae50ea0f 100644
--- a/tests/baselines/fixture/tests/test_object_segmentation.json
+++ b/tests/baselines/fixture/tests/test_object_segmentation.json
@@ -2,6 +2,9 @@
   "tests/test_object_segmentation.py::GaudiClipSegTester::test_no_latency_regression_autocast": {
     "gaudi2": {
       "latency": 5.3107380867004395
+    },
+    "gaudi3": {
+      "latency": 5.3107380867004395
     }
   }
 }
\ No newline at end of file
diff --git a/tests/baselines/fixture/tests/test_openclip_vqa.json b/tests/baselines/fixture/tests/test_openclip_vqa.json
index 91f9d7d601..2daee462ac 100644
--- a/tests/baselines/fixture/tests/test_openclip_vqa.json
+++ b/tests/baselines/fixture/tests/test_openclip_vqa.json
@@ -5,6 +5,9 @@
     },
     "gaudi2": {
       "throughput": 1472
+    },
+    "gaudi3": {
+      "throughput": 1472
     }
   },
   "tests/test_openclip_vqa.py::test_openclip_vqa_bf16[microsoft/BiomedCLIP-PubMedBERT_256-vit_base_patch16_224]": {
@@ -13,6 +16,9 @@
     },
     "gaudi2": {
       "throughput": 1816
+    },
+    "gaudi3": {
+      "throughput": 1816
     }
   }
 }
\ No newline at end of file
diff --git a/tests/baselines/fixture/tests/test_sentence_transformers.json b/tests/baselines/fixture/tests/test_sentence_transformers.json
index 23f4f6af97..dfa5753e50 100644
--- a/tests/baselines/fixture/tests/test_sentence_transformers.json
+++ b/tests/baselines/fixture/tests/test_sentence_transformers.json
@@ -5,6 +5,9 @@
     },
     "gaudi2": {
       "measured_throughput": 3614.2610109716247
+    },
+    "gaudi3": {
+      "measured_throughput": 3614.2610109716247
     }
   },
   "tests/test_sentence_transformers.py::test_compute_embeddings_throughput[sentence-transformers/all-MiniLM-L6-v2]": {
@@ -13,6 +16,9 @@
     },
     "gaudi2": {
       "measured_throughput": 2615.6975354038477
+    },
+    "gaudi3": {
+      "measured_throughput": 2615.6975354038477
     }
   },
   "tests/test_sentence_transformers.py::test_compute_embeddings_throughput[sentence-transformers/all-distilroberta-v1]": {
@@ -21,6 +27,9 @@
     },
     "gaudi2": {
       "measured_throughput": 958.5097903298335
+    },
+    "gaudi3": {
+      "measured_throughput": 958.5097903298335
     }
   },
   "tests/test_sentence_transformers.py::test_compute_embeddings_throughput[sentence-transformers/all-mpnet-base-v2]": {
@@ -29,6 +38,9 @@
     },
     "gaudi2": {
       "measured_throughput": 762.5595168883357
+    },
+    "gaudi3": {
+      "measured_throughput": 762.5595168883357
     }
   },
   "tests/test_sentence_transformers.py::test_compute_embeddings_throughput[sentence-transformers/distiluse-base-multilingual-cased-v1]": {
@@ -37,6 +49,9 @@
     },
     "gaudi2": {
       "measured_throughput": 3487.3319366004903
+    },
+    "gaudi3": {
+      "measured_throughput": 3487.3319366004903
     }
   },
   "tests/test_sentence_transformers.py::test_compute_embeddings_throughput[sentence-transformers/distiluse-base-multilingual-cased-v2]": {
@@ -45,6 +60,9 @@
     },
     "gaudi2": {
       "measured_throughput": 3807.2486282025716
+    },
+    "gaudi3": {
+      "measured_throughput": 3807.2486282025716
     }
   },
   "tests/test_sentence_transformers.py::test_compute_embeddings_throughput[sentence-transformers/multi-qa-MiniLM-L6-cos-v1]": {
@@ -53,6 +71,9 @@
     },
     "gaudi2": {
       "measured_throughput": 1208.3672807492396
+    },
+    "gaudi3": {
+      "measured_throughput": 1208.3672807492396
     }
   },
   "tests/test_sentence_transformers.py::test_compute_embeddings_throughput[sentence-transformers/multi-qa-distilbert-cos-v1]": {
@@ -61,6 +82,9 @@
     },
     "gaudi2": {
       "measured_throughput": 944.6166139694299
+    },
+    "gaudi3": {
+      "measured_throughput": 944.6166139694299
     }
   },
   "tests/test_sentence_transformers.py::test_compute_embeddings_throughput[sentence-transformers/multi-qa-mpnet-base-dot-v1]": {
@@ -69,6 +93,9 @@
     },
     "gaudi2": {
       "measured_throughput": 545.3360251829846
+    },
+    "gaudi3": {
+      "measured_throughput": 545.3360251829846
     }
   },
   "tests/test_sentence_transformers.py::test_compute_embeddings_throughput[sentence-transformers/paraphrase-MiniLM-L3-v2]": {
@@ -77,6 +104,9 @@
     },
     "gaudi2": {
       "measured_throughput": 5734.318427972881
+    },
+    "gaudi3": {
+      "measured_throughput": 5734.318427972881
     }
   },
   "tests/test_sentence_transformers.py::test_compute_embeddings_throughput[sentence-transformers/paraphrase-albert-small-v2]": {
@@ -85,6 +115,9 @@
     },
     "gaudi2": {
       "measured_throughput": 3896.1911011860166
+    },
+    "gaudi3": {
+      "measured_throughput": 3896.1911011860166
     }
   },
   "tests/test_sentence_transformers.py::test_compute_embeddings_throughput[sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2]": {
@@ -93,6 +126,9 @@
     },
     "gaudi2": {
       "measured_throughput": 3558.0778715789693
+    },
+    "gaudi3": {
+      "measured_throughput": 3558.0778715789693
     }
   },
   "tests/test_sentence_transformers.py::test_compute_embeddings_throughput[sentence-transformers/paraphrase-multilingual-mpnet-base-v2]": {
@@ -101,6 +137,9 @@
     },
     "gaudi2": {
       "measured_throughput": 2392.1654748794062
+    },
+    "gaudi3": {
+      "measured_throughput": 2392.1654748794062
     }
   }
 }
\ No newline at end of file
diff --git a/tests/baselines/fixture/tests/test_table_transformer.json b/tests/baselines/fixture/tests/test_table_transformer.json
index 7e1b6cee61..873efedbe6 100644
--- a/tests/baselines/fixture/tests/test_table_transformer.json
+++ b/tests/baselines/fixture/tests/test_table_transformer.json
@@ -5,6 +5,9 @@
     },
     "gaudi2": {
       "latency": 2.2
+    },
+    "gaudi3": {
+      "latency": 2.2
     }
   }
 }
\ No newline at end of file
diff --git a/tests/baselines/fixture/tests/test_text_generation_example.json b/tests/baselines/fixture/tests/test_text_generation_example.json
index de9b3f1014..2915b129e1 100644
--- a/tests/baselines/fixture/tests/test_text_generation_example.json
+++ b/tests/baselines/fixture/tests/test_text_generation_example.json
@@ -2,26 +2,41 @@
   "tests/test_text_generation_example.py::test_text_generation_awq[TheBloke/Llama-2-7b-Chat-AWQ-1-10-False-128-2048]": {
     "gaudi2": {
       "throughput": 456.7
+    },
+    "gaudi3": {
+      "throughput": 456.7
     }
   },
   "tests/test_text_generation_example.py::test_text_generation_beam_search[Qwen/Qwen2-7b-Instruct-1-True]": {
     "gaudi2": {
       "throughput": 91.24938949709826
+    },
+    "gaudi3": {
+      "throughput": 91.24938949709826
     }
   },
   "tests/test_text_generation_example.py::test_text_generation_bf16_1x[CohereForAI/c4ai-command-r-v01-1-False-False]": {
     "gaudi2": {
       "throughput": 29.50315234651154
+    },
+    "gaudi3": {
+      "throughput": 29.50315234651154
     }
   },
   "tests/test_text_generation_example.py::test_text_generation_bf16_1x[Deci/DeciLM-7B-1-False-False]": {
     "gaudi2": {
       "throughput": 115
+    },
+    "gaudi3": {
+      "throughput": 115
     }
   },
   "tests/test_text_generation_example.py::test_text_generation_bf16_1x[EleutherAI/gpt-j-6b-1-False-False]": {
     "gaudi2": {
       "throughput": 160.5823842101192
+    },
+    "gaudi3": {
+      "throughput": 160.5823842101192
     }
   },
   "tests/test_text_generation_example.py::test_text_generation_bf16_1x[EleutherAI/gpt-j-6b-1-True-False]": {
@@ -32,11 +47,17 @@
   "tests/test_text_generation_example.py::test_text_generation_bf16_1x[EleutherAI/gpt-neo-2.7B-1-False-False]": {
     "gaudi2": {
       "throughput": 257.2476416844122
+    },
+    "gaudi3": {
+      "throughput": 257.2476416844122
     }
   },
   "tests/test_text_generation_example.py::test_text_generation_bf16_1x[EleutherAI/gpt-neox-20b-1-False-False]": {
     "gaudi2": {
       "throughput": 50.67672679310354
+    },
+    "gaudi3": {
+      "throughput": 50.67672679310354
     }
   },
   "tests/test_text_generation_example.py::test_text_generation_bf16_1x[Qwen/Qwen1.5-7B-1-False-False]": {
@@ -47,22 +68,35 @@
   "tests/test_text_generation_example.py::test_text_generation_bf16_1x[Qwen/Qwen1.5-7B-4-False-False]": {
     "gaudi2": {
       "throughput": 490.8621617893209
+    },
+    "gaudi3": {
+      "throughput": 490.8621617893209
     }
   },
   "tests/test_text_generation_example.py::test_text_generation_bf16_1x[Qwen/Qwen1.5-MoE-A2.7B-1-True-False]": {
     "gaudi2": {
       "throughput": 44.25834541569395
+    },
+    "gaudi3": {
+      "throughput": 44.25834541569395
     }
   },
   "tests/test_text_generation_example.py::test_text_generation_bf16_1x[Qwen/Qwen2-7B-256-False-True]": {
     "gaudi2": {
       "output": "DeepSpeed is a machine learning framework that provides a unified interface for training deep learning models. It is designed to be easy to use and to provide high performance. DeepSpeed is built on top of PyTorch and TensorFlow, and it supports a wide range of models, including transformers, convolutional neural networks, and recurrent neural networks.\nDeepSpeed is a machine learning framework that provides a unified interface for training deep learning models. It is designed to be easy to use and to provide high performance. DeepSpeed is built on top of Py",
       "throughput": 8870.945160540245
+    },
+    "gaudi3": {
+      "output": "DeepSpeed is a machine learning framework that provides a unified interface for training deep learning models. It is designed to be easy to use and to provide high performance. DeepSpeed is built on top of PyTorch and TensorFlow, and it supports a wide range of models, including transformers, convolutional neural networks, and recurrent neural networks.\nDeepSpeed is a machine learning framework that provides a unified interface for training deep learning models. It is designed to be easy to use and to provide high performance. DeepSpeed is built on top of Py",
+      "throughput": 8870.945160540245
     }
   },
   "tests/test_text_generation_example.py::test_text_generation_bf16_1x[Qwen/Qwen2.5-7B-4-False-False]": {
     "gaudi2": {
       "throughput": 490
+    },
+    "gaudi3": {
+      "throughput": 490
     }
   },
   "tests/test_text_generation_example.py::test_text_generation_bf16_1x[Salesforce/codegen2-1B-1-False-False]": {
@@ -71,16 +105,25 @@
     },
     "gaudi2": {
       "throughput": 446.4029486883532
+    },
+    "gaudi3": {
+      "throughput": 446.4029486883532
     }
   },
   "tests/test_text_generation_example.py::test_text_generation_bf16_1x[THUDM/chatglm2-6b-1-True-False]": {
     "gaudi2": {
       "throughput": 150
+    },
+    "gaudi3": {
+      "throughput": 150
     }
   },
   "tests/test_text_generation_example.py::test_text_generation_bf16_1x[THUDM/chatglm3-6b-1-True-False]": {
     "gaudi2": {
       "throughput": 150
+    },
+    "gaudi3": {
+      "throughput": 150
     }
   },
   "tests/test_text_generation_example.py::test_text_generation_bf16_1x[adept/persimmon-8b-base-1-False-False]": {
@@ -91,16 +134,25 @@
   "tests/test_text_generation_example.py::test_text_generation_bf16_1x[adept/persimmon-8b-base-4-False-False]": {
     "gaudi2": {
       "throughput": 366.73968820698406
+    },
+    "gaudi3": {
+      "throughput": 366.73968820698406
     }
   },
   "tests/test_text_generation_example.py::test_text_generation_bf16_1x[baichuan-inc/Baichuan2-13B-Chat-1-False-False]": {
     "gaudi2": {
       "throughput": 66
+    },
+    "gaudi3": {
+      "throughput": 66
     }
   },
   "tests/test_text_generation_example.py::test_text_generation_bf16_1x[baichuan-inc/Baichuan2-7B-Chat-1-True-False]": {
     "gaudi2": {
       "throughput": 108
+    },
+    "gaudi3": {
+      "throughput": 108
     }
   },
   "tests/test_text_generation_example.py::test_text_generation_bf16_1x[bigcode/starcoder-1-False-False]": {
@@ -112,6 +164,10 @@
     "gaudi2": {
       "output": "def print_hello_world():\n    print(\"Hello World\")\n\ndef print_hello_world_twice():\n    print_hello_world()\n    print_hello_world()\n\ndef print_hello_world_thrice():\n    print_hello_world()\n    print_hello_world()\n    print_hello_world()\n\ndef print_hello_world_four_times():\n    print_hello_world()\n    print_hello_world()\n    print_hello_world()\n   ",
       "throughput": 6846.575763562658
+    },
+    "gaudi3": {
+      "output": "def print_hello_world():\n    print(\"Hello World\")\n\ndef print_hello_world_twice():\n    print_hello_world()\n    print_hello_world()\n\ndef print_hello_world_thrice():\n    print_hello_world()\n    print_hello_world()\n    print_hello_world()\n\ndef print_hello_world_four_times():\n    print_hello_world()\n    print_hello_world()\n    print_hello_world()\n   ",
+      "throughput": 6846.575763562658
     }
   },
   "tests/test_text_generation_example.py::test_text_generation_bf16_1x[bigcode/starcoder2-3b-1-False-False]": {
@@ -123,6 +179,10 @@
     "gaudi2": {
       "output": "def print_hello_world():\n    print(\"Hello World\")\n\ndef print_hello_world_with_name(name):\n    print(\"Hello World, \" + name)\n\ndef print_hello_world_with_name_and_age(name, age):\n    print(\"Hello World, \" + name + \", \" + str(age))\n\ndef print_hello_world_with_name_and_age_and_gender(name, age, gender):\n    print(\"Hello",
       "throughput": 261.07213776344133
+    },
+    "gaudi3": {
+      "output": "def print_hello_world():\n    print(\"Hello World\")\n\ndef print_hello_world_with_name(name):\n    print(\"Hello World, \" + name)\n\ndef print_hello_world_with_name_and_age(name, age):\n    print(\"Hello World, \" + name + \", \" + str(age))\n\ndef print_hello_world_with_name_and_age_and_gender(name, age, gender):\n    print(\"Hello",
+      "throughput": 261.07213776344133
     }
   },
   "tests/test_text_generation_example.py::test_text_generation_bf16_1x[bigscience/bloomz-7b1-1-False-False]": {
@@ -131,33 +191,53 @@
     },
     "gaudi2": {
       "throughput": 130.0472971205316
+    },
+    "gaudi3": {
+      "throughput": 130.0472971205316
     }
   },
   "tests/test_text_generation_example.py::test_text_generation_bf16_1x[codellama/CodeLlama-34b-hf-1-True-False]": {
     "gaudi2": {
       "throughput": 32.644
+    },
+    "gaudi3": {
+      "throughput": 32.644
     }
   },
   "tests/test_text_generation_example.py::test_text_generation_bf16_1x[deepseek-ai/DeepSeek-V2-Lite-1-False-False]": {
     "gaudi2": {
       "throughput": 35
+    },
+    "gaudi3": {
+      "throughput": 35
     }
   },
   "tests/test_text_generation_example.py::test_text_generation_bf16_1x[facebook/xglm-1.7B-1-False-False]": {
     "gaudi2": {
       "throughput": 357.46365062825083
+    },
+    "gaudi3": {
+      "throughput": 357.46365062825083
     }
   },
   "tests/test_text_generation_example.py::test_text_generation_bf16_1x[google/gemma-2-27b-1-False-True]": {
     "gaudi2": {
       "output": "DeepSpeed is a machine learning framework that enables you to train models with trillions of parameters and beyond, using model parallelism to partition large models over multiple GPUs.\n\nThe following is a brief introduction to the DeepSpeed model parallel training.\n\n<h2>1. Introduction</h2>\n\nThe DeepSpeed model parallel training is a simple and effective way to train large models. It is a framework that enables you to train models with trillions of parameters and beyond.\n\nDeepSpeed is a distributed deep learning optimization toolkit that makes it easy and efficient",
       "throughput": 36.578709544111
+    },
+    "gaudi3": {
+      "output": "DeepSpeed is a machine learning framework that enables you to train models with trillions of parameters and beyond, using model parallelism to partition large models over multiple GPUs.\n\nThe following is a brief introduction to the DeepSpeed model parallel training.\n\n<h2>1. Introduction</h2>\n\nThe DeepSpeed model parallel training is a simple and effective way to train large models. It is a framework that enables you to train models with trillions of parameters and beyond.\n\nDeepSpeed is a distributed deep learning optimization toolkit that makes it easy and efficient",
+      "throughput": 36.578709544111
     }
   },
   "tests/test_text_generation_example.py::test_text_generation_bf16_1x[google/gemma-2-9b-1-False-True]": {
     "gaudi2": {
       "output": "DeepSpeed is a machine learning framework that enables training of large-scale deep learning models on a single GPU or across multiple GPUs. It is designed to be easy to use and highly scalable, making it a powerful tool for researchers and practitioners working with large-scale deep learning models.\n\nDeepSpeed is built on top of PyTorch, a popular deep learning framework, and provides a set of tools and libraries that make it easy to train large-scale models. It includes features such as zero-shot inference, which allows models to be",
       "throughput": 92.302359446567
+    },
+    "gaudi3": {
+      "output": "DeepSpeed is a machine learning framework that enables training of large-scale deep learning models on a single GPU or across multiple GPUs. It is designed to be easy to use and highly scalable, making it a powerful tool for researchers and practitioners working with large-scale deep learning models.\n\nDeepSpeed is built on top of PyTorch, a popular deep learning framework, and provides a set of tools and libraries that make it easy to train large-scale models. It includes features such as zero-shot inference, which allows models to be",
+      "throughput": 92.302359446567
     }
   },
   "tests/test_text_generation_example.py::test_text_generation_bf16_1x[google/gemma-7b-1-False-False]": {
@@ -169,6 +249,10 @@
     "gaudi2": {
       "output": "DeepSpeed is a machine learning framework that enables training of large-scale models on commodity hardware. It is designed to be a drop-in replacement for PyTorch, and it is compatible with the existing PyTorch ecosystem. DeepSpeed is designed to be easy to use, and it provides a number of features that make it easy to train large-scale models. DeepSpeed is designed to be scalable, and it can be used to train models on a single machine or on a cluster of machines. DeepSpeed is designed to be efficient,",
       "throughput": 109.70751574382221
+    },
+    "gaudi3": {
+      "output": "DeepSpeed is a machine learning framework that enables training of large-scale models on commodity hardware. It is designed to be a drop-in replacement for PyTorch, and it is compatible with the existing PyTorch ecosystem. DeepSpeed is designed to be easy to use, and it provides a number of features that make it easy to train large-scale models. DeepSpeed is designed to be scalable, and it can be used to train models on a single machine or on a cluster of machines. DeepSpeed is designed to be efficient,",
+      "throughput": 109.70751574382221
     }
   },
   "tests/test_text_generation_example.py::test_text_generation_bf16_1x[gpt2-xl-1-False-False]": {
@@ -177,6 +261,9 @@
     },
     "gaudi2": {
       "throughput": 281.8734689674413
+    },
+    "gaudi3": {
+      "throughput": 281.8734689674413
     }
   },
   "tests/test_text_generation_example.py::test_text_generation_bf16_1x[meta-llama/Llama-2-7b-hf-1-True-False]": {
@@ -188,21 +275,34 @@
     "gaudi2": {
       "output": "DeepSpeed is a machine learning framework for deep learning. It is designed to be fast and efficient, while also being easy to use. DeepSpeed is based on the TensorFlow framework, and it uses the TensorFlow library to perform computations.\nDeepSpeed is a deep learning framework that is designed to be fast and efficient. It is based on the TensorFlow library and uses the TensorFlow library to perform computations. DeepSpeed is designed to be easy to use and to provide a high level of flex",
       "throughput": 141.25776956002076
+    },
+    "gaudi3": {
+      "output": "DeepSpeed is a machine learning framework for deep learning. It is designed to be fast and efficient, while also being easy to use. DeepSpeed is based on the TensorFlow framework, and it uses the TensorFlow library to perform computations.\nDeepSpeed is a deep learning framework that is designed to be fast and efficient. It is based on the TensorFlow library and uses the TensorFlow library to perform computations. DeepSpeed is designed to be easy to use and to provide a high level of flex",
+      "throughput": 141.25776956002076
     }
   },
   "tests/test_text_generation_example.py::test_text_generation_bf16_1x[meta-llama/Llama-2-7b-hf-512-False-False]": {
     "gaudi2": {
       "throughput": 8711
+    },
+    "gaudi3": {
+      "throughput": 8711
     }
   },
   "tests/test_text_generation_example.py::test_text_generation_bf16_1x[meta-llama/Llama-2-7b-hf-512-True-False]": {
     "gaudi2": {
       "throughput": 12808
+    },
+    "gaudi3": {
+      "throughput": 12808
     }
   },
   "tests/test_text_generation_example.py::test_text_generation_bf16_1x[meta-llama/Meta-Llama-3-8B-1-True-False]": {
     "gaudi2": {
       "throughput": 129
+    },
+    "gaudi3": {
+      "throughput": 129
     }
   },
   "tests/test_text_generation_example.py::test_text_generation_bf16_1x[microsoft/phi-2-1-False-False]": {
@@ -211,6 +311,9 @@
     },
     "gaudi2": {
       "throughput": 224.72307766211117
+    },
+    "gaudi3": {
+      "throughput": 224.72307766211117
     }
   },
   "tests/test_text_generation_example.py::test_text_generation_bf16_1x[mistralai/Mistral-7B-v0.1-1-True-False]": {
@@ -222,17 +325,28 @@
     "gaudi2": {
       "output": "DeepSpeed is a machine learning framework that accelerates training of large models on a single machine or distributed systems. It is designed to be compatible with PyTorch and TensorFlow, and can be used to train models on a single machine or on a distributed system.\n\nDeepSpeed is a machine learning framework that accelerates training of large models on a single machine or distributed systems. It is designed to be compatible with PyTorch and TensorFlow, and can be used to train models on a single machine or on a distributed system",
       "throughput": 130.2172236767782
+    },
+    "gaudi3": {
+      "output": "DeepSpeed is a machine learning framework that accelerates training of large models on a single machine or distributed systems. It is designed to be compatible with PyTorch and TensorFlow, and can be used to train models on a single machine or on a distributed system.\n\nDeepSpeed is a machine learning framework that accelerates training of large models on a single machine or distributed systems. It is designed to be compatible with PyTorch and TensorFlow, and can be used to train models on a single machine or on a distributed system",
+      "throughput": 130.2172236767782
     }
   },
   "tests/test_text_generation_example.py::test_text_generation_bf16_1x[mistralai/Mixtral-8x7B-v0.1-1-False-True]": {
     "gaudi2": {
       "output": "DeepSpeed is a machine learning framework that enables training of large models on a single machine with a single GPU. It is designed to be easy to use and efficient, and it can be used to train models on a variety of tasks.\n\n## Introduction\n\nDeepSpeed is a machine learning framework that enables training of large models on a single machine with a single GPU. It is designed to be easy to use and efficient, and it can be used to train models on a variety of tasks.\n\n## What is DeepSpeed",
       "throughput": 23.7931001677926
+    },
+    "gaudi3": {
+      "output": "DeepSpeed is a machine learning framework that enables training of large models on a single machine with a single GPU. It is designed to be easy to use and efficient, and it can be used to train models on a variety of tasks.\n\n## Introduction\n\nDeepSpeed is a machine learning framework that enables training of large models on a single machine with a single GPU. It is designed to be easy to use and efficient, and it can be used to train models on a variety of tasks.\n\n## What is DeepSpeed",
+      "throughput": 23.7931001677926
     }
   },
   "tests/test_text_generation_example.py::test_text_generation_bf16_1x[mosaicml/mpt-30b-1-False-False]": {
     "gaudi2": {
       "throughput": 36.06464336116623
+    },
+    "gaudi3": {
+      "throughput": 36.06464336116623
     }
   },
   "tests/test_text_generation_example.py::test_text_generation_bf16_1x[mosaicml/mpt-7b-1-False-False]": {
@@ -243,6 +357,9 @@
   "tests/test_text_generation_example.py::test_text_generation_bf16_1x[openbmb/MiniCPM3-4B-1-False-False]": {
     "gaudi2": {
       "throughput": 65.116
+    },
+    "gaudi3": {
+      "throughput": 65.116
     }
   },
   "tests/test_text_generation_example.py::test_text_generation_bf16_1x[stabilityai/stablelm-2-12b-1-False-False]": {
@@ -251,11 +368,17 @@
     },
     "gaudi2": {
       "throughput": 74.8904496532218
+    },
+    "gaudi3": {
+      "throughput": 74.8904496532218
     }
   },
   "tests/test_text_generation_example.py::test_text_generation_bf16_1x[state-spaces/mamba-130m-hf-1536-False-False]": {
     "gaudi2": {
       "throughput": 5385.511100161605
+    },
+    "gaudi3": {
+      "throughput": 5385.511100161605
     }
   },
   "tests/test_text_generation_example.py::test_text_generation_bf16_1x[state-spaces/mamba-130m-hf-224-False-False]": {
@@ -266,6 +389,9 @@
   "tests/test_text_generation_example.py::test_text_generation_bf16_1x[tiiuae/falcon-40b-1-True-False]": {
     "gaudi2": {
       "throughput": 25.202450111088346
+    },
+    "gaudi3": {
+      "throughput": 25.202450111088346
     }
   },
   "tests/test_text_generation_example.py::test_text_generation_bf16_1x[tiiuae/falcon-7b-1-True-False]": {
@@ -276,6 +402,9 @@
   "tests/test_text_generation_example.py::test_text_generation_bf16_1x[tiiuae/falcon-mamba-7b-1-False-False]": {
     "gaudi2": {
       "throughput": 47.1464839567739
+    },
+    "gaudi3": {
+      "throughput": 47.1464839567739
     }
   },
   "tests/test_text_generation_example.py::test_text_generation_contrastive_search[gpt2-xl-1-False]": {
@@ -284,11 +413,17 @@
     },
     "gaudi2": {
       "throughput": 51.61471298016438
+    },
+    "gaudi3": {
+      "throughput": 51.61471298016438
     }
   },
   "tests/test_text_generation_example.py::test_text_generation_deepspeed[Qwen/Qwen2.5-72B-2-1]": {
     "gaudi2": {
       "throughput": 26
+    },
+    "gaudi3": {
+      "throughput": 26
     }
   },
   "tests/test_text_generation_example.py::test_text_generation_deepspeed[bigscience/bloomz-7b1-8-1]": {
@@ -299,146 +434,233 @@
   "tests/test_text_generation_example.py::test_text_generation_deepspeed[bigscience/bloomz-8-1]": {
     "gaudi2": {
       "throughput": 36.77314954096159
+    },
+    "gaudi3": {
+      "throughput": 36.77314954096159
     }
   },
   "tests/test_text_generation_example.py::test_text_generation_deepspeed[facebook/opt-66b-2-1]": {
     "gaudi2": {
       "throughput": 28.48069266504111
+    },
+    "gaudi3": {
+      "throughput": 28.48069266504111
     }
   },
   "tests/test_text_generation_example.py::test_text_generation_deepspeed[google/gemma-2-27b-8-1]": {
     "gaudi2": {
       "throughput": 87.578709544111
+    },
+    "gaudi3": {
+      "throughput": 87.578709544111
     }
   },
   "tests/test_text_generation_example.py::test_text_generation_deepspeed[google/gemma-2-9b-8-1]": {
     "gaudi2": {
       "throughput": 110.12610917383735
+    },
+    "gaudi3": {
+      "throughput": 110.12610917383735
     }
   },
   "tests/test_text_generation_example.py::test_text_generation_deepspeed[meta-llama/Llama-2-70b-hf-8-1]": {
     "gaudi2": {
       "throughput": 64.10514998902435
+    },
+    "gaudi3": {
+      "throughput": 64.10514998902435
     }
   },
   "tests/test_text_generation_example.py::test_text_generation_deepspeed[meta-llama/Meta-Llama-3-70B-Instruct-8-1]": {
     "gaudi2": {
       "throughput": 64
+    },
+    "gaudi3": {
+      "throughput": 64
     }
   },
   "tests/test_text_generation_example.py::test_text_generation_distributed_tp[meta-llama/Llama-2-7b-hf]": {
     "gaudi2": {
       "throughput": 1345.2369318328463
+    },
+    "gaudi3": {
+      "throughput": 1345.2369318328463
     }
   },
   "tests/test_text_generation_example.py::test_text_generation_fp8[meta-llama/Llama-2-70b-hf-4-207-False-2048-128]": {
     "gaudi2": {
       "throughput": 568.5
+    },
+    "gaudi3": {
+      "throughput": 568.5
     }
   },
   "tests/test_text_generation_example.py::test_text_generation_fp8[meta-llama/Llama-2-70b-hf-4-3042-False-128-128]": {
     "gaudi2": {
       "throughput": 5374.6
+    },
+    "gaudi3": {
+      "throughput": 5374.6
     }
   },
   "tests/test_text_generation_example.py::test_text_generation_fp8[meta-llama/Llama-2-70b-hf-4-750-False-128-2048]": {
     "gaudi2": {
       "throughput": 7422.4
+    },
+    "gaudi3": {
+      "throughput": 7422.4
     }
   },
   "tests/test_text_generation_example.py::test_text_generation_fp8[meta-llama/Llama-2-70b-hf-8-172-False-2048-2048]": {
     "gaudi2": {
       "throughput": 4656.2
+    },
+    "gaudi3": {
+      "throughput": 4656.2
     }
   },
   "tests/test_text_generation_example.py::test_text_generation_fp8[meta-llama/Llama-2-7b-hf-1-1230-False-128-128]": {
     "gaudi2": {
       "throughput": 13152.7
+    },
+    "gaudi3": {
+      "throughput": 13152.7
     }
   },
   "tests/test_text_generation_example.py::test_text_generation_fp8[meta-llama/Llama-2-7b-hf-1-163-False-128-2048]": {
     "gaudi2": {
       "throughput": 4774.7
+    },
+    "gaudi3": {
+      "throughput": 4774.7
     }
   },
   "tests/test_text_generation_example.py::test_text_generation_fp8[meta-llama/Llama-2-7b-hf-1-81-False-2048-2048]": {
     "gaudi2": {
       "throughput": 1942.9
+    },
+    "gaudi3": {
+      "throughput": 1942.9
     }
   },
   "tests/test_text_generation_example.py::test_text_generation_fp8[meta-llama/Llama-2-7b-hf-1-94-False-2048-128]": {
     "gaudi2": {
       "throughput": 1293.3
+    },
+    "gaudi3": {
+      "throughput": 1293.3
     }
   },
   "tests/test_text_generation_example.py::test_text_generation_fp8[microsoft/phi-2-1-1-True-128-128]": {
     "gaudi2": {
       "throughput": 254.08932787178165
+    },
+    "gaudi3": {
+      "throughput": 254.08932787178165
     }
   },
   "tests/test_text_generation_example.py::test_text_generation_fp8[mistralai/Mistral-7B-Instruct-v0.2-1-120-True-128-2048]": {
     "gaudi2": {
       "throughput": 6979.225194247115
+    },
+    "gaudi3": {
+      "throughput": 6979.225194247115
     }
   },
   "tests/test_text_generation_example.py::test_text_generation_fp8[mistralai/Mistral-7B-Instruct-v0.2-1-120-True-2048-128]": {
     "gaudi2": {
       "throughput": 1681.4401450088983
+    },
+    "gaudi3": {
+      "throughput": 1681.4401450088983
     }
   },
   "tests/test_text_generation_example.py::test_text_generation_fp8[mistralai/Mistral-7B-Instruct-v0.2-1-44-True-2048-2048]": {
     "gaudi2": {
       "throughput": 3393.149396451692
+    },
+    "gaudi3": {
+      "throughput": 3393.149396451692
     }
   },
   "tests/test_text_generation_example.py::test_text_generation_fp8[mistralai/Mistral-7B-Instruct-v0.2-1-896-True-128-128]": {
     "gaudi2": {
       "throughput": 17068.965283763682
+    },
+    "gaudi3": {
+      "throughput": 17068.965283763682
     }
   },
   "tests/test_text_generation_example.py::test_text_generation_fp8[mistralai/Mixtral-8x7B-v0.1-1-1-True-128-128]": {
     "gaudi2": {
       "throughput": 40.94
+    },
+    "gaudi3": {
+      "throughput": 40.94
     }
   },
   "tests/test_text_generation_example.py::test_text_generation_fp8[mistralai/Mixtral-8x7B-v0.1-2-48-True-2048-2048]": {
     "gaudi2": {
       "throughput": 1147.5
+    },
+    "gaudi3": {
+      "throughput": 1147.5
     }
   },
   "tests/test_text_generation_example.py::test_text_generation_fp8[mistralai/Mixtral-8x7B-v0.1-2-768-True-128-128]": {
     "gaudi2": {
       "throughput": 3428.65
+    },
+    "gaudi3": {
+      "throughput": 3428.65
     }
   },
   "tests/test_text_generation_example.py::test_text_generation_fp8[mistralai/Mixtral-8x7B-v0.1-2-96-True-128-2048]": {
     "gaudi2": {
       "throughput": 2570.34
+    },
+    "gaudi3": {
+      "throughput": 2570.34
     }
   },
   "tests/test_text_generation_example.py::test_text_generation_fp8[mistralai/Mixtral-8x7B-v0.1-2-96-True-2048-128]": {
     "gaudi2": {
       "throughput": 379.03
+    },
+    "gaudi3": {
+      "throughput": 379.03
     }
   },
   "tests/test_text_generation_example.py::test_text_generation_fp8[tiiuae/falcon-180B-4-950-True-128-128]": {
     "gaudi2": {
       "throughput": 2506.68
+    },
+    "gaudi3": {
+      "throughput": 2506.68
     }
   },
   "tests/test_text_generation_example.py::test_text_generation_gptq[TheBloke/Llama-2-7b-Chat-GPTQ-1-10-False-128-2048]": {
     "gaudi2": {
       "throughput": 456.7
+    },
+    "gaudi3": {
+      "throughput": 456.7
     }
   },
   "tests/test_text_generation_example.py::test_text_generation_torch_compile[meta-llama/Llama-2-7b-hf]": {
     "gaudi2": {
       "throughput": 102.27823420713148
+    },
+    "gaudi3": {
+      "throughput": 102.27823420713148
     }
   },
   "tests/test_text_generation_example.py::test_text_generation_torch_compile_distributed[meta-llama/Llama-2-7b-hf]": {
     "gaudi2": {
       "throughput": 39.72973199515235
+    },
+    "gaudi3": {
+      "throughput": 39.72973199515235
     }
   }
 }
\ No newline at end of file
diff --git a/tests/baselines/fixture/tests/test_video_llava.json b/tests/baselines/fixture/tests/test_video_llava.json
index a37db23bd6..90146af1f5 100644
--- a/tests/baselines/fixture/tests/test_video_llava.json
+++ b/tests/baselines/fixture/tests/test_video_llava.json
@@ -5,6 +5,9 @@
     },
     "gaudi2": {
       "throughput": 27.72902536827787
+    },
+    "gaudi3": {
+      "throughput": 27.72902536827787
     }
   }
 }
\ No newline at end of file
diff --git a/tests/baselines/fixture/tests/test_video_mae.json b/tests/baselines/fixture/tests/test_video_mae.json
index 8388c9ff80..481c431a19 100644
--- a/tests/baselines/fixture/tests/test_video_mae.json
+++ b/tests/baselines/fixture/tests/test_video_mae.json
@@ -5,6 +5,9 @@
     },
     "gaudi2": {
       "latency": 17.544198036193848
+    },
+    "gaudi3": {
+      "latency": 17.544198036193848
     }
   }
 }
\ No newline at end of file
diff --git a/tests/baselines/fixture/tests/test_zero_shot_object_detection.json b/tests/baselines/fixture/tests/test_zero_shot_object_detection.json
index a98d4e6556..ec3779fbc4 100644
--- a/tests/baselines/fixture/tests/test_zero_shot_object_detection.json
+++ b/tests/baselines/fixture/tests/test_zero_shot_object_detection.json
@@ -5,6 +5,9 @@
     },
     "gaudi2": {
       "latency": 4.213955687819833
+    },
+    "gaudi3": {
+      "latency": 4.213955687819833
     }
   }
 }
\ No newline at end of file
diff --git a/tests/configs/examples/CodeLlama_13b_Instruct_hf.json b/tests/configs/examples/CodeLlama_13b_Instruct_hf.json
index d2c2aa86f9..576171fb1e 100644
--- a/tests/configs/examples/CodeLlama_13b_Instruct_hf.json
+++ b/tests/configs/examples/CodeLlama_13b_Instruct_hf.json
@@ -5,9 +5,36 @@
             "eval_batch_size": 48,
             "distribution": {
                 "deepspeed": {
-                    "learning_rate": 5e-5,
+                    "learning_rate": 5e-05,
                     "train_batch_size": 48,
-                    "metrics": ["perplexity", "train_runtime", "train_samples_per_second"],
+                    "metrics": [
+                        "perplexity",
+                        "train_runtime",
+                        "train_samples_per_second"
+                    ],
+                    "extra_arguments": [
+                        "--dataset_config_name wikitext-2-raw-v1",
+                        "--gradient_checkpointing",
+                        "--use_hpu_graphs_for_inference",
+                        "--deepspeed tests/configs/deepspeed_zero_1.json"
+                    ]
+                }
+            }
+        }
+    },
+    "gaudi3": {
+        "wikitext": {
+            "num_train_epochs": 1,
+            "eval_batch_size": 48,
+            "distribution": {
+                "deepspeed": {
+                    "learning_rate": 5e-05,
+                    "train_batch_size": 48,
+                    "metrics": [
+                        "perplexity",
+                        "train_runtime",
+                        "train_samples_per_second"
+                    ],
                     "extra_arguments": [
                         "--dataset_config_name wikitext-2-raw-v1",
                         "--gradient_checkpointing",
@@ -18,4 +45,4 @@
             }
         }
     }
-}
+}
\ No newline at end of file
diff --git a/tests/configs/examples/LlamaGuard_7b.json b/tests/configs/examples/LlamaGuard_7b.json
index 704fe64c73..7b0a4e122d 100644
--- a/tests/configs/examples/LlamaGuard_7b.json
+++ b/tests/configs/examples/LlamaGuard_7b.json
@@ -5,9 +5,36 @@
             "eval_batch_size": 8,
             "distribution": {
                 "deepspeed": {
-                    "learning_rate": 3e-5,
+                    "learning_rate": 3e-05,
                     "train_batch_size": 32,
-                    "metrics": ["eval_f1", "train_runtime", "train_samples_per_second"],
+                    "metrics": [
+                        "eval_f1",
+                        "train_runtime",
+                        "train_samples_per_second"
+                    ],
+                    "extra_arguments": [
+                        "--max_seq_length 128",
+                        "--add_pad_token True",
+                        "--use_hpu_graphs_for_inference",
+                        "--deepspeed tests/configs/deepspeed_zero_2.json"
+                    ]
+                }
+            }
+        }
+    },
+    "gaudi3": {
+        "mrpc": {
+            "num_train_epochs": 3,
+            "eval_batch_size": 8,
+            "distribution": {
+                "deepspeed": {
+                    "learning_rate": 3e-05,
+                    "train_batch_size": 32,
+                    "metrics": [
+                        "eval_f1",
+                        "train_runtime",
+                        "train_samples_per_second"
+                    ],
                     "extra_arguments": [
                         "--max_seq_length 128",
                         "--add_pad_token True",
@@ -18,4 +45,4 @@
             }
         }
     }
-}
+}
\ No newline at end of file
diff --git a/tests/configs/examples/Llama_3_1_8B.json b/tests/configs/examples/Llama_3_1_8B.json
index 4c57db9a6b..3e9edaaeb1 100644
--- a/tests/configs/examples/Llama_3_1_8B.json
+++ b/tests/configs/examples/Llama_3_1_8B.json
@@ -5,9 +5,48 @@
             "eval_batch_size": 1,
             "distribution": {
                 "single_card": {
-                    "learning_rate": 3e-4,
+                    "learning_rate": 0.0003,
                     "train_batch_size": 10,
-                    "metrics": ["perplexity", "train_runtime", "train_samples_per_second"],
+                    "metrics": [
+                        "perplexity",
+                        "train_runtime",
+                        "train_samples_per_second"
+                    ],
+                    "extra_arguments": [
+                        "--bf16",
+                        "--gradient_checkpointing",
+                        "--eval_strategy epoch",
+                        "--eval_delay 2",
+                        "--save_strategy no",
+                        "--warmup_ratio 0.03",
+                        "--lr_scheduler_type cosine",
+                        "--logging_steps 1",
+                        "--lora_rank 4",
+                        "--lora_target_modules q_proj v_proj",
+                        "--dataset_concatenation",
+                        "--max_seq_length 512",
+                        "--validation_split_percentage 10",
+                        "--attn_softmax_bf16",
+                        "--use_flash_attention True",
+                        "--flash_attention_causal_mask True"
+                    ]
+                }
+            }
+        }
+    },
+    "gaudi3": {
+        "tatsu-lab/alpaca": {
+            "num_train_epochs": 2,
+            "eval_batch_size": 1,
+            "distribution": {
+                "single_card": {
+                    "learning_rate": 0.0003,
+                    "train_batch_size": 10,
+                    "metrics": [
+                        "perplexity",
+                        "train_runtime",
+                        "train_samples_per_second"
+                    ],
                     "extra_arguments": [
                         "--bf16",
                         "--gradient_checkpointing",
@@ -30,4 +69,4 @@
             }
         }
     }
-}
+}
\ No newline at end of file
diff --git a/tests/configs/examples/Llama_3_2_11B_Vision_Instruct.json b/tests/configs/examples/Llama_3_2_11B_Vision_Instruct.json
index fd8abaccfc..b378a77213 100644
--- a/tests/configs/examples/Llama_3_2_11B_Vision_Instruct.json
+++ b/tests/configs/examples/Llama_3_2_11B_Vision_Instruct.json
@@ -5,9 +5,51 @@
             "eval_batch_size": 4,
             "distribution": {
                 "multi_card": {
-                    "learning_rate": 5e-5,
+                    "learning_rate": 5e-05,
                     "train_batch_size": 2,
-                     "metrics": ["eval_accuracy", "train_runtime", "train_samples_per_second"],
+                    "metrics": [
+                        "eval_accuracy",
+                        "train_runtime",
+                        "train_samples_per_second"
+                    ],
+                    "extra_arguments": [
+                        "--bf16",
+                        "--gradient_accumulation_steps 8",
+                        "--eval_strategy no",
+                        "--save_strategy no",
+                        "--warmup_steps 50",
+                        "--lr_scheduler_type constant",
+                        "--max_grad_norm 0.3",
+                        "--logging_steps 1",
+                        "--use_hpu_graphs_for_inference",
+                        "--lora_rank 8",
+                        "--lora_alpha 8",
+                        "--lora_dropout 0.1",
+                        "--lora_target_modules '.*(language_model).*(down_proj|gate_proj|up_proj|k_proj|q_proj|v_proj|o_proj).*$'",
+                        "--low_cpu_mem_usage True",
+                        "--adam_epsilon 1e-08",
+                        "--input_column_name image query",
+                        "--output_column_name answers",
+                        "--remove_unused_columns False",
+                        "--max_seq_length 512"
+                    ]
+                }
+            }
+        }
+    },
+    "gaudi3": {
+        "image2text_lora_finetune": {
+            "num_train_epochs": 1,
+            "eval_batch_size": 4,
+            "distribution": {
+                "multi_card": {
+                    "learning_rate": 5e-05,
+                    "train_batch_size": 2,
+                    "metrics": [
+                        "eval_accuracy",
+                        "train_runtime",
+                        "train_samples_per_second"
+                    ],
                     "extra_arguments": [
                         "--bf16",
                         "--gradient_accumulation_steps 8",
@@ -33,4 +75,4 @@
             }
         }
     }
-}
+}
\ No newline at end of file
diff --git a/tests/configs/examples/Qwen2_72B.json b/tests/configs/examples/Qwen2_72B.json
index 848bb0238d..9ac52560aa 100644
--- a/tests/configs/examples/Qwen2_72B.json
+++ b/tests/configs/examples/Qwen2_72B.json
@@ -5,9 +5,60 @@
             "eval_batch_size": 8,
             "distribution": {
                 "deepspeed": {
-                    "learning_rate": 3e-4,
+                    "learning_rate": 0.0003,
                     "train_batch_size": 8,
-                    "metrics": ["perplexity", "train_runtime", "train_samples_per_second"],
+                    "metrics": [
+                        "perplexity",
+                        "train_runtime",
+                        "train_samples_per_second"
+                    ],
+                    "extra_arguments": [
+                        "--bf16 True",
+                        "--subset None",
+                        "--streaming False",
+                        "--packing False",
+                        "--num_buckets 8",
+                        "--gradient_accumulation_steps 8",
+                        "--gradient_checkpointing True",
+                        "--eval_strategy no",
+                        "--save_strategy no",
+                        "--throughput_warmup_steps 3",
+                        "--learning_rate 3e-4",
+                        "--warmup_ratio  0.03",
+                        "--lr_scheduler_type cosine",
+                        "--max_grad_norm  0.3",
+                        "--logging_steps 1",
+                        "--adam_epsilon 1e-8",
+                        "--use_peft True",
+                        "--lora_r 4",
+                        "--lora_alpha 16",
+                        "--lora_dropout 0.05",
+                        "--lora_target_modules q_proj v_proj k_proj o_proj",
+                        "--max_seq_length 512",
+                        "--weight_decay 0.05",
+                        "--report_to none",
+                        "--max_steps 10",
+                        "--gradient_checkpointing True",
+                        "--pipelining_fwd_bwd True",
+                        "--deepspeed tests/configs/deepspeed_zero_3_gaudi1.json"
+                    ]
+                }
+            }
+        }
+    },
+    "gaudi3": {
+        "trl-sft-qwen": {
+            "num_train_epochs": 1,
+            "eval_batch_size": 8,
+            "distribution": {
+                "deepspeed": {
+                    "learning_rate": 0.0003,
+                    "train_batch_size": 8,
+                    "metrics": [
+                        "perplexity",
+                        "train_runtime",
+                        "train_samples_per_second"
+                    ],
                     "extra_arguments": [
                         "--bf16 True",
                         "--subset None",
@@ -42,4 +93,4 @@
             }
         }
     }
-}
+}
\ No newline at end of file
diff --git a/tests/configs/examples/Qwen2_7B.json b/tests/configs/examples/Qwen2_7B.json
index 23b4ea048a..56a74e084f 100644
--- a/tests/configs/examples/Qwen2_7B.json
+++ b/tests/configs/examples/Qwen2_7B.json
@@ -5,9 +5,12 @@
             "eval_batch_size": 32,
             "distribution": {
                 "multi_card": {
-                    "learning_rate": 3e-4,
+                    "learning_rate": 0.0003,
                     "train_batch_size": 32,
-                    "metrics": ["train_runtime", "train_samples_per_second"],
+                    "metrics": [
+                        "train_runtime",
+                        "train_samples_per_second"
+                    ],
                     "extra_arguments": [
                         "--bf16 True",
                         "--subset ''",
@@ -41,9 +44,88 @@
             "eval_batch_size": 2,
             "distribution": {
                 "multi_card": {
-                    "learning_rate": 3e-4,
+                    "learning_rate": 0.0003,
                     "train_batch_size": 2,
-                    "metrics": ["train_runtime", "train_samples_per_second"],
+                    "metrics": [
+                        "train_runtime",
+                        "train_samples_per_second"
+                    ],
+                    "extra_arguments": [
+                        "--bf16 True",
+                        "--subset ''",
+                        "--streaming False",
+                        "--packing True",
+                        "--gradient_accumulation_steps 8",
+                        "--gradient_checkpointing True",
+                        "--eval_strategy no",
+                        "--save_strategy no",
+                        "--throughput_warmup_steps 5",
+                        "--warmup_ratio  0.03",
+                        "--lr_scheduler_type cosine",
+                        "--max_grad_norm  0.3",
+                        "--logging_steps 1",
+                        "--adam_epsilon 3e-4",
+                        "--use_peft False",
+                        "--max_seq_length 4096",
+                        "--report_to none",
+                        "--use_flash_attention True",
+                        "--max_steps 20"
+                    ]
+                }
+            }
+        }
+    },
+    "gaudi3": {
+        "trl-sft-chat-peft": {
+            "num_train_epochs": 1,
+            "eval_batch_size": 32,
+            "distribution": {
+                "multi_card": {
+                    "learning_rate": 0.0003,
+                    "train_batch_size": 32,
+                    "metrics": [
+                        "train_runtime",
+                        "train_samples_per_second"
+                    ],
+                    "extra_arguments": [
+                        "--bf16 True",
+                        "--subset ''",
+                        "--streaming False",
+                        "--packing True",
+                        "--gradient_accumulation_steps 8",
+                        "--gradient_checkpointing True",
+                        "--eval_strategy no",
+                        "--save_strategy no",
+                        "--throughput_warmup_steps 5",
+                        "--warmup_ratio  0.03",
+                        "--lr_scheduler_type cosine",
+                        "--max_grad_norm  0.3",
+                        "--logging_steps 1",
+                        "--adam_epsilon 3e-4",
+                        "--use_peft True",
+                        "--lora_r 4",
+                        "--lora_alpha 16",
+                        "--lora_dropout 0.05",
+                        "--lora_target_modules q_proj v_proj k_proj o_proj",
+                        "--max_seq_length 512",
+                        "--weight_decay 0.05",
+                        "--report_to none",
+                        "--max_steps 20"
+                    ]
+                }
+            }
+        },
+        "trl-sft-chat": {
+            "num_train_epochs": 1,
+            "eval_batch_size": 2,
+            "distribution": {
+                "multi_card": {
+                    "learning_rate": 0.0003,
+                    "train_batch_size": 2,
+                    "metrics": [
+                        "train_runtime",
+                        "train_samples_per_second"
+                    ],
                     "extra_arguments": [
                         "--bf16 True",
                         "--subset ''",
@@ -69,4 +151,4 @@
             }
         }
     }
-}
+}
\ No newline at end of file
diff --git a/tests/configs/examples/albert_large_v2.json b/tests/configs/examples/albert_large_v2.json
index 59648caf3e..74dc607903 100644
--- a/tests/configs/examples/albert_large_v2.json
+++ b/tests/configs/examples/albert_large_v2.json
@@ -5,18 +5,26 @@
             "eval_batch_size": 4,
             "distribution": {
                 "single_card": {
-                    "learning_rate": 6e-5,
+                    "learning_rate": 6e-05,
                     "train_batch_size": 32,
-                    "metrics": ["eval_f1", "train_runtime", "train_samples_per_second"],
+                    "metrics": [
+                        "eval_f1",
+                        "train_runtime",
+                        "train_samples_per_second"
+                    ],
                     "extra_arguments": [
                         "--max_seq_length 384",
                         "--use_hpu_graphs_for_inference"
                     ]
                 },
                 "multi_card": {
-                    "learning_rate": 6e-5,
+                    "learning_rate": 6e-05,
                     "train_batch_size": 32,
-                    "metrics": ["eval_f1", "train_runtime", "train_samples_per_second"],
+                    "metrics": [
+                        "eval_f1",
+                        "train_runtime",
+                        "train_samples_per_second"
+                    ],
                     "extra_arguments": [
                         "--max_seq_length 384",
                         "--use_hpu_graphs_for_inference"
@@ -31,18 +39,60 @@
             "eval_batch_size": 4,
             "distribution": {
                 "single_card": {
-                    "learning_rate": 6e-5,
+                    "learning_rate": 6e-05,
                     "train_batch_size": 128,
-                    "metrics": ["eval_f1", "train_runtime", "train_samples_per_second"],
+                    "metrics": [
+                        "eval_f1",
+                        "train_runtime",
+                        "train_samples_per_second"
+                    ],
                     "extra_arguments": [
                         "--max_seq_length 384",
                         "--use_hpu_graphs_for_inference"
                     ]
                 },
                 "multi_card": {
-                    "learning_rate": 7e-5,
+                    "learning_rate": 7e-05,
                     "train_batch_size": 128,
-                    "metrics": ["eval_f1", "train_runtime", "train_samples_per_second"],
+                    "metrics": [
+                        "eval_f1",
+                        "train_runtime",
+                        "train_samples_per_second"
+                    ],
+                    "extra_arguments": [
+                        "--max_seq_length 384",
+                        "--use_hpu_graphs_for_inference"
+                    ]
+                }
+            }
+        }
+    },
+    "gaudi3": {
+        "squad": {
+            "num_train_epochs": 2,
+            "eval_batch_size": 4,
+            "distribution": {
+                "single_card": {
+                    "learning_rate": 6e-05,
+                    "train_batch_size": 128,
+                    "metrics": [
+                        "eval_f1",
+                        "train_runtime",
+                        "train_samples_per_second"
+                    ],
+                    "extra_arguments": [
+                        "--max_seq_length 384",
+                        "--use_hpu_graphs_for_inference"
+                    ]
+                },
+                "multi_card": {
+                    "learning_rate": 7e-05,
+                    "train_batch_size": 128,
+                    "metrics": [
+                        "eval_f1",
+                        "train_runtime",
+                        "train_samples_per_second"
+                    ],
                     "extra_arguments": [
                         "--max_seq_length 384",
                         "--use_hpu_graphs_for_inference"
@@ -51,4 +101,4 @@
             }
         }
     }
-}
+}
\ No newline at end of file
diff --git a/tests/configs/examples/albert_xxlarge_v1.json b/tests/configs/examples/albert_xxlarge_v1.json
index ebda527c92..9b8e04472c 100644
--- a/tests/configs/examples/albert_xxlarge_v1.json
+++ b/tests/configs/examples/albert_xxlarge_v1.json
@@ -5,18 +5,26 @@
             "eval_batch_size": 2,
             "distribution": {
                 "single_card": {
-                    "learning_rate": 1e-5,
+                    "learning_rate": 1e-05,
                     "train_batch_size": 12,
-                    "metrics": ["eval_f1", "train_runtime", "train_samples_per_second"],
+                    "metrics": [
+                        "eval_f1",
+                        "train_runtime",
+                        "train_samples_per_second"
+                    ],
                     "extra_arguments": [
                         "--max_seq_length 384",
                         "--use_hpu_graphs_for_inference"
                     ]
                 },
                 "multi_card": {
-                    "learning_rate": 5e-5,
+                    "learning_rate": 5e-05,
                     "train_batch_size": 12,
-                    "metrics": ["eval_f1", "train_runtime", "train_samples_per_second"],
+                    "metrics": [
+                        "eval_f1",
+                        "train_runtime",
+                        "train_samples_per_second"
+                    ],
                     "extra_arguments": [
                         "--max_seq_length 384",
                         "--use_hpu_graphs_for_inference"
@@ -31,18 +39,60 @@
             "eval_batch_size": 2,
             "distribution": {
                 "single_card": {
-                    "learning_rate": 2e-5,
+                    "learning_rate": 2e-05,
                     "train_batch_size": 16,
-                    "metrics": ["eval_f1", "train_runtime", "train_samples_per_second"],
+                    "metrics": [
+                        "eval_f1",
+                        "train_runtime",
+                        "train_samples_per_second"
+                    ],
                     "extra_arguments": [
                         "--max_seq_length 384",
                         "--use_hpu_graphs_for_inference"
                     ]
                 },
                 "multi_card": {
-                    "learning_rate": 7e-5,
+                    "learning_rate": 7e-05,
                     "train_batch_size": 16,
-                    "metrics": ["eval_f1", "train_runtime", "train_samples_per_second"],
+                    "metrics": [
+                        "eval_f1",
+                        "train_runtime",
+                        "train_samples_per_second"
+                    ],
+                    "extra_arguments": [
+                        "--max_seq_length 384",
+                        "--use_hpu_graphs_for_inference"
+                    ]
+                }
+            }
+        }
+    },
+    "gaudi3": {
+        "squad": {
+            "num_train_epochs": 1,
+            "eval_batch_size": 2,
+            "distribution": {
+                "single_card": {
+                    "learning_rate": 2e-05,
+                    "train_batch_size": 16,
+                    "metrics": [
+                        "eval_f1",
+                        "train_runtime",
+                        "train_samples_per_second"
+                    ],
+                    "extra_arguments": [
+                        "--max_seq_length 384",
+                        "--use_hpu_graphs_for_inference"
+                    ]
+                },
+                "multi_card": {
+                    "learning_rate": 7e-05,
+                    "train_batch_size": 16,
+                    "metrics": [
+                        "eval_f1",
+                        "train_runtime",
+                        "train_samples_per_second"
+                    ],
                     "extra_arguments": [
                         "--max_seq_length 384",
                         "--use_hpu_graphs_for_inference"
@@ -51,4 +101,4 @@
             }
         }
     }
-}
+}
\ No newline at end of file
diff --git a/tests/configs/examples/ast_finetuned_speech_commands_v2.json b/tests/configs/examples/ast_finetuned_speech_commands_v2.json
index b9c347222b..204d122a09 100644
--- a/tests/configs/examples/ast_finetuned_speech_commands_v2.json
+++ b/tests/configs/examples/ast_finetuned_speech_commands_v2.json
@@ -5,9 +5,46 @@
             "eval_batch_size": 64,
             "distribution": {
                 "multi_card": {
-                    "learning_rate": 5e-4,
+                    "learning_rate": 0.0005,
                     "train_batch_size": 32,
-                    "metrics": ["eval_accuracy", "train_runtime", "train_samples_per_second", "eval_samples_per_second"],
+                    "metrics": [
+                        "eval_accuracy",
+                        "train_runtime",
+                        "train_samples_per_second",
+                        "eval_samples_per_second"
+                    ],
+                    "extra_arguments": [
+                        "--audio_column_name audio",
+                        "--label_column_name language",
+                        "--remove_unused_columns False",
+                        "--max_length_seconds 8",
+                        "--attention_mask False",
+                        "--warmup_ratio 0.1",
+                        "--seed 0",
+                        "--dataloader_num_workers 1",
+                        "--ignore_mismatched_sizes=True",
+                        "--use_hpu_graphs_for_training",
+                        "--use_hpu_graphs_for_inference",
+                        "--trust_remote_code True"
+                    ]
+                }
+            }
+        }
+    },
+    "gaudi3": {
+        "common_language": {
+            "num_train_epochs": 10,
+            "eval_batch_size": 64,
+            "distribution": {
+                "multi_card": {
+                    "learning_rate": 0.0005,
+                    "train_batch_size": 32,
+                    "metrics": [
+                        "eval_accuracy",
+                        "train_runtime",
+                        "train_samples_per_second",
+                        "eval_samples_per_second"
+                    ],
                     "extra_arguments": [
                         "--audio_column_name audio",
                         "--label_column_name language",
@@ -26,4 +63,4 @@
             }
         }
     }
-}
+}
\ No newline at end of file
diff --git a/tests/configs/examples/bert_base_uncased.json b/tests/configs/examples/bert_base_uncased.json
index 1960b5272a..c92b010e02 100644
--- a/tests/configs/examples/bert_base_uncased.json
+++ b/tests/configs/examples/bert_base_uncased.json
@@ -4,18 +4,26 @@
         "eval_batch_size": 8,
         "distribution": {
             "single_card": {
-                "learning_rate": 5e-5,
+                "learning_rate": 5e-05,
                 "train_batch_size": 24,
-                "metrics": ["eval_f1", "train_runtime", "train_samples_per_second"],
+                "metrics": [
+                    "eval_f1",
+                    "train_runtime",
+                    "train_samples_per_second"
+                ],
                 "extra_arguments": [
                     "--max_seq_length 384",
                     "--use_hpu_graphs_for_inference"
                 ]
             },
             "multi_card": {
-                "learning_rate": 2e-4,
+                "learning_rate": 0.0002,
                 "train_batch_size": 24,
-                "metrics": ["eval_f1", "train_runtime", "train_samples_per_second"],
+                "metrics": [
+                    "eval_f1",
+                    "train_runtime",
+                    "train_samples_per_second"
+                ],
                 "extra_arguments": [
                     "--max_seq_length 384",
                     "--use_hpu_graphs_for_inference"
@@ -28,18 +36,26 @@
         "eval_batch_size": 8,
         "distribution": {
             "single_card": {
-                "learning_rate": 6e-5,
+                "learning_rate": 6e-05,
                 "train_batch_size": 64,
-                "metrics": ["eval_f1", "train_runtime", "train_samples_per_second"],
+                "metrics": [
+                    "eval_f1",
+                    "train_runtime",
+                    "train_samples_per_second"
+                ],
                 "extra_arguments": [
                     "--max_seq_length 128",
                     "--use_hpu_graphs_for_inference"
                 ]
             },
             "multi_card": {
-                "learning_rate": 5e-4,
+                "learning_rate": 0.0005,
                 "train_batch_size": 64,
-                "metrics": ["eval_f1", "train_runtime", "train_samples_per_second"],
+                "metrics": [
+                    "eval_f1",
+                    "train_runtime",
+                    "train_samples_per_second"
+                ],
                 "extra_arguments": [
                     "--max_seq_length 128",
                     "--use_hpu_graphs_for_inference"
@@ -47,4 +63,4 @@
             }
         }
     }
-}
+}
\ No newline at end of file
diff --git a/tests/configs/examples/bert_large_uncased_whole_word_masking.json b/tests/configs/examples/bert_large_uncased_whole_word_masking.json
index f3a0d79692..e3ed43e39d 100755
--- a/tests/configs/examples/bert_large_uncased_whole_word_masking.json
+++ b/tests/configs/examples/bert_large_uncased_whole_word_masking.json
@@ -5,18 +5,26 @@
             "eval_batch_size": 8,
             "distribution": {
                 "single_card": {
-                    "learning_rate": 3e-5,
+                    "learning_rate": 3e-05,
                     "train_batch_size": 24,
-                    "metrics": ["eval_f1", "train_runtime", "train_samples_per_second"],
+                    "metrics": [
+                        "eval_f1",
+                        "train_runtime",
+                        "train_samples_per_second"
+                    ],
                     "extra_arguments": [
                         "--max_seq_length 384",
                         "--use_hpu_graphs_for_inference"
                     ]
                 },
                 "multi_card": {
-                    "learning_rate": 7e-5,
+                    "learning_rate": 7e-05,
                     "train_batch_size": 24,
-                    "metrics": ["eval_f1", "train_runtime", "train_samples_per_second"],
+                    "metrics": [
+                        "eval_f1",
+                        "train_runtime",
+                        "train_samples_per_second"
+                    ],
                     "extra_arguments": [
                         "--max_seq_length 384",
                         "--use_hpu_graphs_for_inference"
@@ -29,18 +37,26 @@
             "eval_batch_size": 8,
             "distribution": {
                 "single_card": {
-                    "learning_rate": 3e-5,
+                    "learning_rate": 3e-05,
                     "train_batch_size": 32,
-                    "metrics": ["eval_f1", "train_runtime", "train_samples_per_second"],
+                    "metrics": [
+                        "eval_f1",
+                        "train_runtime",
+                        "train_samples_per_second"
+                    ],
                     "extra_arguments": [
                         "--max_seq_length 128",
                         "--use_hpu_graphs_for_inference"
                     ]
                 },
                 "multi_card": {
-                    "learning_rate": 3e-5,
+                    "learning_rate": 3e-05,
                     "train_batch_size": 16,
-                    "metrics": ["eval_f1", "train_runtime", "train_samples_per_second"],
+                    "metrics": [
+                        "eval_f1",
+                        "train_runtime",
+                        "train_samples_per_second"
+                    ],
                     "extra_arguments": [
                         "--max_seq_length 128",
                         "--use_hpu_graphs_for_inference"
@@ -55,18 +71,26 @@
             "eval_batch_size": 8,
             "distribution": {
                 "single_card": {
-                    "learning_rate": 3e-5,
+                    "learning_rate": 3e-05,
                     "train_batch_size": 32,
-                    "metrics": ["eval_f1", "train_runtime", "train_samples_per_second"],
+                    "metrics": [
+                        "eval_f1",
+                        "train_runtime",
+                        "train_samples_per_second"
+                    ],
                     "extra_arguments": [
                         "--max_seq_length 384",
                         "--use_hpu_graphs_for_inference"
                     ]
                 },
                 "multi_card": {
-                    "learning_rate": 3e-5,
+                    "learning_rate": 3e-05,
                     "train_batch_size": 32,
-                    "metrics": ["eval_f1", "train_runtime", "train_samples_per_second"],
+                    "metrics": [
+                        "eval_f1",
+                        "train_runtime",
+                        "train_samples_per_second"
+                    ],
                     "extra_arguments": [
                         "--max_seq_length 384",
                         "--use_hpu_graphs_for_inference"
@@ -79,18 +103,92 @@
             "eval_batch_size": 8,
             "distribution": {
                 "single_card": {
-                    "learning_rate": 3e-5,
+                    "learning_rate": 3e-05,
                     "train_batch_size": 256,
-                    "metrics": ["eval_f1", "train_runtime", "train_samples_per_second"],
+                    "metrics": [
+                        "eval_f1",
+                        "train_runtime",
+                        "train_samples_per_second"
+                    ],
                     "extra_arguments": [
                         "--max_seq_length 128",
                         "--use_hpu_graphs_for_inference"
                     ]
                 },
                 "multi_card": {
-                    "learning_rate": 3e-5,
+                    "learning_rate": 3e-05,
                     "train_batch_size": 40,
-                    "metrics": ["eval_f1", "train_runtime", "train_samples_per_second"],
+                    "metrics": [
+                        "eval_f1",
+                        "train_runtime",
+                        "train_samples_per_second"
+                    ],
+                    "extra_arguments": [
+                        "--max_seq_length 128",
+                        "--use_hpu_graphs_for_inference"
+                    ]
+                }
+            }
+        }
+    },
+    "gaudi3": {
+        "squad": {
+            "num_train_epochs": 1,
+            "eval_batch_size": 8,
+            "distribution": {
+                "single_card": {
+                    "learning_rate": 3e-05,
+                    "train_batch_size": 32,
+                    "metrics": [
+                        "eval_f1",
+                        "train_runtime",
+                        "train_samples_per_second"
+                    ],
+                    "extra_arguments": [
+                        "--max_seq_length 384",
+                        "--use_hpu_graphs_for_inference"
+                    ]
+                },
+                "multi_card": {
+                    "learning_rate": 3e-05,
+                    "train_batch_size": 32,
+                    "metrics": [
+                        "eval_f1",
+                        "train_runtime",
+                        "train_samples_per_second"
+                    ],
+                    "extra_arguments": [
+                        "--max_seq_length 384",
+                        "--use_hpu_graphs_for_inference"
+                    ]
+                }
+            }
+        },
+        "mrpc": {
+            "num_train_epochs": 3,
+            "eval_batch_size": 8,
+            "distribution": {
+                "single_card": {
+                    "learning_rate": 3e-05,
+                    "train_batch_size": 256,
+                    "metrics": [
+                        "eval_f1",
+                        "train_runtime",
+                        "train_samples_per_second"
+                    ],
+                    "extra_arguments": [
+                        "--max_seq_length 128",
+                        "--use_hpu_graphs_for_inference"
+                    ]
+                },
+                "multi_card": {
+                    "learning_rate": 3e-05,
+                    "train_batch_size": 40,
+                    "metrics": [
+                        "eval_f1",
+                        "train_runtime",
+                        "train_samples_per_second"
+                    ],
                     "extra_arguments": [
                         "--max_seq_length 128",
                         "--use_hpu_graphs_for_inference"
@@ -99,4 +197,4 @@
             }
         }
     }
-}
+}
\ No newline at end of file
diff --git a/tests/configs/examples/bloom_7b1.json b/tests/configs/examples/bloom_7b1.json
index 9de0a72315..b017f499fe 100644
--- a/tests/configs/examples/bloom_7b1.json
+++ b/tests/configs/examples/bloom_7b1.json
@@ -5,9 +5,12 @@
             "eval_batch_size": 4,
             "distribution": {
                 "deepspeed": {
-                    "learning_rate": 1e-4,
+                    "learning_rate": 0.0001,
                     "train_batch_size": 8,
-                    "metrics": ["train_runtime", "train_samples_per_second"],
+                    "metrics": [
+                        "train_runtime",
+                        "train_samples_per_second"
+                    ],
                     "extra_arguments": [
                         "--dataset_config_name wikitext-2-raw-v1",
                         "--use_cache False",
@@ -19,4 +22,4 @@
             }
         }
     }
-}
+}
\ No newline at end of file
diff --git a/tests/configs/examples/bridgetower_large_itm_mlm_itc.json b/tests/configs/examples/bridgetower_large_itm_mlm_itc.json
index 6dce3b79dc..52da14a07f 100644
--- a/tests/configs/examples/bridgetower_large_itm_mlm_itc.json
+++ b/tests/configs/examples/bridgetower_large_itm_mlm_itc.json
@@ -5,9 +5,40 @@
             "eval_batch_size": 16,
             "distribution": {
                 "multi_card": {
-                    "learning_rate": 1e-5,
+                    "learning_rate": 1e-05,
                     "train_batch_size": 48,
-                    "metrics": ["train_runtime", "train_samples_per_second"],
+                    "metrics": [
+                        "train_runtime",
+                        "train_samples_per_second"
+                    ],
+                    "extra_arguments": [
+                        "--dataset_config_name matching",
+                        "--dataset_revision 3c6c4f6c0ff7e902833d3afa5f8f3875c2b036e6",
+                        "--image_column image",
+                        "--caption_column image_description",
+                        "--remove_unused_columns False",
+                        "--mediapipe_dataloader",
+                        "--dataloader_num_workers 2",
+                        "--logging_steps 10",
+                        "--use_hpu_graphs_for_inference",
+                        "--trust_remote_code True"
+                    ]
+                }
+            }
+        }
+    },
+    "gaudi3": {
+        "jmhessel/newyorker_caption_contest": {
+            "num_train_epochs": 5,
+            "eval_batch_size": 16,
+            "distribution": {
+                "multi_card": {
+                    "learning_rate": 1e-05,
+                    "train_batch_size": 48,
+                    "metrics": [
+                        "train_runtime",
+                        "train_samples_per_second"
+                    ],
                     "extra_arguments": [
                         "--dataset_config_name matching",
                         "--dataset_revision 3c6c4f6c0ff7e902833d3afa5f8f3875c2b036e6",
@@ -24,4 +55,4 @@
             }
         }
     }
-}
+}
\ No newline at end of file
diff --git a/tests/configs/examples/chatglm3_6b.json b/tests/configs/examples/chatglm3_6b.json
index ce55433e91..450e0eca41 100644
--- a/tests/configs/examples/chatglm3_6b.json
+++ b/tests/configs/examples/chatglm3_6b.json
@@ -5,9 +5,44 @@
             "eval_batch_size": 4,
             "distribution": {
                 "deepspeed": {
-                    "learning_rate": 5e-5,
+                    "learning_rate": 5e-05,
                     "train_batch_size": 4,
-                    "metrics": ["perplexity", "train_runtime", "train_samples_per_second"],
+                    "metrics": [
+                        "perplexity",
+                        "train_runtime",
+                        "train_samples_per_second"
+                    ],
+                    "extra_arguments": [
+                        "--dataset_name wikitext",
+                        "--dataset_config_name wikitext-2-raw-v1",
+                        "--block_size 1024",
+                        "--use_cache False",
+                        "--gradient_checkpointing",
+                        "--bf16",
+                        "--eval_strategy no",
+                        "--save_strategy no",
+                        "--throughput_warmup_steps 3",
+                        "--logging_first_step True",
+                        "--logging_steps 20",
+                        "--deepspeed tests/configs/deepspeed_zero_3_gaudi1.json"
+                    ]
+                }
+            }
+        }
+    },
+    "gaudi3": {
+        "wikitext": {
+            "num_train_epochs": 3,
+            "eval_batch_size": 4,
+            "distribution": {
+                "deepspeed": {
+                    "learning_rate": 5e-05,
+                    "train_batch_size": 4,
+                    "metrics": [
+                        "perplexity",
+                        "train_runtime",
+                        "train_samples_per_second"
+                    ],
                     "extra_arguments": [
                         "--dataset_name wikitext",
                         "--dataset_config_name wikitext-2-raw-v1",
@@ -26,4 +61,4 @@
             }
         }
     }
-}
+}
\ No newline at end of file
diff --git a/tests/configs/examples/clip_roberta.json b/tests/configs/examples/clip_roberta.json
index 37e9d1f5cc..87a28d40e3 100755
--- a/tests/configs/examples/clip_roberta.json
+++ b/tests/configs/examples/clip_roberta.json
@@ -5,9 +5,12 @@
             "eval_batch_size": 64,
             "distribution": {
                 "multi_card": {
-                    "learning_rate": 5e-5,
+                    "learning_rate": 5e-05,
                     "train_batch_size": 64,
-                    "metrics": ["train_runtime", "train_samples_per_second"],
+                    "metrics": [
+                        "train_runtime",
+                        "train_samples_per_second"
+                    ],
                     "extra_arguments": [
                         "--data_dir $PWD/",
                         "--dataset_config_name 2017",
@@ -34,7 +37,42 @@
                 "multi_card": {
                     "learning_rate": 5e-05,
                     "train_batch_size": 512,
-                    "metrics": ["train_runtime", "train_samples_per_second"],
+                    "metrics": [
+                        "train_runtime",
+                        "train_samples_per_second"
+                    ],
+                    "extra_arguments": [
+                        "--data_dir $PWD/",
+                        "--dataset_config_name 2017",
+                        "--image_column image_path",
+                        "--caption_column caption",
+                        "--remove_unused_columns False",
+                        "--warmup_steps 0",
+                        "--weight_decay 0.1",
+                        "--save_strategy no",
+                        "--use_hpu_graphs",
+                        "--dataloader_num_workers 2",
+                        "--mediapipe_dataloader",
+                        "--logging_nan_inf_filter",
+                        "--trust_remote_code True",
+                        "--max_steps 100"
+                    ]
+                }
+            }
+        }
+    },
+    "gaudi3": {
+        "ydshieh/coco_dataset_script": {
+            "eval_batch_size": 64,
+            "num_train_epochs": 1,
+            "distribution": {
+                "multi_card": {
+                    "learning_rate": 5e-05,
+                    "train_batch_size": 512,
+                    "metrics": [
+                        "train_runtime",
+                        "train_samples_per_second"
+                    ],
                     "extra_arguments": [
                         "--data_dir $PWD/",
                         "--dataset_config_name 2017",
@@ -55,4 +93,4 @@
             }
         }
     }
-}
+}
\ No newline at end of file
diff --git a/tests/configs/examples/distilbert_base_uncased.json b/tests/configs/examples/distilbert_base_uncased.json
index 0eb215102a..fb1900c312 100644
--- a/tests/configs/examples/distilbert_base_uncased.json
+++ b/tests/configs/examples/distilbert_base_uncased.json
@@ -5,18 +5,26 @@
             "eval_batch_size": 8,
             "distribution": {
                 "single_card": {
-                    "learning_rate": 1e-4,
+                    "learning_rate": 0.0001,
                     "train_batch_size": 48,
-                    "metrics": ["eval_f1", "train_runtime", "train_samples_per_second"],
+                    "metrics": [
+                        "eval_f1",
+                        "train_runtime",
+                        "train_samples_per_second"
+                    ],
                     "extra_arguments": [
                         "--max_seq_length 384",
                         "--use_hpu_graphs_for_inference"
                     ]
                 },
                 "multi_card": {
-                    "learning_rate": 4e-4,
+                    "learning_rate": 0.0004,
                     "train_batch_size": 48,
-                    "metrics": ["eval_f1", "train_runtime", "train_samples_per_second"],
+                    "metrics": [
+                        "eval_f1",
+                        "train_runtime",
+                        "train_samples_per_second"
+                    ],
                     "extra_arguments": [
                         "--max_seq_length 384",
                         "--use_hpu_graphs_for_inference"
@@ -31,18 +39,60 @@
             "eval_batch_size": 8,
             "distribution": {
                 "single_card": {
-                    "learning_rate": 2e-4,
+                    "learning_rate": 0.0002,
                     "train_batch_size": 64,
-                    "metrics": ["eval_f1", "train_runtime", "train_samples_per_second"],
+                    "metrics": [
+                        "eval_f1",
+                        "train_runtime",
+                        "train_samples_per_second"
+                    ],
                     "extra_arguments": [
                         "--max_seq_length 384",
                         "--use_hpu_graphs_for_inference"
                     ]
                 },
                 "multi_card": {
-                    "learning_rate": 3e-4,
+                    "learning_rate": 0.0003,
                     "train_batch_size": 64,
-                    "metrics": ["eval_f1", "train_runtime", "train_samples_per_second"],
+                    "metrics": [
+                        "eval_f1",
+                        "train_runtime",
+                        "train_samples_per_second"
+                    ],
+                    "extra_arguments": [
+                        "--max_seq_length 384",
+                        "--use_hpu_graphs_for_inference"
+                    ]
+                }
+            }
+        }
+    },
+    "gaudi3": {
+        "squad": {
+            "num_train_epochs": 2,
+            "eval_batch_size": 8,
+            "distribution": {
+                "single_card": {
+                    "learning_rate": 0.0002,
+                    "train_batch_size": 64,
+                    "metrics": [
+                        "eval_f1",
+                        "train_runtime",
+                        "train_samples_per_second"
+                    ],
+                    "extra_arguments": [
+                        "--max_seq_length 384",
+                        "--use_hpu_graphs_for_inference"
+                    ]
+                },
+                "multi_card": {
+                    "learning_rate": 0.0003,
+                    "train_batch_size": 64,
+                    "metrics": [
+                        "eval_f1",
+                        "train_runtime",
+                        "train_samples_per_second"
+                    ],
                     "extra_arguments": [
                         "--max_seq_length 384",
                         "--use_hpu_graphs_for_inference"
@@ -51,4 +101,4 @@
             }
         }
     }
-}
+}
\ No newline at end of file
diff --git a/tests/configs/examples/falcon_40b.json b/tests/configs/examples/falcon_40b.json
index 73c0ef93be..f499aec61c 100644
--- a/tests/configs/examples/falcon_40b.json
+++ b/tests/configs/examples/falcon_40b.json
@@ -5,9 +5,13 @@
             "eval_batch_size": 1,
             "distribution": {
                 "multi_card": {
-                    "learning_rate": 4e-4,
+                    "learning_rate": 0.0004,
                     "train_batch_size": 1,
-                    "metrics": ["perplexity", "train_runtime", "train_samples_per_second"],
+                    "metrics": [
+                        "perplexity",
+                        "train_runtime",
+                        "train_samples_per_second"
+                    ],
                     "extra_arguments": [
                         "--bf16",
                         "--gradient_accumulation_steps 16",
@@ -38,9 +42,13 @@
             "eval_batch_size": 1,
             "distribution": {
                 "multi_card": {
-                    "learning_rate": 4e-4,
+                    "learning_rate": 0.0004,
                     "train_batch_size": 1,
-                    "metrics": ["perplexity", "train_runtime", "train_samples_per_second"],
+                    "metrics": [
+                        "perplexity",
+                        "train_runtime",
+                        "train_samples_per_second"
+                    ],
                     "extra_arguments": [
                         "--bf16",
                         "--gradient_accumulation_steps 16",
@@ -64,6 +72,81 @@
                     ]
                 }
             }
-	}
+        }
+    },
+    "gaudi3": {
+        "timdettmers/openassistant-guanaco": {
+            "num_train_epochs": 1,
+            "eval_batch_size": 1,
+            "distribution": {
+                "multi_card": {
+                    "learning_rate": 0.0004,
+                    "train_batch_size": 1,
+                    "metrics": [
+                        "perplexity",
+                        "train_runtime",
+                        "train_samples_per_second"
+                    ],
+                    "extra_arguments": [
+                        "--bf16",
+                        "--gradient_accumulation_steps 16",
+                        "--eval_strategy no",
+                        "--save_strategy no",
+                        "--warmup_ratio  0.03",
+                        "--lr_scheduler_type constant",
+                        "--max_grad_norm  0.3",
+                        "--logging_steps 1",
+                        "--use_hpu_graphs_for_inference",
+                        "--lora_rank 64",
+                        "--lora_alpha 16",
+                        "--lora_dropout 0.1",
+                        "--lora_target_modules query_key_value dense dense_h_to_4h dense_4h_to_h",
+                        "--dataset_concatenation",
+                        "--max_seq_length 256",
+                        "--low_cpu_mem_usage True",
+                        "--adam_epsilon 1e-08",
+                        "--ddp_bucket_cap_mb 50",
+                        "--pipelining_fwd_bwd",
+                        "--validation_split_percentage 10"
+                    ]
+                }
+            }
+        },
+        "mamamiya405/finred": {
+            "num_train_epochs": 1,
+            "eval_batch_size": 1,
+            "distribution": {
+                "multi_card": {
+                    "learning_rate": 0.0004,
+                    "train_batch_size": 1,
+                    "metrics": [
+                        "perplexity",
+                        "train_runtime",
+                        "train_samples_per_second"
+                    ],
+                    "extra_arguments": [
+                        "--bf16",
+                        "--gradient_accumulation_steps 16",
+                        "--eval_strategy no",
+                        "--save_strategy no",
+                        "--warmup_ratio  0.03",
+                        "--lr_scheduler_type constant",
+                        "--max_grad_norm  0.3",
+                        "--logging_steps 1",
+                        "--use_hpu_graphs_for_inference",
+                        "--lora_rank 64",
+                        "--lora_alpha 16",
+                        "--lora_dropout 0.1",
+                        "--lora_target_modules query_key_value dense dense_h_to_4h dense_4h_to_h",
+                        "--max_seq_length 256",
+                        "--low_cpu_mem_usage True",
+                        "--adam_epsilon 1e-08",
+                        "--ddp_bucket_cap_mb 50",
+                        "--pipelining_fwd_bwd",
+                        "--validation_split_percentage 10"
+                    ]
+                }
+            }
+        }
     }
-}
+}
\ No newline at end of file
diff --git a/tests/configs/examples/flan_t5_xxl.json b/tests/configs/examples/flan_t5_xxl.json
index 3f67ea03b3..f16d13c882 100644
--- a/tests/configs/examples/flan_t5_xxl.json
+++ b/tests/configs/examples/flan_t5_xxl.json
@@ -5,9 +5,43 @@
             "eval_batch_size": 22,
             "distribution": {
                 "deepspeed": {
-                    "learning_rate": 1e-4,
+                    "learning_rate": 0.0001,
                     "train_batch_size": 22,
-                    "metrics": ["eval_rougeLsum", "train_runtime", "train_samples_per_second"],
+                    "metrics": [
+                        "eval_rougeLsum",
+                        "train_runtime",
+                        "train_samples_per_second"
+                    ],
+                    "extra_arguments": [
+                        "--max_steps 20",
+                        "--max_eval_samples 880",
+                        "--dataset_config 3.0.0",
+                        "--source_prefix summarize: ",
+                        "--predict_with_generate",
+                        "--ignore_pad_token_for_loss False",
+                        "--pad_to_max_length",
+                        "--generation_max_length 129",
+                        "--gradient_checkpointing",
+                        "--adam_epsilon 1e-08",
+                        "--deepspeed examples/summarization/ds_flan_t5_z3_config_bf16.json"
+                    ]
+                }
+            }
+        }
+    },
+    "gaudi3": {
+        "cnn_dailymail": {
+            "num_train_epochs": 2,
+            "eval_batch_size": 22,
+            "distribution": {
+                "deepspeed": {
+                    "learning_rate": 0.0001,
+                    "train_batch_size": 22,
+                    "metrics": [
+                        "eval_rougeLsum",
+                        "train_runtime",
+                        "train_samples_per_second"
+                    ],
                     "extra_arguments": [
                         "--max_steps 20",
                         "--max_eval_samples 880",
@@ -25,4 +59,4 @@
             }
         }
     }
-}
+}
\ No newline at end of file
diff --git a/tests/configs/examples/gemma_2b_it.json b/tests/configs/examples/gemma_2b_it.json
index cfea562791..6ecab478ad 100644
--- a/tests/configs/examples/gemma_2b_it.json
+++ b/tests/configs/examples/gemma_2b_it.json
@@ -5,27 +5,87 @@
             "eval_batch_size": 4,
             "distribution": {
                 "single_card": {
-                    "learning_rate": 2e-4,
+                    "learning_rate": 0.0002,
                     "train_batch_size": 4,
-                    "metrics": ["perplexity", "train_runtime", "train_samples_per_second"],
+                    "metrics": [
+                        "perplexity",
+                        "train_runtime",
+                        "train_samples_per_second"
+                    ],
                     "extra_arguments": [
                         "--dataset_config_name wikitext-2-raw-v1",
                         "--use_hpu_graphs_for_inference"
                     ]
                 },
                 "multi_card": {
-                    "learning_rate": 8e-4,
+                    "learning_rate": 0.0008,
                     "train_batch_size": 4,
-                    "metrics": ["perplexity", "train_runtime", "train_samples_per_second"],
+                    "metrics": [
+                        "perplexity",
+                        "train_runtime",
+                        "train_samples_per_second"
+                    ],
                     "extra_arguments": [
                         "--dataset_config_name wikitext-2-raw-v1",
                         "--use_hpu_graphs_for_inference"
                     ]
                 },
                 "deepspeed": {
-                    "learning_rate": 8e-4,
+                    "learning_rate": 0.0008,
                     "train_batch_size": 4,
-                    "metrics": ["perplexity", "train_runtime", "train_samples_per_second"],
+                    "metrics": [
+                        "perplexity",
+                        "train_runtime",
+                        "train_samples_per_second"
+                    ],
+                    "extra_arguments": [
+                        "--dataset_config_name wikitext-2-raw-v1",
+                        "--use_hpu_graphs_for_inference",
+                        "--deepspeed tests/configs/deepspeed_zero_2.json"
+                    ]
+                }
+            }
+        }
+    },
+    "gaudi3": {
+        "wikitext": {
+            "num_train_epochs": 2,
+            "eval_batch_size": 4,
+            "distribution": {
+                "single_card": {
+                    "learning_rate": 0.0002,
+                    "train_batch_size": 4,
+                    "metrics": [
+                        "perplexity",
+                        "train_runtime",
+                        "train_samples_per_second"
+                    ],
+                    "extra_arguments": [
+                        "--dataset_config_name wikitext-2-raw-v1",
+                        "--use_hpu_graphs_for_inference"
+                    ]
+                },
+                "multi_card": {
+                    "learning_rate": 0.0008,
+                    "train_batch_size": 4,
+                    "metrics": [
+                        "perplexity",
+                        "train_runtime",
+                        "train_samples_per_second"
+                    ],
+                    "extra_arguments": [
+                        "--dataset_config_name wikitext-2-raw-v1",
+                        "--use_hpu_graphs_for_inference"
+                    ]
+                },
+                "deepspeed": {
+                    "learning_rate": 0.0008,
+                    "train_batch_size": 4,
+                    "metrics": [
+                        "perplexity",
+                        "train_runtime",
+                        "train_samples_per_second"
+                    ],
                     "extra_arguments": [
                         "--dataset_config_name wikitext-2-raw-v1",
                         "--use_hpu_graphs_for_inference",
@@ -35,4 +95,4 @@
             }
         }
     }
-}
+}
\ No newline at end of file
diff --git a/tests/configs/examples/gemma_2b_it_eager.json b/tests/configs/examples/gemma_2b_it_eager.json
index 09808d99d5..ea7993094d 100644
--- a/tests/configs/examples/gemma_2b_it_eager.json
+++ b/tests/configs/examples/gemma_2b_it_eager.json
@@ -5,9 +5,33 @@
             "eval_batch_size": 4,
             "distribution": {
                 "single_card": {
-                    "learning_rate": 2e-4,
+                    "learning_rate": 0.0002,
                     "train_batch_size": 4,
-                    "metrics": ["perplexity", "train_runtime", "train_samples_per_second"],
+                    "metrics": [
+                        "perplexity",
+                        "train_runtime",
+                        "train_samples_per_second"
+                    ],
+                    "extra_arguments": [
+                        "--dataset_config_name wikitext-2-raw-v1"
+                    ]
+                }
+            }
+        }
+    },
+    "gaudi3": {
+        "wikitext": {
+            "num_train_epochs": 2,
+            "eval_batch_size": 4,
+            "distribution": {
+                "single_card": {
+                    "learning_rate": 0.0002,
+                    "train_batch_size": 4,
+                    "metrics": [
+                        "perplexity",
+                        "train_runtime",
+                        "train_samples_per_second"
+                    ],
                     "extra_arguments": [
                         "--dataset_config_name wikitext-2-raw-v1"
                     ]
@@ -15,4 +39,4 @@
             }
         }
     }
-}
+}
\ No newline at end of file
diff --git a/tests/configs/examples/gpt2.json b/tests/configs/examples/gpt2.json
index 747ec83dd8..59d09b7264 100644
--- a/tests/configs/examples/gpt2.json
+++ b/tests/configs/examples/gpt2.json
@@ -5,9 +5,13 @@
             "eval_batch_size": 4,
             "distribution": {
                 "single_card": {
-                    "learning_rate": 5e-5,
+                    "learning_rate": 5e-05,
                     "train_batch_size": 4,
-                    "metrics": ["perplexity", "train_runtime", "train_samples_per_second"],
+                    "metrics": [
+                        "perplexity",
+                        "train_runtime",
+                        "train_samples_per_second"
+                    ],
                     "extra_arguments": [
                         "--dataset_config_name wikitext-2-raw-v1",
                         "--use_hpu_graphs_for_inference",
@@ -15,9 +19,13 @@
                     ]
                 },
                 "multi_card": {
-                    "learning_rate": 4e-4,
+                    "learning_rate": 0.0004,
                     "train_batch_size": 4,
-                    "metrics": ["perplexity", "train_runtime", "train_samples_per_second"],
+                    "metrics": [
+                        "perplexity",
+                        "train_runtime",
+                        "train_samples_per_second"
+                    ],
                     "extra_arguments": [
                         "--dataset_config_name wikitext-2-raw-v1",
                         "--use_hpu_graphs_for_inference",
@@ -33,18 +41,60 @@
             "eval_batch_size": 4,
             "distribution": {
                 "single_card": {
-                    "learning_rate": 2e-4,
+                    "learning_rate": 0.0002,
                     "train_batch_size": 16,
-                    "metrics": ["perplexity", "train_runtime", "train_samples_per_second"],
+                    "metrics": [
+                        "perplexity",
+                        "train_runtime",
+                        "train_samples_per_second"
+                    ],
                     "extra_arguments": [
                         "--dataset_config_name wikitext-2-raw-v1",
                         "--use_hpu_graphs_for_inference"
                     ]
                 },
                 "multi_card": {
-                    "learning_rate": 8e-4,
+                    "learning_rate": 0.0008,
                     "train_batch_size": 16,
-                    "metrics": ["perplexity", "train_runtime", "train_samples_per_second"],
+                    "metrics": [
+                        "perplexity",
+                        "train_runtime",
+                        "train_samples_per_second"
+                    ],
+                    "extra_arguments": [
+                        "--dataset_config_name wikitext-2-raw-v1",
+                        "--use_hpu_graphs_for_inference"
+                    ]
+                }
+            }
+        }
+    },
+    "gaudi3": {
+        "wikitext": {
+            "num_train_epochs": 2,
+            "eval_batch_size": 4,
+            "distribution": {
+                "single_card": {
+                    "learning_rate": 0.0002,
+                    "train_batch_size": 16,
+                    "metrics": [
+                        "perplexity",
+                        "train_runtime",
+                        "train_samples_per_second"
+                    ],
+                    "extra_arguments": [
+                        "--dataset_config_name wikitext-2-raw-v1",
+                        "--use_hpu_graphs_for_inference"
+                    ]
+                },
+                "multi_card": {
+                    "learning_rate": 0.0008,
+                    "train_batch_size": 16,
+                    "metrics": [
+                        "perplexity",
+                        "train_runtime",
+                        "train_samples_per_second"
+                    ],
                     "extra_arguments": [
                         "--dataset_config_name wikitext-2-raw-v1",
                         "--use_hpu_graphs_for_inference"
@@ -53,4 +103,4 @@
             }
         }
     }
-}
+}
\ No newline at end of file
diff --git a/tests/configs/examples/gpt2_xl.json b/tests/configs/examples/gpt2_xl.json
index eb89da6d27..eeb9398b73 100644
--- a/tests/configs/examples/gpt2_xl.json
+++ b/tests/configs/examples/gpt2_xl.json
@@ -5,12 +5,16 @@
             "eval_batch_size": 4,
             "distribution": {
                 "deepspeed": {
-                    "learning_rate": 5e-5,
+                    "learning_rate": 5e-05,
                     "train_batch_size": 2,
                     "perplexity": 12.6744,
                     "train_runtime": 366.8694,
                     "train_samples_per_second": 16.464,
-                    "metrics": ["perplexity", "train_runtime", "train_samples_per_second"],
+                    "metrics": [
+                        "perplexity",
+                        "train_runtime",
+                        "train_samples_per_second"
+                    ],
                     "extra_arguments": [
                         "--dataset_config_name wikitext-2-raw-v1",
                         "--use_hpu_graphs_for_inference",
@@ -26,9 +30,36 @@
             "eval_batch_size": 4,
             "distribution": {
                 "deepspeed": {
-                    "learning_rate": 4e-4,
+                    "learning_rate": 0.0004,
                     "train_batch_size": 16,
-                    "metrics": ["perplexity", "train_runtime", "train_samples_per_second"],
+                    "metrics": [
+                        "perplexity",
+                        "train_runtime",
+                        "train_samples_per_second"
+                    ],
+                    "extra_arguments": [
+                        "--dataset_config_name wikitext-2-raw-v1",
+                        "--gradient_checkpointing",
+                        "--use_hpu_graphs_for_inference",
+                        "--deepspeed tests/configs/deepspeed_zero_2.json"
+                    ]
+                }
+            }
+        }
+    },
+    "gaudi3": {
+        "wikitext": {
+            "num_train_epochs": 2,
+            "eval_batch_size": 4,
+            "distribution": {
+                "deepspeed": {
+                    "learning_rate": 0.0004,
+                    "train_batch_size": 16,
+                    "metrics": [
+                        "perplexity",
+                        "train_runtime",
+                        "train_samples_per_second"
+                    ],
                     "extra_arguments": [
                         "--dataset_config_name wikitext-2-raw-v1",
                         "--gradient_checkpointing",
@@ -39,4 +70,4 @@
             }
         }
     }
-}
+}
\ No newline at end of file
diff --git a/tests/configs/examples/gpt_neox_20b.json b/tests/configs/examples/gpt_neox_20b.json
index 5a68691c16..0ed304101f 100644
--- a/tests/configs/examples/gpt_neox_20b.json
+++ b/tests/configs/examples/gpt_neox_20b.json
@@ -5,9 +5,36 @@
             "eval_batch_size": 2,
             "distribution": {
                 "deepspeed": {
-                    "learning_rate": 5e-5,
+                    "learning_rate": 5e-05,
                     "train_batch_size": 2,
-                    "metrics": ["perplexity", "train_runtime", "train_samples_per_second"],
+                    "metrics": [
+                        "perplexity",
+                        "train_runtime",
+                        "train_samples_per_second"
+                    ],
+                    "extra_arguments": [
+                        "--dataset_config_name wikitext-2-raw-v1",
+                        "--gradient_checkpointing",
+                        "--use_hpu_graphs_for_inference",
+                        "--deepspeed tests/configs/deepspeed_zero_2.json"
+                    ]
+                }
+            }
+        }
+    },
+    "gaudi3": {
+        "wikitext": {
+            "num_train_epochs": 1,
+            "eval_batch_size": 2,
+            "distribution": {
+                "deepspeed": {
+                    "learning_rate": 5e-05,
+                    "train_batch_size": 2,
+                    "metrics": [
+                        "perplexity",
+                        "train_runtime",
+                        "train_samples_per_second"
+                    ],
                     "extra_arguments": [
                         "--dataset_config_name wikitext-2-raw-v1",
                         "--gradient_checkpointing",
@@ -18,4 +45,4 @@
             }
         }
     }
-}
+}
\ No newline at end of file
diff --git a/tests/configs/examples/idefics2_8b.json b/tests/configs/examples/idefics2_8b.json
index c74f37ecee..45a40adfd3 100644
--- a/tests/configs/examples/idefics2_8b.json
+++ b/tests/configs/examples/idefics2_8b.json
@@ -5,9 +5,51 @@
             "eval_batch_size": 4,
             "distribution": {
                 "multi_card": {
-                    "learning_rate": 5e-5,
+                    "learning_rate": 5e-05,
                     "train_batch_size": 2,
-                    "metrics": ["eval_accuracy", "train_runtime", "train_samples_per_second"],
+                    "metrics": [
+                        "eval_accuracy",
+                        "train_runtime",
+                        "train_samples_per_second"
+                    ],
+                    "extra_arguments": [
+                        "--bf16",
+                        "--gradient_accumulation_steps 8",
+                        "--eval_strategy no",
+                        "--save_strategy no",
+                        "--warmup_steps 50",
+                        "--lr_scheduler_type constant",
+                        "--max_grad_norm 0.3",
+                        "--logging_steps 1",
+                        "--use_hpu_graphs_for_inference",
+                        "--lora_rank 8",
+                        "--lora_alpha 8",
+                        "--lora_dropout 0.1",
+                        "--lora_target_modules '.*(text_model|modality_projection|perceiver_resampler).*(down_proj|gate_proj|up_proj|k_proj|q_proj|v_proj|o_proj).*$'",
+                        "--low_cpu_mem_usage True",
+                        "--adam_epsilon 1e-08",
+                        "--input_column_name image query",
+                        "--output_column_name answers",
+                        "--remove_unused_columns False",
+                        "--max_seq_length 512"
+                    ]
+                }
+            }
+        }
+    },
+    "gaudi3": {
+        "image2text_lora_finetune": {
+            "num_train_epochs": 2,
+            "eval_batch_size": 4,
+            "distribution": {
+                "multi_card": {
+                    "learning_rate": 5e-05,
+                    "train_batch_size": 2,
+                    "metrics": [
+                        "eval_accuracy",
+                        "train_runtime",
+                        "train_samples_per_second"
+                    ],
                     "extra_arguments": [
                         "--bf16",
                         "--gradient_accumulation_steps 8",
@@ -33,4 +75,4 @@
             }
         }
     }
-}
+}
\ No newline at end of file
diff --git a/tests/configs/examples/llama_7b.json b/tests/configs/examples/llama_7b.json
index 29c4a23e0a..d3d4a8ffeb 100644
--- a/tests/configs/examples/llama_7b.json
+++ b/tests/configs/examples/llama_7b.json
@@ -5,9 +5,13 @@
             "eval_batch_size": 2,
             "distribution": {
                 "single_card": {
-                    "learning_rate": 2e-4,
+                    "learning_rate": 0.0002,
                     "train_batch_size": 2,
-                    "metrics": ["perplexity", "train_runtime", "train_samples_per_second"],
+                    "metrics": [
+                        "perplexity",
+                        "train_runtime",
+                        "train_samples_per_second"
+                    ],
                     "extra_arguments": [
                         "--bf16",
                         "--gradient_accumulation_steps 1",
@@ -39,9 +43,13 @@
             "eval_batch_size": 2,
             "distribution": {
                 "multi_card": {
-                    "learning_rate": 1e-4,
+                    "learning_rate": 0.0001,
                     "train_batch_size": 2,
-                    "metrics": ["perplexity", "train_runtime", "train_samples_per_second"],
+                    "metrics": [
+                        "perplexity",
+                        "train_runtime",
+                        "train_samples_per_second"
+                    ],
                     "extra_arguments": [
                         "--bf16",
                         "--gradient_accumulation_steps 4",
@@ -62,9 +70,13 @@
             "eval_batch_size": 8,
             "distribution": {
                 "single_card": {
-                    "learning_rate": 2e-4,
+                    "learning_rate": 0.0002,
                     "train_batch_size": 16,
-                    "metrics": ["perplexity", "train_runtime", "train_samples_per_second"],
+                    "metrics": [
+                        "perplexity",
+                        "train_runtime",
+                        "train_samples_per_second"
+                    ],
                     "extra_arguments": [
                         "--bf16",
                         "--gradient_accumulation_steps 1",
@@ -96,9 +108,13 @@
             "eval_batch_size": 4,
             "distribution": {
                 "multi_card": {
-                    "learning_rate": 3e-4,
+                    "learning_rate": 0.0003,
                     "train_batch_size": 8,
-                    "metrics": ["perplexity", "train_runtime", "train_samples_per_second"],
+                    "metrics": [
+                        "perplexity",
+                        "train_runtime",
+                        "train_samples_per_second"
+                    ],
                     "extra_arguments": [
                         "--bf16",
                         "--gradient_accumulation_steps 2",
@@ -129,9 +145,13 @@
             "eval_batch_size": 4,
             "distribution": {
                 "multi_card": {
-                    "learning_rate": 3e-4,
+                    "learning_rate": 0.0003,
                     "train_batch_size": 8,
-                    "metrics": ["perplexity", "train_runtime", "train_samples_per_second"],
+                    "metrics": [
+                        "perplexity",
+                        "train_runtime",
+                        "train_samples_per_second"
+                    ],
                     "extra_arguments": [
                         "--bf16",
                         "--gradient_accumulation_steps 2",
@@ -161,9 +181,13 @@
             "eval_batch_size": 1,
             "distribution": {
                 "multi_card": {
-                    "learning_rate": 3e-4,
+                    "learning_rate": 0.0003,
                     "train_batch_size": 8,
-                    "metrics": ["perplexity", "train_runtime", "train_samples_per_second"],
+                    "metrics": [
+                        "perplexity",
+                        "train_runtime",
+                        "train_samples_per_second"
+                    ],
                     "extra_arguments": [
                         "--bf16 True",
                         "--gradient_accumulation_steps 2",
@@ -198,9 +222,13 @@
             "eval_batch_size": 4,
             "distribution": {
                 "multi_card": {
-                    "learning_rate": 3e-4,
+                    "learning_rate": 0.0003,
                     "train_batch_size": 8,
-                    "metrics": ["perplexity", "train_runtime", "train_samples_per_second"],
+                    "metrics": [
+                        "perplexity",
+                        "train_runtime",
+                        "train_samples_per_second"
+                    ],
                     "extra_arguments": [
                         "--bf16",
                         "--gradient_accumulation_steps 2",
@@ -234,9 +262,12 @@
             "eval_batch_size": 1,
             "distribution": {
                 "multi_card": {
-                    "learning_rate": 1e-4,
+                    "learning_rate": 0.0001,
                     "train_batch_size": 4,
-                    "metrics": ["train_runtime", "train_samples_per_second"],
+                    "metrics": [
+                        "train_runtime",
+                        "train_samples_per_second"
+                    ],
                     "extra_arguments": [
                         "--bf16 True",
                         "--gradient_accumulation_steps 2",
@@ -264,9 +295,12 @@
             "eval_batch_size": 1,
             "distribution": {
                 "multi_card": {
-                    "learning_rate": 5e-4,
+                    "learning_rate": 0.0005,
                     "train_batch_size": 1,
-                    "metrics": ["train_runtime", "train_samples_per_second"],
+                    "metrics": [
+                        "train_runtime",
+                        "train_samples_per_second"
+                    ],
                     "extra_arguments": [
                         "--logging_steps 1",
                         "--lora_r 8",
@@ -294,9 +328,12 @@
             "eval_batch_size": 1,
             "distribution": {
                 "multi_card": {
-                    "learning_rate": 5e-4,
+                    "learning_rate": 0.0005,
                     "train_batch_size": 1,
-                    "metrics": ["train_runtime", "train_samples_per_second"],
+                    "metrics": [
+                        "train_runtime",
+                        "train_samples_per_second"
+                    ],
                     "extra_arguments": [
                         "--logging_steps 1",
                         "--lora_r 8",
@@ -319,9 +356,12 @@
             "eval_batch_size": 1,
             "distribution": {
                 "multi_card": {
-                    "learning_rate": 5e-4,
+                    "learning_rate": 0.0005,
                     "train_batch_size": 8,
-                    "metrics": ["train_runtime", "train_samples_per_second"],
+                    "metrics": [
+                        "train_runtime",
+                        "train_samples_per_second"
+                    ],
                     "extra_arguments": [
                         "--lora_r 8",
                         "--lora_alpha 16",
@@ -346,9 +386,13 @@
             "eval_batch_size": 1,
             "distribution": {
                 "multi_card": {
-                    "learning_rate": 5e-4,
+                    "learning_rate": 0.0005,
                     "train_batch_size": 1,
-                    "metrics": ["perplexity", "train_runtime", "train_samples_per_second"],
+                    "metrics": [
+                        "perplexity",
+                        "train_runtime",
+                        "train_samples_per_second"
+                    ],
                     "extra_arguments": [
                         "--num_virtual_tokens 8",
                         "--max_seq_length 64",
@@ -369,9 +413,13 @@
             "eval_batch_size": 1,
             "distribution": {
                 "multi_card": {
-                    "learning_rate": 5e-4,
+                    "learning_rate": 0.0005,
                     "train_batch_size": 1,
-                    "metrics": ["perplexity", "train_runtime", "train_samples_per_second"],
+                    "metrics": [
+                        "perplexity",
+                        "train_runtime",
+                        "train_samples_per_second"
+                    ],
                     "extra_arguments": [
                         "--num_virtual_tokens 8",
                         "--max_seq_length 64",
@@ -392,9 +440,13 @@
             "eval_batch_size": 1,
             "distribution": {
                 "multi_card": {
-                    "learning_rate": 5e-4,
+                    "learning_rate": 0.0005,
                     "train_batch_size": 1,
-                    "metrics": ["perplexity", "train_runtime", "train_samples_per_second"],
+                    "metrics": [
+                        "perplexity",
+                        "train_runtime",
+                        "train_samples_per_second"
+                    ],
                     "extra_arguments": [
                         "--num_virtual_tokens 8",
                         "--max_seq_length 64",
@@ -415,9 +467,13 @@
             "eval_batch_size": 4,
             "distribution": {
                 "multi_card": {
-                    "learning_rate": 3e-4,
+                    "learning_rate": 0.0003,
                     "train_batch_size": 16,
-                    "metrics": ["perplexity", "train_runtime", "train_samples_per_second"],
+                    "metrics": [
+                        "perplexity",
+                        "train_runtime",
+                        "train_samples_per_second"
+                    ],
                     "extra_arguments": [
                         "--bf16",
                         "--gradient_accumulation_steps 1",
@@ -450,9 +506,13 @@
             "eval_batch_size": 4,
             "distribution": {
                 "multi_card": {
-                    "learning_rate": 3e-4,
+                    "learning_rate": 0.0003,
                     "train_batch_size": 8,
-                    "metrics": ["perplexity", "train_runtime", "train_samples_per_second"],
+                    "metrics": [
+                        "perplexity",
+                        "train_runtime",
+                        "train_samples_per_second"
+                    ],
                     "extra_arguments": [
                         "--bf16",
                         "--gradient_accumulation_steps 2",
@@ -481,9 +541,13 @@
             "eval_batch_size": 4,
             "distribution": {
                 "multi_card": {
-                    "learning_rate": 3e-4,
+                    "learning_rate": 0.0003,
                     "train_batch_size": 8,
-                    "metrics": ["perplexity", "train_runtime", "train_samples_per_second"],
+                    "metrics": [
+                        "perplexity",
+                        "train_runtime",
+                        "train_samples_per_second"
+                    ],
                     "extra_arguments": [
                         "--bf16",
                         "--gradient_accumulation_steps 2",
@@ -520,9 +584,13 @@
             "eval_batch_size": 4,
             "distribution": {
                 "multi_card": {
-                    "learning_rate": 1e-2,
+                    "learning_rate": 0.01,
                     "train_batch_size": 8,
-                    "metrics": ["perplexity", "train_runtime", "train_samples_per_second"],
+                    "metrics": [
+                        "perplexity",
+                        "train_runtime",
+                        "train_samples_per_second"
+                    ],
                     "extra_arguments": [
                         "--bf16",
                         "--gradient_accumulation_steps 1",
@@ -551,9 +619,13 @@
             "eval_batch_size": 4,
             "distribution": {
                 "multi_card": {
-                    "learning_rate": 3e-4,
+                    "learning_rate": 0.0003,
                     "train_batch_size": 8,
-                    "metrics": ["perplexity", "train_runtime", "train_samples_per_second"],
+                    "metrics": [
+                        "perplexity",
+                        "train_runtime",
+                        "train_samples_per_second"
+                    ],
                     "extra_arguments": [
                         "--bf16",
                         "--gradient_accumulation_steps 2",
@@ -582,9 +654,631 @@
             "eval_batch_size": 4,
             "distribution": {
                 "deepspeed": {
-                    "learning_rate": 3e-4,
+                    "learning_rate": 0.0003,
                     "train_batch_size": 8,
-                    "metrics": ["perplexity", "train_runtime", "train_samples_per_second"],
+                    "metrics": [
+                        "perplexity",
+                        "train_runtime",
+                        "train_samples_per_second"
+                    ],
+                    "extra_arguments": [
+                        "--bf16 True",
+                        "--gradient_accumulation_steps 4",
+                        "--logging_steps 1",
+                        "--validation_split_percentage 10",
+                        "--lora_rank 8",
+                        "--lora_alpha 16",
+                        "--lora_dropout 0.05",
+                        "--lora_target_modules q_proj v_proj",
+                        "--dataset_concatenation",
+                        "--max_seq_length 2048",
+                        "--pipelining_fwd_bwd",
+                        "--throughput_warmup_steps 3",
+                        "--use_lazy_mode",
+                        "--context_parallel_size 4",
+                        "--deepspeed tests/configs/deepspeed_zero_1.json"
+                    ]
+                }
+            }
+        }
+    },
+    "gaudi3": {
+        "databricks/databricks-dolly-15k": {
+            "num_train_epochs": 1,
+            "eval_batch_size": 8,
+            "distribution": {
+                "single_card": {
+                    "learning_rate": 0.0002,
+                    "train_batch_size": 16,
+                    "metrics": [
+                        "perplexity",
+                        "train_runtime",
+                        "train_samples_per_second"
+                    ],
+                    "extra_arguments": [
+                        "--bf16",
+                        "--gradient_accumulation_steps 1",
+                        "--eval_strategy no",
+                        "--save_strategy no",
+                        "--warmup_ratio 0.03",
+                        "--lr_scheduler_type constant",
+                        "--max_grad_norm 0.3",
+                        "--logging_steps 1",
+                        "--use_hpu_graphs_for_inference",
+                        "--lora_rank 8",
+                        "--lora_alpha 16",
+                        "--lora_dropout 0.1",
+                        "--lora_target_modules q_proj v_proj",
+                        "--dataset_concatenation",
+                        "--low_cpu_mem_usage True",
+                        "--adam_epsilon 1e-08",
+                        "--validation_split_percentage 20",
+                        "--attn_softmax_bf16",
+                        "--max_steps 100",
+                        "--input_column_name context",
+                        "--output_column_name response"
+                    ]
+                }
+            }
+        },
+        "tatsu-lab/alpaca": {
+            "num_train_epochs": 3,
+            "eval_batch_size": 4,
+            "distribution": {
+                "multi_card": {
+                    "learning_rate": 0.0003,
+                    "train_batch_size": 8,
+                    "metrics": [
+                        "perplexity",
+                        "train_runtime",
+                        "train_samples_per_second"
+                    ],
+                    "extra_arguments": [
+                        "--bf16",
+                        "--gradient_accumulation_steps 2",
+                        "--eval_strategy no",
+                        "--save_strategy no",
+                        "--warmup_ratio  0.03",
+                        "--lr_scheduler_type constant",
+                        "--max_grad_norm  0.3",
+                        "--logging_steps 1",
+                        "--use_hpu_graphs_for_inference",
+                        "--lora_rank 8",
+                        "--lora_alpha 16",
+                        "--lora_dropout 0.05",
+                        "--lora_target_modules q_proj v_proj",
+                        "--dataset_concatenation",
+                        "--max_seq_length 512",
+                        "--low_cpu_mem_usage True",
+                        "--adam_epsilon 1e-08",
+                        "--ddp_bucket_cap_mb 50",
+                        "--validation_split_percentage 10",
+                        "--attn_softmax_bf16"
+                    ]
+                }
+            }
+        },
+        "mamamiya405/finred": {
+            "num_train_epochs": 3,
+            "eval_batch_size": 4,
+            "distribution": {
+                "multi_card": {
+                    "learning_rate": 0.0003,
+                    "train_batch_size": 8,
+                    "metrics": [
+                        "perplexity",
+                        "train_runtime",
+                        "train_samples_per_second"
+                    ],
+                    "extra_arguments": [
+                        "--bf16",
+                        "--gradient_accumulation_steps 2",
+                        "--eval_strategy no",
+                        "--save_strategy no",
+                        "--warmup_ratio  0.03",
+                        "--lr_scheduler_type constant",
+                        "--max_grad_norm  0.3",
+                        "--logging_steps 1",
+                        "--use_hpu_graphs_for_inference",
+                        "--lora_rank 8",
+                        "--lora_alpha 16",
+                        "--lora_dropout 0.05",
+                        "--lora_target_modules q_proj v_proj",
+                        "--max_seq_length 512",
+                        "--low_cpu_mem_usage True",
+                        "--adam_epsilon 1e-08",
+                        "--ddp_bucket_cap_mb 50",
+                        "--validation_split_percentage 10",
+                        "--attn_softmax_bf16"
+                    ]
+                }
+            }
+        },
+        "tatsu-lab/alpaca_fsdpcompile": {
+            "num_train_epochs": 1,
+            "eval_batch_size": 1,
+            "distribution": {
+                "multi_card": {
+                    "learning_rate": 0.0003,
+                    "train_batch_size": 8,
+                    "metrics": [
+                        "perplexity",
+                        "train_runtime",
+                        "train_samples_per_second"
+                    ],
+                    "extra_arguments": [
+                        "--bf16 True",
+                        "--gradient_accumulation_steps 2",
+                        "--eval_strategy no",
+                        "--save_strategy no",
+                        "--warmup_ratio  0.03",
+                        "--lr_scheduler_type constant",
+                        "--max_grad_norm  0.3",
+                        "--logging_steps 1",
+                        "--lora_rank 8",
+                        "--lora_alpha 16",
+                        "--lora_dropout 0.05",
+                        "--lora_target_modules q_proj v_proj",
+                        "--dataset_concatenation",
+                        "--max_seq_length 512",
+                        "--low_cpu_mem_usage True",
+                        "--adam_epsilon 1e-08",
+                        "--ddp_bucket_cap_mb 50",
+                        "--validation_split_percentage 10",
+                        "--attn_softmax_bf16",
+                        "--pipelining_fwd_bwd False",
+                        "--fsdp auto_wrap",
+                        "--torch_compile_backend hpu_backend",
+                        "--torch_compile",
+                        "--fsdp_config examples/language-modeling/fsdp_config.json"
+                    ]
+                }
+            }
+        },
+        "llama-adapter": {
+            "num_train_epochs": 3,
+            "eval_batch_size": 4,
+            "distribution": {
+                "multi_card": {
+                    "learning_rate": 0.0003,
+                    "train_batch_size": 8,
+                    "metrics": [
+                        "perplexity",
+                        "train_runtime",
+                        "train_samples_per_second"
+                    ],
+                    "extra_arguments": [
+                        "--bf16",
+                        "--gradient_accumulation_steps 2",
+                        "--eval_strategy no",
+                        "--save_strategy no",
+                        "--warmup_ratio  0.03",
+                        "--lr_scheduler_type constant",
+                        "--max_grad_norm  0.3",
+                        "--logging_steps 1",
+                        "--use_hpu_graphs_for_inference",
+                        "--lora_rank 8",
+                        "--lora_alpha 16",
+                        "--lora_dropout 0.05",
+                        "--lora_target_modules q_proj v_proj",
+                        "--dataset_concatenation",
+                        "--max_seq_length 512",
+                        "--low_cpu_mem_usage True",
+                        "--adam_epsilon 1e-08",
+                        "--ddp_bucket_cap_mb 50",
+                        "--validation_split_percentage 10",
+                        "--attn_softmax_bf16",
+                        "--adapter_layers 2",
+                        "--adapter_len 4",
+                        "--peft_type llama-adapter"
+                    ]
+                }
+            }
+        },
+        "trl-sft": {
+            "num_train_epochs": 1,
+            "eval_batch_size": 1,
+            "distribution": {
+                "multi_card": {
+                    "learning_rate": 0.0001,
+                    "train_batch_size": 4,
+                    "metrics": [
+                        "train_runtime",
+                        "train_samples_per_second"
+                    ],
+                    "extra_arguments": [
+                        "--bf16 True",
+                        "--gradient_accumulation_steps 2",
+                        "--eval_strategy no",
+                        "--save_strategy no",
+                        "--warmup_ratio  0.03",
+                        "--lr_scheduler_type constant",
+                        "--max_grad_norm  0.3",
+                        "--logging_steps 1",
+                        "--lora_r 8",
+                        "--lora_alpha 16",
+                        "--lora_dropout 0.05",
+                        "--lora_target_modules q_proj v_proj",
+                        "--max_seq_length 1024",
+                        "--optim paged_adamw_32bit",
+                        "--weight_decay 0.05",
+                        "--report_to none",
+                        "--max_steps 100"
+                    ]
+                }
+            }
+        },
+        "trl-dpo": {
+            "num_train_epochs": 1,
+            "eval_batch_size": 1,
+            "distribution": {
+                "multi_card": {
+                    "learning_rate": 0.0005,
+                    "train_batch_size": 1,
+                    "metrics": [
+                        "train_runtime",
+                        "train_samples_per_second"
+                    ],
+                    "extra_arguments": [
+                        "--logging_steps 1",
+                        "--lora_r 8",
+                        "--lora_alpha 16",
+                        "--lora_dropout 0.05",
+                        "--lora_target_modules q_proj v_proj k_proj out_proj fc_in fc_out wte",
+                        "--max_length 1024",
+                        "--max_prompt_length 512",
+                        "--report_to none",
+                        "--max_steps 100",
+                        "--eval_steps 200",
+                        "--lr_scheduler_type cosine",
+                        "--warmup_steps 0",
+                        "--weight_decay 0.05",
+                        "--optimizer_type paged_adamw_32bit",
+                        "--beta 0.1",
+                        "--gradient_accumulation_steps 4",
+                        "--sanity_check"
+                    ]
+                }
+            }
+        },
+        "trl-reward": {
+            "num_train_epochs": 1,
+            "eval_batch_size": 1,
+            "distribution": {
+                "multi_card": {
+                    "learning_rate": 0.0005,
+                    "train_batch_size": 1,
+                    "metrics": [
+                        "train_runtime",
+                        "train_samples_per_second"
+                    ],
+                    "extra_arguments": [
+                        "--logging_steps 1",
+                        "--lora_r 8",
+                        "--lora_alpha 16",
+                        "--lora_dropout 0.05",
+                        "--lora_target_modules q_proj v_proj k_proj out_proj fc_in fc_out wte",
+                        "--max_length 1024",
+                        "--eval_steps 200",
+                        "--lr_scheduler_type cosine",
+                        "--weight_decay 0.05",
+                        "--gradient_accumulation_steps 4",
+                        "--train_subset 500",
+                        "--eval_subset 100"
+                    ]
+                }
+            }
+        },
+        "trl-ppo": {
+            "num_train_epochs": 1,
+            "eval_batch_size": 1,
+            "distribution": {
+                "multi_card": {
+                    "learning_rate": 0.0005,
+                    "train_batch_size": 8,
+                    "metrics": [
+                        "train_runtime",
+                        "train_samples_per_second"
+                    ],
+                    "extra_arguments": [
+                        "--lora_r 8",
+                        "--lora_alpha 16",
+                        "--lora_dropout 0.05",
+                        "--reward_model_name HuggingFaceH4/tiny-random-LlamaForSequenceClassification",
+                        "--lora_target_modules q_proj v_proj k_proj out_proj fc_in fc_out wte",
+                        "--max_train_samples 1000",
+                        "--use_habana",
+                        "--ppo_epochs 1",
+                        "--batched_gen True",
+                        "--mini_batch_size 1",
+                        "--output_max_length 128",
+                        "--input_max_length 128",
+                        "--learning_rate 1.4e-5",
+                        "--early_stopping"
+                    ]
+                }
+            }
+        },
+        "prompt-tuning": {
+            "num_train_epochs": 20,
+            "eval_batch_size": 1,
+            "distribution": {
+                "multi_card": {
+                    "learning_rate": 0.0005,
+                    "train_batch_size": 1,
+                    "metrics": [
+                        "perplexity",
+                        "train_runtime",
+                        "train_samples_per_second"
+                    ],
+                    "extra_arguments": [
+                        "--num_virtual_tokens 8",
+                        "--max_seq_length 64",
+                        "--logging_steps 1",
+                        "--report_to none",
+                        "--max_steps 100",
+                        "--peft_type prompt_tuning",
+                        "--lr_scheduler_type cosine",
+                        "--warmup_steps 0",
+                        "--weight_decay 0.05",
+                        "--gradient_accumulation_steps 1"
+                    ]
+                }
+            }
+        },
+        "prefix-tuning": {
+            "num_train_epochs": 20,
+            "eval_batch_size": 1,
+            "distribution": {
+                "multi_card": {
+                    "learning_rate": 0.0005,
+                    "train_batch_size": 1,
+                    "metrics": [
+                        "perplexity",
+                        "train_runtime",
+                        "train_samples_per_second"
+                    ],
+                    "extra_arguments": [
+                        "--num_virtual_tokens 8",
+                        "--max_seq_length 64",
+                        "--logging_steps 1",
+                        "--report_to none",
+                        "--max_steps 100",
+                        "--peft_type prefix_tuning",
+                        "--lr_scheduler_type cosine",
+                        "--warmup_steps 0",
+                        "--weight_decay 0.05",
+                        "--gradient_accumulation_steps 1"
+                    ]
+                }
+            }
+        },
+        "p-tuning": {
+            "num_train_epochs": 20,
+            "eval_batch_size": 1,
+            "distribution": {
+                "multi_card": {
+                    "learning_rate": 0.0005,
+                    "train_batch_size": 1,
+                    "metrics": [
+                        "perplexity",
+                        "train_runtime",
+                        "train_samples_per_second"
+                    ],
+                    "extra_arguments": [
+                        "--num_virtual_tokens 8",
+                        "--max_seq_length 64",
+                        "--logging_steps 1",
+                        "--report_to none",
+                        "--max_steps 100",
+                        "--peft_type p_tuning",
+                        "--lr_scheduler_type cosine",
+                        "--warmup_steps 0",
+                        "--weight_decay 0.05",
+                        "--gradient_accumulation_steps 1"
+                    ]
+                }
+            }
+        },
+        "tatsu-lab/alpaca_fp8": {
+            "num_train_epochs": 3,
+            "eval_batch_size": 4,
+            "distribution": {
+                "multi_card": {
+                    "learning_rate": 0.0003,
+                    "train_batch_size": 16,
+                    "metrics": [
+                        "perplexity",
+                        "train_runtime",
+                        "train_samples_per_second"
+                    ],
+                    "extra_arguments": [
+                        "--bf16",
+                        "--gradient_accumulation_steps 1",
+                        "--eval_strategy no",
+                        "--save_strategy no",
+                        "--warmup_ratio  0.03",
+                        "--lr_scheduler_type constant",
+                        "--logging_steps 40",
+                        "--lora_rank 8",
+                        "--lora_alpha 16",
+                        "--lora_dropout 0.05",
+                        "--lora_target_modules q_proj v_proj",
+                        "--dataset_concatenation",
+                        "--max_seq_length 512",
+                        "--low_cpu_mem_usage True",
+                        "--adam_epsilon 1e-08",
+                        "--ddp_bucket_cap_mb 50",
+                        "--validation_split_percentage 10",
+                        "--pipelining_fwd_bwd",
+                        "--throughput_warmup_steps 18",
+                        "--use_lazy_mode",
+                        "--max_grad_norm 0.3",
+                        "--fp8"
+                    ]
+                }
+            }
+        },
+        "ia3": {
+            "num_train_epochs": 3,
+            "eval_batch_size": 4,
+            "distribution": {
+                "multi_card": {
+                    "learning_rate": 0.0003,
+                    "train_batch_size": 8,
+                    "metrics": [
+                        "perplexity",
+                        "train_runtime",
+                        "train_samples_per_second"
+                    ],
+                    "extra_arguments": [
+                        "--bf16",
+                        "--gradient_accumulation_steps 2",
+                        "--eval_strategy no",
+                        "--save_strategy no",
+                        "--warmup_ratio  0.03",
+                        "--lr_scheduler_type constant",
+                        "--max_grad_norm  0.3",
+                        "--logging_steps 1",
+                        "--use_hpu_graphs_for_inference",
+                        "--ia3_target_modules q_proj v_proj",
+                        "--dataset_concatenation",
+                        "--max_seq_length 512",
+                        "--low_cpu_mem_usage True",
+                        "--adam_epsilon 1e-08",
+                        "--ddp_bucket_cap_mb 50",
+                        "--validation_split_percentage 10",
+                        "--attn_softmax_bf16",
+                        "--peft_type ia3"
+                    ]
+                }
+            }
+        },
+        "adalora": {
+            "num_train_epochs": 3,
+            "eval_batch_size": 4,
+            "distribution": {
+                "multi_card": {
+                    "learning_rate": 0.0003,
+                    "train_batch_size": 8,
+                    "metrics": [
+                        "perplexity",
+                        "train_runtime",
+                        "train_samples_per_second"
+                    ],
+                    "extra_arguments": [
+                        "--bf16",
+                        "--gradient_accumulation_steps 2",
+                        "--eval_strategy no",
+                        "--save_strategy no",
+                        "--warmup_ratio  0.03",
+                        "--lr_scheduler_type constant",
+                        "--max_grad_norm  0.3",
+                        "--logging_steps 1",
+                        "--use_hpu_graphs_for_inference",
+                        "--lora_alpha 16",
+                        "--lora_dropout 0.05",
+                        "--lora_target_modules q_proj v_proj",
+                        "--adalora_init_r 12",
+                        "--adalora_target_r 4",
+                        "--adalora_tinit 50",
+                        "--adalora_tfinal 500",
+                        "--adalora_delta_t 100",
+                        "--adalora_orth_reg_weight 0.5",
+                        "--dataset_concatenation",
+                        "--max_seq_length 512",
+                        "--low_cpu_mem_usage True",
+                        "--adam_epsilon 1e-08",
+                        "--ddp_bucket_cap_mb 50",
+                        "--validation_split_percentage 10",
+                        "--attn_softmax_bf16",
+                        "--peft_type adalora"
+                    ]
+                }
+            }
+        },
+        "vera": {
+            "num_train_epochs": 3,
+            "eval_batch_size": 4,
+            "distribution": {
+                "multi_card": {
+                    "learning_rate": 0.01,
+                    "train_batch_size": 8,
+                    "metrics": [
+                        "perplexity",
+                        "train_runtime",
+                        "train_samples_per_second"
+                    ],
+                    "extra_arguments": [
+                        "--bf16",
+                        "--gradient_accumulation_steps 1",
+                        "--eval_strategy no",
+                        "--save_strategy no",
+                        "--warmup_ratio  0.03",
+                        "--lr_scheduler_type constant",
+                        "--max_grad_norm  0.3",
+                        "--logging_steps 1",
+                        "--use_hpu_graphs_for_inference",
+                        "--vera_target_modules q_proj v_proj",
+                        "--dataset_concatenation",
+                        "--max_seq_length 512",
+                        "--low_cpu_mem_usage True",
+                        "--adam_epsilon 1e-08",
+                        "--ddp_bucket_cap_mb 50",
+                        "--validation_split_percentage 10",
+                        "--attn_softmax_bf16",
+                        "--peft_type vera"
+                    ]
+                }
+            }
+        },
+        "ln_tuning": {
+            "num_train_epochs": 3,
+            "eval_batch_size": 4,
+            "distribution": {
+                "multi_card": {
+                    "learning_rate": 0.0003,
+                    "train_batch_size": 8,
+                    "metrics": [
+                        "perplexity",
+                        "train_runtime",
+                        "train_samples_per_second"
+                    ],
+                    "extra_arguments": [
+                        "--bf16",
+                        "--gradient_accumulation_steps 2",
+                        "--eval_strategy no",
+                        "--save_strategy no",
+                        "--warmup_ratio  0.03",
+                        "--lr_scheduler_type constant",
+                        "--max_grad_norm  0.3",
+                        "--logging_steps 1",
+                        "--use_hpu_graphs_for_inference",
+                        "--ln_target_module input_layernorm post_attention_layernorm norm",
+                        "--dataset_concatenation",
+                        "--max_seq_length 512",
+                        "--low_cpu_mem_usage True",
+                        "--adam_epsilon 1e-08",
+                        "--ddp_bucket_cap_mb 50",
+                        "--validation_split_percentage 10",
+                        "--attn_softmax_bf16",
+                        "--peft_type ln_tuning"
+                    ]
+                }
+            }
+        },
+        "tatsu-lab/alpaca_cp": {
+            "num_train_epochs": 1,
+            "eval_batch_size": 4,
+            "distribution": {
+                "deepspeed": {
+                    "learning_rate": 0.0003,
+                    "train_batch_size": 8,
+                    "metrics": [
+                        "perplexity",
+                        "train_runtime",
+                        "train_samples_per_second"
+                    ],
                     "extra_arguments": [
                         "--bf16 True",
                         "--gradient_accumulation_steps 4",
@@ -606,4 +1300,4 @@
             }
         }
     }
-}
+}
\ No newline at end of file
diff --git a/tests/configs/examples/llava_1_5_7b_hf.json b/tests/configs/examples/llava_1_5_7b_hf.json
index 774ca979e0..b378a77213 100644
--- a/tests/configs/examples/llava_1_5_7b_hf.json
+++ b/tests/configs/examples/llava_1_5_7b_hf.json
@@ -5,9 +5,51 @@
             "eval_batch_size": 4,
             "distribution": {
                 "multi_card": {
-                    "learning_rate": 5e-5,
+                    "learning_rate": 5e-05,
                     "train_batch_size": 2,
-                    "metrics": ["eval_accuracy", "train_runtime", "train_samples_per_second"],
+                    "metrics": [
+                        "eval_accuracy",
+                        "train_runtime",
+                        "train_samples_per_second"
+                    ],
+                    "extra_arguments": [
+                        "--bf16",
+                        "--gradient_accumulation_steps 8",
+                        "--eval_strategy no",
+                        "--save_strategy no",
+                        "--warmup_steps 50",
+                        "--lr_scheduler_type constant",
+                        "--max_grad_norm 0.3",
+                        "--logging_steps 1",
+                        "--use_hpu_graphs_for_inference",
+                        "--lora_rank 8",
+                        "--lora_alpha 8",
+                        "--lora_dropout 0.1",
+                        "--lora_target_modules '.*(language_model).*(down_proj|gate_proj|up_proj|k_proj|q_proj|v_proj|o_proj).*$'",
+                        "--low_cpu_mem_usage True",
+                        "--adam_epsilon 1e-08",
+                        "--input_column_name image query",
+                        "--output_column_name answers",
+                        "--remove_unused_columns False",
+                        "--max_seq_length 512"
+                    ]
+                }
+            }
+        }
+    },
+    "gaudi3": {
+        "image2text_lora_finetune": {
+            "num_train_epochs": 1,
+            "eval_batch_size": 4,
+            "distribution": {
+                "multi_card": {
+                    "learning_rate": 5e-05,
+                    "train_batch_size": 2,
+                    "metrics": [
+                        "eval_accuracy",
+                        "train_runtime",
+                        "train_samples_per_second"
+                    ],
                     "extra_arguments": [
                         "--bf16",
                         "--gradient_accumulation_steps 8",
@@ -33,4 +75,4 @@
             }
         }
     }
-}
+}
\ No newline at end of file
diff --git a/tests/configs/examples/protst_esm1b_for_sequential_classification.json b/tests/configs/examples/protst_esm1b_for_sequential_classification.json
index d80c1dd57d..808354d887 100644
--- a/tests/configs/examples/protst_esm1b_for_sequential_classification.json
+++ b/tests/configs/examples/protst_esm1b_for_sequential_classification.json
@@ -5,9 +5,39 @@
             "eval_batch_size": 4,
             "distribution": {
                 "multi_card": {
-                    "learning_rate": 5e-5,
+                    "learning_rate": 5e-05,
                     "train_batch_size": 32,
-                    "metrics": ["eval_accuracy", "train_runtime", "train_samples_per_second"],
+                    "metrics": [
+                        "eval_accuracy",
+                        "train_runtime",
+                        "train_samples_per_second"
+                    ],
+                    "extra_arguments": [
+                        "--save_strategy no",
+                        "--tokenizer_name facebook/esm1b_t33_650M_UR50S",
+                        "--use_hpu_graphs_for_inference",
+                        "--use_hpu_graphs_for_training",
+                        "--trust_remote_code",
+                        "--torch_dtype bfloat16",
+                        "--label_names labels"
+                    ]
+                }
+            }
+        }
+    },
+    "gaudi3": {
+        "prost-sequence-classification": {
+            "num_train_epochs": 1,
+            "eval_batch_size": 4,
+            "distribution": {
+                "multi_card": {
+                    "learning_rate": 5e-05,
+                    "train_batch_size": 32,
+                    "metrics": [
+                        "eval_accuracy",
+                        "train_runtime",
+                        "train_samples_per_second"
+                    ],
                     "extra_arguments": [
                         "--save_strategy no",
                         "--tokenizer_name facebook/esm1b_t33_650M_UR50S",
@@ -21,4 +51,4 @@
             }
         }
     }
-}
+}
\ No newline at end of file
diff --git a/tests/configs/examples/roberta_base.json b/tests/configs/examples/roberta_base.json
index 8409805d8c..ac8477c654 100644
--- a/tests/configs/examples/roberta_base.json
+++ b/tests/configs/examples/roberta_base.json
@@ -5,18 +5,26 @@
             "eval_batch_size": 8,
             "distribution": {
                 "single_card": {
-                    "learning_rate": 3e-5,
+                    "learning_rate": 3e-05,
                     "train_batch_size": 12,
-                    "metrics": ["eval_f1", "train_runtime", "train_samples_per_second"],
+                    "metrics": [
+                        "eval_f1",
+                        "train_runtime",
+                        "train_samples_per_second"
+                    ],
                     "extra_arguments": [
                         "--max_seq_length 384",
                         "--use_hpu_graphs_for_inference"
                     ]
                 },
                 "multi_card": {
-                    "learning_rate": 8e-5,
+                    "learning_rate": 8e-05,
                     "train_batch_size": 12,
-                    "metrics": ["eval_f1", "train_runtime", "train_samples_per_second"],
+                    "metrics": [
+                        "eval_f1",
+                        "train_runtime",
+                        "train_samples_per_second"
+                    ],
                     "extra_arguments": [
                         "--max_seq_length 384",
                         "--use_hpu_graphs_for_inference"
@@ -29,9 +37,13 @@
             "eval_batch_size": 8,
             "distribution": {
                 "multi_card": {
-                    "learning_rate": 5e-5,
+                    "learning_rate": 5e-05,
                     "train_batch_size": 24,
-                    "metrics": ["perplexity", "train_runtime", "train_samples_per_second"],
+                    "metrics": [
+                        "perplexity",
+                        "train_runtime",
+                        "train_samples_per_second"
+                    ],
                     "extra_arguments": [
                         "--dataset_config_name wikitext-2-raw-v1",
                         "--use_hpu_graphs_for_inference",
@@ -47,18 +59,26 @@
             "eval_batch_size": 8,
             "distribution": {
                 "single_card": {
-                    "learning_rate": 7e-5,
+                    "learning_rate": 7e-05,
                     "train_batch_size": 64,
-                    "metrics": ["eval_f1", "train_runtime", "train_samples_per_second"],
+                    "metrics": [
+                        "eval_f1",
+                        "train_runtime",
+                        "train_samples_per_second"
+                    ],
                     "extra_arguments": [
                         "--max_seq_length 384",
                         "--use_hpu_graphs_for_inference"
                     ]
                 },
                 "multi_card": {
-                    "learning_rate": 2e-4,
+                    "learning_rate": 0.0002,
                     "train_batch_size": 64,
-                    "metrics": ["eval_f1", "train_runtime", "train_samples_per_second"],
+                    "metrics": [
+                        "eval_f1",
+                        "train_runtime",
+                        "train_samples_per_second"
+                    ],
                     "extra_arguments": [
                         "--max_seq_length 384",
                         "--use_hpu_graphs_for_inference"
@@ -71,9 +91,67 @@
             "eval_batch_size": 8,
             "distribution": {
                 "multi_card": {
-                    "learning_rate": 8e-5,
+                    "learning_rate": 8e-05,
                     "train_batch_size": 32,
-                    "metrics": ["perplexity", "train_runtime", "train_samples_per_second"],
+                    "metrics": [
+                        "perplexity",
+                        "train_runtime",
+                        "train_samples_per_second"
+                    ],
+                    "extra_arguments": [
+                        "--dataset_config_name wikitext-2-raw-v1",
+                        "--use_hpu_graphs_for_inference",
+                        "--ddp_find_unused_parameters True"
+                    ]
+                }
+            }
+        }
+    },
+    "gaudi3": {
+        "squad": {
+            "num_train_epochs": 1,
+            "eval_batch_size": 8,
+            "distribution": {
+                "single_card": {
+                    "learning_rate": 7e-05,
+                    "train_batch_size": 64,
+                    "metrics": [
+                        "eval_f1",
+                        "train_runtime",
+                        "train_samples_per_second"
+                    ],
+                    "extra_arguments": [
+                        "--max_seq_length 384",
+                        "--use_hpu_graphs_for_inference"
+                    ]
+                },
+                "multi_card": {
+                    "learning_rate": 0.0002,
+                    "train_batch_size": 64,
+                    "metrics": [
+                        "eval_f1",
+                        "train_runtime",
+                        "train_samples_per_second"
+                    ],
+                    "extra_arguments": [
+                        "--max_seq_length 384",
+                        "--use_hpu_graphs_for_inference"
+                    ]
+                }
+            }
+        },
+        "wikitext": {
+            "num_train_epochs": 2,
+            "eval_batch_size": 8,
+            "distribution": {
+                "multi_card": {
+                    "learning_rate": 8e-05,
+                    "train_batch_size": 32,
+                    "metrics": [
+                        "perplexity",
+                        "train_runtime",
+                        "train_samples_per_second"
+                    ],
                     "extra_arguments": [
                         "--dataset_config_name wikitext-2-raw-v1",
                         "--use_hpu_graphs_for_inference",
@@ -83,4 +161,4 @@
             }
         }
     }
-}
+}
\ No newline at end of file
diff --git a/tests/configs/examples/roberta_large.json b/tests/configs/examples/roberta_large.json
index 90b6dd5dce..72b7989a13 100755
--- a/tests/configs/examples/roberta_large.json
+++ b/tests/configs/examples/roberta_large.json
@@ -5,18 +5,26 @@
             "eval_batch_size": 8,
             "distribution": {
                 "single_card": {
-                    "learning_rate": 3e-5,
+                    "learning_rate": 3e-05,
                     "train_batch_size": 12,
-                    "metrics": ["eval_f1", "train_runtime", "train_samples_per_second"],
+                    "metrics": [
+                        "eval_f1",
+                        "train_runtime",
+                        "train_samples_per_second"
+                    ],
                     "extra_arguments": [
                         "--max_seq_length 384",
                         "--use_hpu_graphs_for_inference"
                     ]
                 },
                 "multi_card": {
-                    "learning_rate": 8e-5,
+                    "learning_rate": 8e-05,
                     "train_batch_size": 12,
-                    "metrics": ["eval_f1", "train_runtime", "train_samples_per_second"],
+                    "metrics": [
+                        "eval_f1",
+                        "train_runtime",
+                        "train_samples_per_second"
+                    ],
                     "extra_arguments": [
                         "--max_seq_length 384",
                         "--use_hpu_graphs_for_inference"
@@ -29,9 +37,13 @@
             "eval_batch_size": 8,
             "distribution": {
                 "multi_card": {
-                    "learning_rate": 5e-5,
+                    "learning_rate": 5e-05,
                     "train_batch_size": 8,
-                    "metrics": ["perplexity", "train_runtime", "train_samples_per_second"],
+                    "metrics": [
+                        "perplexity",
+                        "train_runtime",
+                        "train_samples_per_second"
+                    ],
                     "extra_arguments": [
                         "--dataset_config_name wikitext-2-raw-v1",
                         "--use_hpu_graphs_for_inference",
@@ -47,18 +59,26 @@
             "eval_batch_size": 8,
             "distribution": {
                 "single_card": {
-                    "learning_rate": 3e-5,
+                    "learning_rate": 3e-05,
                     "train_batch_size": 32,
-                    "metrics": ["eval_f1", "train_runtime", "train_samples_per_second"],
+                    "metrics": [
+                        "eval_f1",
+                        "train_runtime",
+                        "train_samples_per_second"
+                    ],
                     "extra_arguments": [
                         "--max_seq_length 384",
                         "--use_hpu_graphs_for_inference"
                     ]
                 },
                 "multi_card": {
-                    "learning_rate": 7e-5,
+                    "learning_rate": 7e-05,
                     "train_batch_size": 32,
-                    "metrics": ["eval_f1", "train_runtime", "train_samples_per_second"],
+                    "metrics": [
+                        "eval_f1",
+                        "train_runtime",
+                        "train_samples_per_second"
+                    ],
                     "extra_arguments": [
                         "--max_seq_length 384",
                         "--use_hpu_graphs_for_inference"
@@ -71,9 +91,67 @@
             "eval_batch_size": 8,
             "distribution": {
                 "multi_card": {
-                    "learning_rate": 7e-5,
+                    "learning_rate": 7e-05,
                     "train_batch_size": 16,
-                    "metrics": ["perplexity", "train_runtime", "train_samples_per_second"],
+                    "metrics": [
+                        "perplexity",
+                        "train_runtime",
+                        "train_samples_per_second"
+                    ],
+                    "extra_arguments": [
+                        "--dataset_config_name wikitext-2-raw-v1",
+                        "--use_hpu_graphs_for_inference",
+                        "--ddp_find_unused_parameters True"
+                    ]
+                }
+            }
+        }
+    },
+    "gaudi3": {
+        "squad": {
+            "num_train_epochs": 1,
+            "eval_batch_size": 8,
+            "distribution": {
+                "single_card": {
+                    "learning_rate": 3e-05,
+                    "train_batch_size": 32,
+                    "metrics": [
+                        "eval_f1",
+                        "train_runtime",
+                        "train_samples_per_second"
+                    ],
+                    "extra_arguments": [
+                        "--max_seq_length 384",
+                        "--use_hpu_graphs_for_inference"
+                    ]
+                },
+                "multi_card": {
+                    "learning_rate": 7e-05,
+                    "train_batch_size": 32,
+                    "metrics": [
+                        "eval_f1",
+                        "train_runtime",
+                        "train_samples_per_second"
+                    ],
+                    "extra_arguments": [
+                        "--max_seq_length 384",
+                        "--use_hpu_graphs_for_inference"
+                    ]
+                }
+            }
+        },
+        "wikitext": {
+            "num_train_epochs": 2,
+            "eval_batch_size": 8,
+            "distribution": {
+                "multi_card": {
+                    "learning_rate": 7e-05,
+                    "train_batch_size": 16,
+                    "metrics": [
+                        "perplexity",
+                        "train_runtime",
+                        "train_samples_per_second"
+                    ],
                     "extra_arguments": [
                         "--dataset_config_name wikitext-2-raw-v1",
                         "--use_hpu_graphs_for_inference",
@@ -83,4 +161,4 @@
             }
         }
     }
-}
+}
\ No newline at end of file
diff --git a/tests/configs/examples/swin_base_patch4_window7_224_in22k.json b/tests/configs/examples/swin_base_patch4_window7_224_in22k.json
index 3f6a6c8693..e02bfe6610 100644
--- a/tests/configs/examples/swin_base_patch4_window7_224_in22k.json
+++ b/tests/configs/examples/swin_base_patch4_window7_224_in22k.json
@@ -5,9 +5,13 @@
             "eval_batch_size": 64,
             "distribution": {
                 "single_card": {
-                    "learning_rate": 3e-5,
+                    "learning_rate": 3e-05,
                     "train_batch_size": 64,
-                    "metrics": ["eval_accuracy", "train_runtime", "train_samples_per_second"],
+                    "metrics": [
+                        "eval_accuracy",
+                        "train_runtime",
+                        "train_samples_per_second"
+                    ],
                     "extra_arguments": [
                         "--remove_unused_columns False",
                         "--image_column_name img",
@@ -20,9 +24,13 @@
                     ]
                 },
                 "multi_card": {
-                    "learning_rate": 2e-4,
+                    "learning_rate": 0.0002,
                     "train_batch_size": 64,
-                    "metrics": ["eval_accuracy", "train_runtime", "train_samples_per_second"],
+                    "metrics": [
+                        "eval_accuracy",
+                        "train_runtime",
+                        "train_samples_per_second"
+                    ],
                     "extra_arguments": [
                         "--remove_unused_columns False",
                         "--image_column_name img",
@@ -43,9 +51,13 @@
             "eval_batch_size": 64,
             "distribution": {
                 "single_card": {
-                    "learning_rate": 6e-5,
+                    "learning_rate": 6e-05,
                     "train_batch_size": 160,
-                    "metrics": ["eval_accuracy", "train_runtime", "train_samples_per_second"],
+                    "metrics": [
+                        "eval_accuracy",
+                        "train_runtime",
+                        "train_samples_per_second"
+                    ],
                     "extra_arguments": [
                         "--remove_unused_columns False",
                         "--image_column_name img",
@@ -58,9 +70,59 @@
                     ]
                 },
                 "multi_card": {
-                    "learning_rate": 2e-4,
+                    "learning_rate": 0.0002,
                     "train_batch_size": 160,
-                    "metrics": ["eval_accuracy", "train_runtime", "train_samples_per_second"],
+                    "metrics": [
+                        "eval_accuracy",
+                        "train_runtime",
+                        "train_samples_per_second"
+                    ],
+                    "extra_arguments": [
+                        "--remove_unused_columns False",
+                        "--image_column_name img",
+                        "--seed 1337",
+                        "--use_hpu_graphs_for_inference",
+                        "--ignore_mismatched_sizes",
+                        "--dataloader_num_workers 1",
+                        "--pipelining_fwd_bwd True",
+                        "--non_blocking_data_copy True"
+                    ]
+                }
+            }
+        }
+    },
+    "gaudi3": {
+        "cifar10": {
+            "num_train_epochs": 1,
+            "eval_batch_size": 64,
+            "distribution": {
+                "single_card": {
+                    "learning_rate": 6e-05,
+                    "train_batch_size": 160,
+                    "metrics": [
+                        "eval_accuracy",
+                        "train_runtime",
+                        "train_samples_per_second"
+                    ],
+                    "extra_arguments": [
+                        "--remove_unused_columns False",
+                        "--image_column_name img",
+                        "--seed 1337",
+                        "--use_hpu_graphs_for_inference",
+                        "--ignore_mismatched_sizes",
+                        "--dataloader_num_workers 1",
+                        "--pipelining_fwd_bwd True",
+                        "--non_blocking_data_copy True"
+                    ]
+                },
+                "multi_card": {
+                    "learning_rate": 0.0002,
+                    "train_batch_size": 160,
+                    "metrics": [
+                        "eval_accuracy",
+                        "train_runtime",
+                        "train_samples_per_second"
+                    ],
                     "extra_arguments": [
                         "--remove_unused_columns False",
                         "--image_column_name img",
@@ -75,4 +137,4 @@
             }
         }
     }
-}
+}
\ No newline at end of file
diff --git a/tests/configs/examples/t5_small.json b/tests/configs/examples/t5_small.json
index 38b1b4f11f..d42f257737 100644
--- a/tests/configs/examples/t5_small.json
+++ b/tests/configs/examples/t5_small.json
@@ -5,9 +5,14 @@
             "eval_batch_size": 4,
             "distribution": {
                 "multi_card": {
-                    "learning_rate": 5e-5,
+                    "learning_rate": 5e-05,
                     "train_batch_size": 4,
-                    "metrics": ["eval_rougeLsum", "eval_samples_per_second", "train_runtime", "train_samples_per_second"],
+                    "metrics": [
+                        "eval_rougeLsum",
+                        "eval_samples_per_second",
+                        "train_runtime",
+                        "train_samples_per_second"
+                    ],
                     "extra_arguments": [
                         "--dataset_config \"3.0.0\"",
                         "--source_prefix \"summarize: \"",
@@ -25,9 +30,13 @@
             "eval_batch_size": 33,
             "distribution": {
                 "multi_card": {
-                    "learning_rate": 2e-4,
+                    "learning_rate": 0.0002,
                     "train_batch_size": 16,
-                    "metrics": ["eval_f1", "train_runtime", "train_samples_per_second"],
+                    "metrics": [
+                        "eval_f1",
+                        "train_runtime",
+                        "train_samples_per_second"
+                    ],
                     "extra_arguments": [
                         "--context_column context",
                         "--question_column question",
@@ -50,9 +59,14 @@
             "eval_batch_size": 4,
             "distribution": {
                 "multi_card": {
-                    "learning_rate": 2e-4,
+                    "learning_rate": 0.0002,
                     "train_batch_size": 32,
-                    "metrics": ["eval_rougeLsum", "eval_samples_per_second", "train_runtime", "train_samples_per_second"],
+                    "metrics": [
+                        "eval_rougeLsum",
+                        "eval_samples_per_second",
+                        "train_runtime",
+                        "train_samples_per_second"
+                    ],
                     "extra_arguments": [
                         "--dataset_config \"3.0.0\"",
                         "--source_prefix \"summarize: \"",
@@ -70,9 +84,13 @@
             "eval_batch_size": 33,
             "distribution": {
                 "multi_card": {
-                    "learning_rate": 2e-3,
+                    "learning_rate": 0.002,
                     "train_batch_size": 64,
-                    "metrics": ["eval_f1", "train_runtime", "train_samples_per_second"],
+                    "metrics": [
+                        "eval_f1",
+                        "train_runtime",
+                        "train_samples_per_second"
+                    ],
                     "extra_arguments": [
                         "--context_column context",
                         "--question_column question",
@@ -93,9 +111,13 @@
             "eval_batch_size": 33,
             "distribution": {
                 "multi_card": {
-                    "learning_rate": 2e-3,
+                    "learning_rate": 0.002,
                     "train_batch_size": 64,
-                    "metrics": ["eval_f1", "train_runtime", "train_samples_per_second"],
+                    "metrics": [
+                        "eval_f1",
+                        "train_runtime",
+                        "train_samples_per_second"
+                    ],
                     "extra_arguments": [
                         "--use_hpu_graphs_for_inference",
                         "--use_hpu_graphs_for_training",
@@ -112,9 +134,115 @@
             "eval_batch_size": 4,
             "distribution": {
                 "multi_card": {
-                    "learning_rate": 2e-3,
+                    "learning_rate": 0.002,
                     "train_batch_size": 8,
-                    "metrics": ["eval_accuracy", "train_runtime", "train_samples_per_second"],
+                    "metrics": [
+                        "eval_accuracy",
+                        "train_runtime",
+                        "train_samples_per_second"
+                    ],
+                    "extra_arguments": [
+                        "--use_hpu_graphs_for_inference",
+                        "--use_hpu_graphs_for_training",
+                        "--max_source_length 256",
+                        "--max_target_length 2",
+                        "--max_train_samples 1000",
+                        "--max_eval_samples 100",
+                        "--bf16",
+                        "--trust_remote_code True"
+                    ]
+                }
+            }
+        }
+    },
+    "gaudi3": {
+        "cnn_dailymail": {
+            "num_train_epochs": 1,
+            "eval_batch_size": 4,
+            "distribution": {
+                "multi_card": {
+                    "learning_rate": 0.0002,
+                    "train_batch_size": 32,
+                    "metrics": [
+                        "eval_rougeLsum",
+                        "eval_samples_per_second",
+                        "train_runtime",
+                        "train_samples_per_second"
+                    ],
+                    "extra_arguments": [
+                        "--dataset_config \"3.0.0\"",
+                        "--source_prefix \"summarize: \"",
+                        "--predict_with_generate",
+                        "--ignore_pad_token_for_loss False",
+                        "--pad_to_max_length",
+                        "--use_hpu_graphs_for_inference",
+                        "--save_strategy epoch"
+                    ]
+                }
+            }
+        },
+        "squad_v2": {
+            "num_train_epochs": 2,
+            "eval_batch_size": 33,
+            "distribution": {
+                "multi_card": {
+                    "learning_rate": 0.002,
+                    "train_batch_size": 64,
+                    "metrics": [
+                        "eval_f1",
+                        "train_runtime",
+                        "train_samples_per_second"
+                    ],
+                    "extra_arguments": [
+                        "--context_column context",
+                        "--question_column question",
+                        "--answer_column answers",
+                        "--version_2_with_negative",
+                        "--max_seq_length 384",
+                        "--predict_with_generate",
+                        "--ignore_pad_token_for_loss False",
+                        "--pad_to_max_length",
+                        "--use_hpu_graphs_for_inference",
+                        "--save_strategy epoch"
+                    ]
+                }
+            }
+        },
+        "multitask-prompt-tuning": {
+            "num_train_epochs": 1,
+            "eval_batch_size": 33,
+            "distribution": {
+                "multi_card": {
+                    "learning_rate": 0.002,
+                    "train_batch_size": 64,
+                    "metrics": [
+                        "eval_f1",
+                        "train_runtime",
+                        "train_samples_per_second"
+                    ],
+                    "extra_arguments": [
+                        "--use_hpu_graphs_for_inference",
+                        "--use_hpu_graphs_for_training",
+                        "--max_source_length 256",
+                        "--max_target_length 16",
+                        "--bf16",
+                        "--trust_remote_code True"
+                    ]
+                }
+            }
+        },
+        "poly-tuning": {
+            "num_train_epochs": 1,
+            "eval_batch_size": 4,
+            "distribution": {
+                "multi_card": {
+                    "learning_rate": 0.002,
+                    "train_batch_size": 8,
+                    "metrics": [
+                        "eval_accuracy",
+                        "train_runtime",
+                        "train_samples_per_second"
+                    ],
                     "extra_arguments": [
                         "--use_hpu_graphs_for_inference",
                         "--use_hpu_graphs_for_training",
@@ -129,4 +257,4 @@
             }
         }
     }
-}
+}
\ No newline at end of file
diff --git a/tests/configs/examples/vit_base_patch16_224_in21k.json b/tests/configs/examples/vit_base_patch16_224_in21k.json
index 1071455031..bf9b6b297f 100644
--- a/tests/configs/examples/vit_base_patch16_224_in21k.json
+++ b/tests/configs/examples/vit_base_patch16_224_in21k.json
@@ -5,9 +5,13 @@
             "eval_batch_size": 64,
             "distribution": {
                 "single_card": {
-                    "learning_rate": 5e-5,
+                    "learning_rate": 5e-05,
                     "train_batch_size": 64,
-                    "metrics": ["eval_accuracy", "train_runtime", "train_samples_per_second"],
+                    "metrics": [
+                        "eval_accuracy",
+                        "train_runtime",
+                        "train_samples_per_second"
+                    ],
                     "extra_arguments": [
                         "--remove_unused_columns False",
                         "--image_column_name img",
@@ -19,9 +23,13 @@
                     ]
                 },
                 "multi_card": {
-                    "learning_rate": 2e-4,
+                    "learning_rate": 0.0002,
                     "train_batch_size": 64,
-                    "metrics": ["eval_accuracy", "train_runtime", "train_samples_per_second"],
+                    "metrics": [
+                        "eval_accuracy",
+                        "train_runtime",
+                        "train_samples_per_second"
+                    ],
                     "extra_arguments": [
                         "--remove_unused_columns False",
                         "--image_column_name img",
@@ -42,9 +50,13 @@
             "eval_batch_size": 64,
             "distribution": {
                 "single_card": {
-                    "learning_rate": 3e-5,
+                    "learning_rate": 3e-05,
                     "train_batch_size": 128,
-                    "metrics": ["eval_accuracy", "train_runtime", "train_samples_per_second"],
+                    "metrics": [
+                        "eval_accuracy",
+                        "train_runtime",
+                        "train_samples_per_second"
+                    ],
                     "extra_arguments": [
                         "--remove_unused_columns False",
                         "--image_column_name img",
@@ -56,9 +68,58 @@
                     ]
                 },
                 "multi_card": {
-                    "learning_rate": 2e-4,
+                    "learning_rate": 0.0002,
                     "train_batch_size": 128,
-                    "metrics": ["eval_accuracy", "train_runtime", "train_samples_per_second"],
+                    "metrics": [
+                        "eval_accuracy",
+                        "train_runtime",
+                        "train_samples_per_second"
+                    ],
+                    "extra_arguments": [
+                        "--remove_unused_columns False",
+                        "--image_column_name img",
+                        "--seed 1337",
+                        "--use_hpu_graphs_for_inference",
+                        "--dataloader_num_workers 1",
+                        "--pipelining_fwd_bwd True",
+                        "--non_blocking_data_copy True",
+                        "--throughput_warmup_steps 8"
+                    ]
+                }
+            }
+        }
+    },
+    "gaudi3": {
+        "cifar10": {
+            "num_train_epochs": 1,
+            "eval_batch_size": 64,
+            "distribution": {
+                "single_card": {
+                    "learning_rate": 3e-05,
+                    "train_batch_size": 128,
+                    "metrics": [
+                        "eval_accuracy",
+                        "train_runtime",
+                        "train_samples_per_second"
+                    ],
+                    "extra_arguments": [
+                        "--remove_unused_columns False",
+                        "--image_column_name img",
+                        "--seed 1337",
+                        "--use_hpu_graphs_for_inference",
+                        "--dataloader_num_workers 1",
+                        "--pipelining_fwd_bwd True",
+                        "--non_blocking_data_copy True"
+                    ]
+                },
+                "multi_card": {
+                    "learning_rate": 0.0002,
+                    "train_batch_size": 128,
+                    "metrics": [
+                        "eval_accuracy",
+                        "train_runtime",
+                        "train_samples_per_second"
+                    ],
                     "extra_arguments": [
                         "--remove_unused_columns False",
                         "--image_column_name img",
@@ -73,4 +134,4 @@
             }
         }
     }
-}
+}
\ No newline at end of file
diff --git a/tests/configs/examples/wav2vec2_base.json b/tests/configs/examples/wav2vec2_base.json
index 3b8d8e2b70..b56f9ab86a 100644
--- a/tests/configs/examples/wav2vec2_base.json
+++ b/tests/configs/examples/wav2vec2_base.json
@@ -5,9 +5,14 @@
             "eval_batch_size": 64,
             "distribution": {
                 "multi_card": {
-                    "learning_rate": 5e-4,
+                    "learning_rate": 0.0005,
                     "train_batch_size": 32,
-                    "metrics": ["eval_accuracy", "eval_samples_per_second", "train_runtime", "train_samples_per_second"],
+                    "metrics": [
+                        "eval_accuracy",
+                        "eval_samples_per_second",
+                        "train_runtime",
+                        "train_samples_per_second"
+                    ],
                     "extra_arguments": [
                         "--audio_column_name audio",
                         "--label_column_name language",
@@ -31,9 +36,45 @@
             "eval_batch_size": 64,
             "distribution": {
                 "multi_card": {
-                    "learning_rate": 3e-4,
+                    "learning_rate": 0.0003,
                     "train_batch_size": 32,
-                    "metrics": ["eval_accuracy", "eval_samples_per_second", "train_runtime", "train_samples_per_second"],
+                    "metrics": [
+                        "eval_accuracy",
+                        "eval_samples_per_second",
+                        "train_runtime",
+                        "train_samples_per_second"
+                    ],
+                    "extra_arguments": [
+                        "--audio_column_name audio",
+                        "--label_column_name language",
+                        "--remove_unused_columns False",
+                        "--max_length_seconds 8",
+                        "--attention_mask False",
+                        "--warmup_ratio 0.1",
+                        "--seed 0",
+                        "--dataloader_num_workers 1",
+                        "--use_hpu_graphs_for_training",
+                        "--use_hpu_graphs_for_inference",
+                        "--trust_remote_code True"
+                    ]
+                }
+            }
+        }
+    },
+    "gaudi3": {
+        "common_language": {
+            "num_train_epochs": 5,
+            "eval_batch_size": 64,
+            "distribution": {
+                "multi_card": {
+                    "learning_rate": 0.0003,
+                    "train_batch_size": 32,
+                    "metrics": [
+                        "eval_accuracy",
+                        "eval_samples_per_second",
+                        "train_runtime",
+                        "train_samples_per_second"
+                    ],
                     "extra_arguments": [
                         "--audio_column_name audio",
                         "--label_column_name language",
@@ -51,4 +92,4 @@
             }
         }
     }
-}
+}
\ No newline at end of file
diff --git a/tests/configs/examples/wav2vec2_large_lv60.json b/tests/configs/examples/wav2vec2_large_lv60.json
index 3ac83a4638..648d299a4b 100644
--- a/tests/configs/examples/wav2vec2_large_lv60.json
+++ b/tests/configs/examples/wav2vec2_large_lv60.json
@@ -5,9 +5,14 @@
             "eval_batch_size": 8,
             "distribution": {
                 "multi_card": {
-                    "learning_rate": 6e-4,
+                    "learning_rate": 0.0006,
                     "train_batch_size": 8,
-                    "metrics": ["eval_wer", "eval_samples_per_second", "train_runtime", "train_samples_per_second"],
+                    "metrics": [
+                        "eval_wer",
+                        "eval_samples_per_second",
+                        "train_runtime",
+                        "train_samples_per_second"
+                    ],
                     "extra_arguments": [
                         "--dataset_config_name clean",
                         "--train_split_name train.100",
@@ -18,7 +23,7 @@
                         "--layerdrop 0.0",
                         "--freeze_feature_encoder",
                         "--dataloader_num_workers 8",
-                        "--chars_to_ignore ',?.!-;:\"“%‘”'",
+                        "--chars_to_ignore ',?.!-;:\"\u201c%\u2018\u201d'",
                         "--trust_remote_code True"
                     ]
                 }
@@ -31,9 +36,14 @@
             "eval_batch_size": 8,
             "distribution": {
                 "multi_card": {
-                    "learning_rate": 4e-4,
+                    "learning_rate": 0.0004,
                     "train_batch_size": 8,
-                    "metrics": ["eval_wer", "eval_samples_per_second", "train_runtime", "train_samples_per_second"],
+                    "metrics": [
+                        "eval_wer",
+                        "eval_samples_per_second",
+                        "train_runtime",
+                        "train_samples_per_second"
+                    ],
                     "extra_arguments": [
                         "--dataset_config_name clean",
                         "--train_split_name train.100",
@@ -44,7 +54,40 @@
                         "--layerdrop 0.0",
                         "--freeze_feature_encoder",
                         "--dataloader_num_workers 8",
-                        "--chars_to_ignore ',?.!-;:\"“%‘”'",
+                        "--chars_to_ignore ',?.!-;:\"\u201c%\u2018\u201d'",
+                        "--use_hpu_graphs_for_training",
+                        "--use_hpu_graphs_for_inference",
+                        "--trust_remote_code True"
+                    ]
+                }
+            }
+        }
+    },
+    "gaudi3": {
+        "regisss/librispeech_asr_for_optimum_habana_ci": {
+            "num_train_epochs": 2,
+            "eval_batch_size": 8,
+            "distribution": {
+                "multi_card": {
+                    "learning_rate": 0.0004,
+                    "train_batch_size": 8,
+                    "metrics": [
+                        "eval_wer",
+                        "eval_samples_per_second",
+                        "train_runtime",
+                        "train_samples_per_second"
+                    ],
+                    "extra_arguments": [
+                        "--dataset_config_name clean",
+                        "--train_split_name train.100",
+                        "--eval_split_name validation",
+                        "--preprocessing_num_workers 1",
+                        "--warmup_steps 500",
+                        "--text_column_name text",
+                        "--layerdrop 0.0",
+                        "--freeze_feature_encoder",
+                        "--dataloader_num_workers 8",
+                        "--chars_to_ignore ',?.!-;:\"\u201c%\u2018\u201d'",
                         "--use_hpu_graphs_for_training",
                         "--use_hpu_graphs_for_inference",
                         "--trust_remote_code True"
@@ -53,4 +96,4 @@
             }
         }
     }
-}
+}
\ No newline at end of file
diff --git a/tests/configs/examples/whisper_small.json b/tests/configs/examples/whisper_small.json
index b971a404da..4893b261ab 100644
--- a/tests/configs/examples/whisper_small.json
+++ b/tests/configs/examples/whisper_small.json
@@ -5,9 +5,14 @@
             "eval_batch_size": 2,
             "distribution": {
                 "multi_card": {
-                    "learning_rate": 1e-4,
+                    "learning_rate": 0.0001,
                     "train_batch_size": 8,
-                    "metrics": ["eval_wer", "eval_samples_per_second", "train_runtime", "train_samples_per_second"],
+                    "metrics": [
+                        "eval_wer",
+                        "eval_samples_per_second",
+                        "train_runtime",
+                        "train_samples_per_second"
+                    ],
                     "extra_arguments": [
                         "--dataset_config_name hi",
                         "--language hindi",
@@ -36,9 +41,49 @@
             "eval_batch_size": 8,
             "distribution": {
                 "multi_card": {
-                    "learning_rate": 8e-5,
+                    "learning_rate": 8e-05,
                     "train_batch_size": 32,
-                    "metrics": ["eval_wer", "eval_samples_per_second", "train_runtime", "train_samples_per_second"],
+                    "metrics": [
+                        "eval_wer",
+                        "eval_samples_per_second",
+                        "train_runtime",
+                        "train_samples_per_second"
+                    ],
+                    "extra_arguments": [
+                        "--dataset_config_name hi",
+                        "--language hindi",
+                        "--task transcribe",
+                        "--train_split_name train+validation",
+                        "--eval_split_name test",
+                        "--preprocessing_num_workers 1",
+                        "--generation_max_length 225",
+                        "--max_duration_in_seconds 30",
+                        "--text_column_name sentence",
+                        "--freeze_feature_encoder False",
+                        "--dataloader_num_workers 8",
+                        "--predict_with_generate",
+                        "--use_hpu_graphs_for_inference",
+                        "--label_features_max_length 128",
+                        "--trust_remote_code True"
+                    ]
+                }
+            }
+        }
+    },
+    "gaudi3": {
+        "mozilla-foundation/common_voice_11_0": {
+            "num_train_epochs": 10,
+            "eval_batch_size": 8,
+            "distribution": {
+                "multi_card": {
+                    "learning_rate": 8e-05,
+                    "train_batch_size": 32,
+                    "metrics": [
+                        "eval_wer",
+                        "eval_samples_per_second",
+                        "train_runtime",
+                        "train_samples_per_second"
+                    ],
                     "extra_arguments": [
                         "--dataset_config_name hi",
                         "--language hindi",
@@ -60,4 +105,4 @@
             }
         }
     }
-}
+}
\ No newline at end of file