diff --git a/tests/test_custom_file_input.py b/tests/test_custom_file_input.py index 14dc84058e..050231b90e 100644 --- a/tests/test_custom_file_input.py +++ b/tests/test_custom_file_input.py @@ -13,7 +13,9 @@ PATH_TO_RESOURCES = Path(__file__).resolve().parent.parent / "tests/resource" -if OH_DEVICE_CONTEXT in ["gaudi2"]: + +if OH_DEVICE_CONTEXT not in ["gaudi1"]: + # gaudi2+ MODEL_FILE_OPTIONS_TO_TEST = { "bf16": [ ( diff --git a/tests/test_examples.py b/tests/test_examples.py index 71616ecf4f..eb4a745cfe 100644 --- a/tests/test_examples.py +++ b/tests/test_examples.py @@ -64,7 +64,6 @@ TIME_PERF_FACTOR = 1.05 -IS_GAUDI2 = bool("gaudi2" == OH_DEVICE_CONTEXT) IS_GAUDI1 = bool("gaudi1" == OH_DEVICE_CONTEXT) @@ -262,12 +261,12 @@ def to_test( models_measured_on_eager_mode = ["google/gemma-2b-it"] - if (fsdp or fp8) and not IS_GAUDI2: + if (fsdp or fp8) and IS_GAUDI1: return False elif ( any(case in example_name for case in case_only_in_gaudi2) or task_name in ("llama-adapter", "vera", "ia3", "adalora", "ln_tuning", "mamamiya405/finred") - ) and not IS_GAUDI2: + ) and IS_GAUDI1: return False elif "Qwen2-72B" in model_name and task_name != "trl-sft-qwen": return False @@ -294,23 +293,23 @@ def to_test( return False elif eager_mode and model_name not in models_measured_on_eager_mode: return False - elif "gemma" in model_name and not IS_GAUDI2: + elif "gemma" in model_name and IS_GAUDI1: return False elif model_name not in models_with_specific_rules and not deepspeed: return True elif model_name == "gpt2-xl" and deepspeed: # GPT2-XL is tested only with DeepSpeed return True - elif "gpt-neox" in model_name and IS_GAUDI2 and deepspeed: - # GPT-NeoX is tested only on Gaudi2 and with DeepSpeed + elif "gpt-neox" in model_name and not IS_GAUDI1 and deepspeed: + # GPT-NeoX is tested only on Gaudi2+ and with DeepSpeed return True - elif "flan-t5" in model_name and IS_GAUDI2 and deepspeed: - # Flan-T5 is tested only on Gaudi2 and with DeepSpeed + elif "flan-t5" in model_name and 
not IS_GAUDI1 and deepspeed: + # Flan-T5 is tested only on Gaudi2+ and with DeepSpeed return True - elif "CodeLlama" in model_name and IS_GAUDI2 and deepspeed: - # CodeLlama is tested only on Gaudi2 and with DeepSpeed + elif "CodeLlama" in model_name and not IS_GAUDI1 and deepspeed: + # CodeLlama is tested only on Gaudi2+ and with DeepSpeed return True - elif "Qwen2-72B" in model_name and IS_GAUDI2 and deepspeed: + elif "Qwen2-72B" in model_name and not IS_GAUDI1 and deepspeed: return True elif model_name == "albert-xxlarge-v1": if (("RUN_ALBERT_XXL_1X" in os.environ) and strtobool(os.environ["RUN_ALBERT_XXL_1X"])) or multi_card: @@ -320,21 +319,21 @@ def to_test( return True elif "wav2vec2-large" in model_name and example_name == "run_speech_recognition_ctc": return True - elif "bridgetower" in model_name and IS_GAUDI2: + elif "bridgetower" in model_name and not IS_GAUDI1: return True - elif "falcon" in model_name and IS_GAUDI2 and not fsdp and not fp8: + elif "falcon" in model_name and not IS_GAUDI1 and not fsdp and not fp8: return True - elif "bloom" in model_name and deepspeed and not IS_GAUDI2: + elif "bloom" in model_name and deepspeed and IS_GAUDI1: return True - elif "LlamaGuard" in model_name and deepspeed and IS_GAUDI2: + elif "LlamaGuard" in model_name and deepspeed and not IS_GAUDI1: return True - elif "ast-finetuned-speech-commands-v2" in model_name and IS_GAUDI2: + elif "ast-finetuned-speech-commands-v2" in model_name and not IS_GAUDI1: return True - elif "huggyllama" in model_name and IS_GAUDI2 and deepspeed: + elif "huggyllama" in model_name and not IS_GAUDI1 and deepspeed: return True - elif "gemma" in model_name and IS_GAUDI2: + elif "gemma" in model_name and not IS_GAUDI1: return True - elif "chatglm3" in model_name and IS_GAUDI2 and deepspeed: + elif "chatglm3" in model_name and not IS_GAUDI1 and deepspeed: return True return False @@ -444,7 +443,7 @@ def test(self): # Assess accuracy with open(Path(tmp_dir) / "accuracy_metrics.json") as fp: 
results = json.load(fp) - baseline = 0.43 if IS_GAUDI2 else 0.42 + baseline = 0.43 if not IS_GAUDI1 else 0.42 self.assertGreaterEqual(results["accuracy"], baseline) return elif self.EXAMPLE_NAME == "run_clip": diff --git a/tests/test_fp8_examples.py b/tests/test_fp8_examples.py index 94ccc360f8..09f8764e13 100644 --- a/tests/test_fp8_examples.py +++ b/tests/test_fp8_examples.py @@ -10,24 +10,19 @@ from .utils import OH_DEVICE_CONTEXT -if OH_DEVICE_CONTEXT in ["gaudi2"]: - # Gaudi2 CI baselines - MODELS_TO_TEST = { - "fp8": [ - ( - "mistralai/Mistral-7B-Instruct-v0.2", - "tatsu-lab/alpaca", - "", - "language-modeling", - 8, - 8, - "run_lora_clm.py", - ), - ], - } -else: - # FP8 is not supported on Gaudi1 - MODELS_TO_TEST = {"fp8": []} +MODELS_TO_TEST = { + "fp8": [ + ( + "mistralai/Mistral-7B-Instruct-v0.2", + "tatsu-lab/alpaca", + "", + "language-modeling", + 8, + 8, + "run_lora_clm.py", + ), + ], +} def _test_fp8_train( @@ -122,6 +117,7 @@ def _test_fp8_train( ) +@pytest.mark.skipif("gaudi1" == OH_DEVICE_CONTEXT, reason="FP8 is not supported on Gaudi1") @pytest.mark.parametrize( "model_name, dataset_name, gaudi_config, task, bs_train, bs_eval, script", MODELS_TO_TEST["fp8"], diff --git a/tests/test_fsdp_examples.py b/tests/test_fsdp_examples.py index 69ca704f96..06b61c2a92 100644 --- a/tests/test_fsdp_examples.py +++ b/tests/test_fsdp_examples.py @@ -11,33 +11,28 @@ from .utils import OH_DEVICE_CONTEXT -if OH_DEVICE_CONTEXT in ["gaudi2"]: - # Gaudi2 CI baselines - MODELS_TO_TEST = { - "bf16": [ - ( - "bert-base-uncased", - "Habana/bert-base-uncased", - "question-answering", - 24, - 8, - "run_qa.py", - "full_shard", - ), - ( - "meta-llama/Llama-2-7b-hf", - "", - "language-modeling", - 8, - 8, - "run_lora_clm.py", - "auto_wrap", - ), - ], - } -else: - # FSDP is not supported on Gaudi1 - MODELS_TO_TEST = {"bf16": []} +MODELS_TO_TEST = { + "bf16": [ + ( + "bert-base-uncased", + "Habana/bert-base-uncased", + "question-answering", + 24, + 8, + "run_qa.py", + 
"full_shard", + ), + ( + "meta-llama/Llama-2-7b-hf", + "", + "language-modeling", + 8, + 8, + "run_lora_clm.py", + "auto_wrap", + ), + ], +} def _test_fsdp( @@ -166,6 +161,7 @@ def _test_fsdp( ) +@pytest.mark.skipif("gaudi1" == OH_DEVICE_CONTEXT, reason="FSDP is not supported on Gaudi1") @pytest.mark.parametrize("model_name, gaudi_config, task, bs_train, bs_eval, script, policy", MODELS_TO_TEST["bf16"]) def test_fsdp_bf16( model_name: str, diff --git a/tests/test_image_to_text_example.py b/tests/test_image_to_text_example.py index 192ba75dab..9663de80a8 100644 --- a/tests/test_image_to_text_example.py +++ b/tests/test_image_to_text_example.py @@ -11,8 +11,8 @@ from .utils import OH_DEVICE_CONTEXT -if OH_DEVICE_CONTEXT in ["gaudi2"]: - # Gaudi2 CI baselines +if OH_DEVICE_CONTEXT not in ["gaudi1"]: + # Gaudi2+ MODELS_TO_TEST = { "bf16": [ # ("llava-hf/llava-1.5-7b-hf", 1), @@ -36,7 +36,7 @@ ], } else: - # Gaudi1 CI baselines + # Gaudi1 MODELS_TO_TEST = { "bf16": [ ("llava-hf/llava-1.5-7b-hf", 1), diff --git a/tests/test_text_generation_example.py b/tests/test_text_generation_example.py index 1725c587ba..0d71357568 100644 --- a/tests/test_text_generation_example.py +++ b/tests/test_text_generation_example.py @@ -19,7 +19,8 @@ prev_quant_model_name = None prev_quant_rank = 0 -if OH_DEVICE_CONTEXT in ["gaudi2"]: +if OH_DEVICE_CONTEXT not in ["gaudi1"]: + # Gaudi2+ MODELS_TO_TEST = { "bf16_1x": [ ("bigscience/bloomz-7b1", 1, False, False), @@ -114,7 +115,7 @@ ], } else: - # Gaudi1 CI + # Gaudi1 MODELS_TO_TEST = { "bf16_1x": [ ("bigscience/bloomz-7b1", 1, False, False),