Gaudi1 is the exception, not Gaudi2
We want to run the same tests on Gaudi3 that we run on Gaudi2.

Hence, Gaudi1 is the exception case, so reverse the test-definition
conditional logic as needed.

Signed-off-by: U. Artie Eoff <[email protected]>
uartie committed Feb 24, 2025
1 parent bae2585 commit f363ddc
Showing 6 changed files with 65 additions and 71 deletions.
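In sketch form, the change across all six files flips an allow-list check on "gaudi2" into an exclusion check on "gaudi1", so newer devices inherit the Gaudi2 test definitions. A minimal illustration, where the "gaudi3" context value is an assumption for the sake of the example:

    # Minimal sketch of the reversed conditional; "gaudi3" is hypothetical here.
    for ctx in ("gaudi1", "gaudi2", "gaudi3"):
        old_logic = ctx in ["gaudi2"]        # before: only Gaudi2 qualified
        new_logic = ctx not in ["gaudi1"]    # after: everything but Gaudi1 qualifies
        print(f"{ctx}: before={old_logic}, after={new_logic}")
    # gaudi1: before=False, after=False
    # gaudi2: before=True,  after=True
    # gaudi3: before=False, after=True  <- Gaudi3 now runs the Gaudi2 tests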
4 changes: 3 additions & 1 deletion tests/test_custom_file_input.py
@@ -13,7 +13,9 @@
 
 PATH_TO_RESOURCES = Path(__file__).resolve().parent.parent / "tests/resource"
 
-if OH_DEVICE_CONTEXT in ["gaudi2"]:
+
+if OH_DEVICE_CONTEXT not in ["gaudi1"]:
+    # gaudi2+
     MODEL_FILE_OPTIONS_TO_TEST = {
         "bf16": [
             (
39 changes: 19 additions & 20 deletions tests/test_examples.py
@@ -64,7 +64,6 @@
 TIME_PERF_FACTOR = 1.05
 
 
-IS_GAUDI2 = bool("gaudi2" == OH_DEVICE_CONTEXT)
 IS_GAUDI1 = bool("gaudi1" == OH_DEVICE_CONTEXT)
 
 
@@ -262,12 +261,12 @@ def to_test(
 
     models_measured_on_eager_mode = ["google/gemma-2b-it"]
 
-    if (fsdp or fp8) and not IS_GAUDI2:
+    if (fsdp or fp8) and IS_GAUDI1:
         return False
     elif (
         any(case in example_name for case in case_only_in_gaudi2)
         or task_name in ("llama-adapter", "vera", "ia3", "adalora", "ln_tuning", "mamamiya405/finred")
-    ) and not IS_GAUDI2:
+    ) and IS_GAUDI1:
         return False
     elif "Qwen2-72B" in model_name and task_name != "trl-sft-qwen":
         return False
@@ -294,23 +293,23 @@ def to_test(
         return False
     elif eager_mode and model_name not in models_measured_on_eager_mode:
         return False
-    elif "gemma" in model_name and not IS_GAUDI2:
+    elif "gemma" in model_name and IS_GAUDI1:
         return False
     elif model_name not in models_with_specific_rules and not deepspeed:
         return True
     elif model_name == "gpt2-xl" and deepspeed:
         # GPT2-XL is tested only with DeepSpeed
         return True
-    elif "gpt-neox" in model_name and IS_GAUDI2 and deepspeed:
-        # GPT-NeoX is tested only on Gaudi2 and with DeepSpeed
+    elif "gpt-neox" in model_name and not IS_GAUDI1 and deepspeed:
+        # GPT-NeoX is tested only on Gaudi2+ and with DeepSpeed
         return True
-    elif "flan-t5" in model_name and IS_GAUDI2 and deepspeed:
-        # Flan-T5 is tested only on Gaudi2 and with DeepSpeed
+    elif "flan-t5" in model_name and not IS_GAUDI1 and deepspeed:
+        # Flan-T5 is tested only on Gaudi2+ and with DeepSpeed
         return True
-    elif "CodeLlama" in model_name and IS_GAUDI2 and deepspeed:
-        # CodeLlama is tested only on Gaudi2 and with DeepSpeed
+    elif "CodeLlama" in model_name and not IS_GAUDI1 and deepspeed:
+        # CodeLlama is tested only on Gaudi2+ and with DeepSpeed
         return True
-    elif "Qwen2-72B" in model_name and IS_GAUDI2 and deepspeed:
+    elif "Qwen2-72B" in model_name and not IS_GAUDI1 and deepspeed:
         return True
     elif model_name == "albert-xxlarge-v1":
         if (("RUN_ALBERT_XXL_1X" in os.environ) and strtobool(os.environ["RUN_ALBERT_XXL_1X"])) or multi_card:
@@ -320,21 +319,21 @@ def to_test(
             return True
     elif "wav2vec2-large" in model_name and example_name == "run_speech_recognition_ctc":
         return True
-    elif "bridgetower" in model_name and IS_GAUDI2:
+    elif "bridgetower" in model_name and not IS_GAUDI1:
         return True
-    elif "falcon" in model_name and IS_GAUDI2 and not fsdp and not fp8:
+    elif "falcon" in model_name and not IS_GAUDI1 and not fsdp and not fp8:
         return True
-    elif "bloom" in model_name and deepspeed and not IS_GAUDI2:
+    elif "bloom" in model_name and deepspeed and IS_GAUDI1:
         return True
-    elif "LlamaGuard" in model_name and deepspeed and IS_GAUDI2:
+    elif "LlamaGuard" in model_name and deepspeed and not IS_GAUDI1:
         return True
-    elif "ast-finetuned-speech-commands-v2" in model_name and IS_GAUDI2:
+    elif "ast-finetuned-speech-commands-v2" in model_name and not IS_GAUDI1:
         return True
-    elif "huggyllama" in model_name and IS_GAUDI2 and deepspeed:
+    elif "huggyllama" in model_name and not IS_GAUDI1 and deepspeed:
         return True
-    elif "gemma" in model_name and IS_GAUDI2:
+    elif "gemma" in model_name and not IS_GAUDI1:
         return True
-    elif "chatglm3" in model_name and IS_GAUDI2 and deepspeed:
+    elif "chatglm3" in model_name and not IS_GAUDI1 and deepspeed:
         return True
 
     return False
@@ -444,7 +443,7 @@ def test(self):
             # Assess accuracy
             with open(Path(tmp_dir) / "accuracy_metrics.json") as fp:
                 results = json.load(fp)
-            baseline = 0.43 if IS_GAUDI2 else 0.42
+            baseline = 0.43 if not IS_GAUDI1 else 0.42
             self.assertGreaterEqual(results["accuracy"], baseline)
             return
         elif self.EXAMPLE_NAME == "run_clip":
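Note that the flip in to_test runs in both directions: Gaudi2-only cases (IS_GAUDI2 -> not IS_GAUDI1) gain coverage on newer devices, while Gaudi1-only cases such as bloom with DeepSpeed (not IS_GAUDI2 -> IS_GAUDI1) stop matching them. A quick check of the second direction, again assuming a hypothetical "gaudi3" context:

    # Gaudi1-only gating, before and after the commit.
    for ctx in ("gaudi1", "gaudi2", "gaudi3"):
        IS_GAUDI1 = ctx == "gaudi1"
        IS_GAUDI2 = ctx == "gaudi2"
        before = not IS_GAUDI2  # old: matched Gaudi1 AND any future device
        after = IS_GAUDI1       # new: matches Gaudi1 only
        print(f"{ctx}: before={before}, after={after}")
    # gaudi3: before=True, after=False -> Gaudi1-only tests no longer leak onto Gaudi3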
32 changes: 14 additions & 18 deletions tests/test_fp8_examples.py
@@ -10,24 +10,19 @@
 from .utils import OH_DEVICE_CONTEXT
 
 
-if OH_DEVICE_CONTEXT in ["gaudi2"]:
-    # Gaudi2 CI baselines
-    MODELS_TO_TEST = {
-        "fp8": [
-            (
-                "mistralai/Mistral-7B-Instruct-v0.2",
-                "tatsu-lab/alpaca",
-                "",
-                "language-modeling",
-                8,
-                8,
-                "run_lora_clm.py",
-            ),
-        ],
-    }
-else:
-    # FP8 is not supported on Gaudi1
-    MODELS_TO_TEST = {"fp8": []}
+MODELS_TO_TEST = {
+    "fp8": [
+        (
+            "mistralai/Mistral-7B-Instruct-v0.2",
+            "tatsu-lab/alpaca",
+            "",
+            "language-modeling",
+            8,
+            8,
+            "run_lora_clm.py",
+        ),
+    ],
+}
 
 
 def _test_fp8_train(
@@ -122,6 +117,7 @@ def _test_fp8_train(
     )
 
 
+@pytest.mark.skipif("gaudi1" == OH_DEVICE_CONTEXT, reason="FP8 is not supported on Gaudi1")
 @pytest.mark.parametrize(
     "model_name, dataset_name, gaudi_config, task, bs_train, bs_eval, script",
     MODELS_TO_TEST["fp8"],
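With the module-level conditional gone, the Gaudi1 exclusion moves to a pytest.mark.skipif marker; the same pattern is applied in tests/test_fsdp_examples.py below. A self-contained sketch of the idiom, with a stand-in OH_DEVICE_CONTEXT value (the real one comes from tests/utils.py):

    import pytest

    OH_DEVICE_CONTEXT = "gaudi1"  # stand-in; normally imported from .utils

    # An empty parametrize list only yields a generic "empty parameter set" skip;
    # the skipif marker keeps the model list intact and reports an explicit reason.
    @pytest.mark.skipif("gaudi1" == OH_DEVICE_CONTEXT, reason="FP8 is not supported on Gaudi1")
    @pytest.mark.parametrize("model_name", ["mistralai/Mistral-7B-Instruct-v0.2"])
    def test_fp8_sketch(model_name: str):
        assert model_name  # placeholder body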
50 changes: 23 additions & 27 deletions tests/test_fsdp_examples.py
@@ -11,33 +11,28 @@
 from .utils import OH_DEVICE_CONTEXT
 
 
-if OH_DEVICE_CONTEXT in ["gaudi2"]:
-    # Gaudi2 CI baselines
-    MODELS_TO_TEST = {
-        "bf16": [
-            (
-                "bert-base-uncased",
-                "Habana/bert-base-uncased",
-                "question-answering",
-                24,
-                8,
-                "run_qa.py",
-                "full_shard",
-            ),
-            (
-                "meta-llama/Llama-2-7b-hf",
-                "",
-                "language-modeling",
-                8,
-                8,
-                "run_lora_clm.py",
-                "auto_wrap",
-            ),
-        ],
-    }
-else:
-    # FSDP is not supported on Gaudi1
-    MODELS_TO_TEST = {"bf16": []}
+MODELS_TO_TEST = {
+    "bf16": [
+        (
+            "bert-base-uncased",
+            "Habana/bert-base-uncased",
+            "question-answering",
+            24,
+            8,
+            "run_qa.py",
+            "full_shard",
+        ),
+        (
+            "meta-llama/Llama-2-7b-hf",
+            "",
+            "language-modeling",
+            8,
+            8,
+            "run_lora_clm.py",
+            "auto_wrap",
+        ),
+    ],
+}
 
 
 def _test_fsdp(
@@ -166,6 +161,7 @@ def _test_fsdp(
     )
 
 
+@pytest.mark.skipif("gaudi1" == OH_DEVICE_CONTEXT, reason="FSDP is not supported on Gaudi1")
 @pytest.mark.parametrize("model_name, gaudi_config, task, bs_train, bs_eval, script, policy", MODELS_TO_TEST["bf16"])
 def test_fsdp_bf16(
     model_name: str,
6 changes: 3 additions & 3 deletions tests/test_image_to_text_example.py
@@ -11,8 +11,8 @@
 from .utils import OH_DEVICE_CONTEXT
 
 
-if OH_DEVICE_CONTEXT in ["gaudi2"]:
-    # Gaudi2 CI baselines
+if OH_DEVICE_CONTEXT not in ["gaudi1"]:
+    # Gaudi2+
     MODELS_TO_TEST = {
         "bf16": [
             # ("llava-hf/llava-1.5-7b-hf", 1),
@@ -36,7 +36,7 @@
         ],
     }
 else:
-    # Gaudi1 CI baselines
+    # Gaudi1
     MODELS_TO_TEST = {
         "bf16": [
             ("llava-hf/llava-1.5-7b-hf", 1),
5 changes: 3 additions & 2 deletions tests/test_text_generation_example.py
@@ -19,7 +19,8 @@
 prev_quant_model_name = None
 prev_quant_rank = 0
 
-if OH_DEVICE_CONTEXT in ["gaudi2"]:
+if OH_DEVICE_CONTEXT not in ["gaudi1"]:
+    # Gaudi2+
     MODELS_TO_TEST = {
         "bf16_1x": [
             ("bigscience/bloomz-7b1", 1, False, False),
@@ -114,7 +115,7 @@
         ],
     }
 else:
-    # Gaudi1 CI
+    # Gaudi1
     MODELS_TO_TEST = {
         "bf16_1x": [
             ("bigscience/bloomz-7b1", 1, False, False),
