extended test for python implementation + adjusted exit codes for consistency
ngc92 committed Jan 13, 2025
1 parent f58a407 commit 542d814
Showing 6 changed files with 133 additions and 43 deletions.
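
The exit codes touched throughout this diff follow a single convention across the CUDA and Python runners. A minimal sketch with illustrative constant names (an assumption for readability; the sources below use the raw numbers directly):

    # Hypothetical names for the convention used in eval.cu, eval.py, run_eval.py.
    EXIT_OK = 0            # ran to completion, submission matched the reference
    EXIT_CUDA_ERROR = 110  # a CUDA API call failed inside cuda_check
    EXIT_USAGE_ERROR = 111 # POPCORN_FD missing or unusable
    EXIT_TEST_FAIL = 112   # program ran fine, but the correctness check failed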
32 changes: 0 additions & 32 deletions .github/workflows/cuda_test.yml

This file was deleted.

65 changes: 65 additions & 0 deletions .github/workflows/runner_ci.yml
@@ -0,0 +1,65 @@
name: Runner CI

on:
  push:
    branches: [ main ]
  pull_request:
    branches: [ main ]

jobs:
  check-cuda:
    runs-on: [gpumode-nvidia-arc]
    timeout-minutes: 10
    container:
      image: nvidia/cuda:12.4.0-devel-ubuntu22.04
    steps:
      - uses: actions/checkout@v3

      - name: Setup Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.10'

      - name: Install pytest
        shell: bash
        run: pip install pytest

      - name: Run script
        shell: bash
        run: pytest scripts/ci_test_cuda.py
        env:
          CUDA_VISIBLE_DEVICES: 0

  check-pytorch:
    runs-on: [gpumode-nvidia-arc]
    timeout-minutes: 10
    container:
      image: nvidia/cuda:12.4.0-devel-ubuntu22.04
    steps:
      - uses: actions/checkout@v3

      - name: Setup Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.10'

      - name: Install uv
        uses: astral-sh/setup-uv@v3
        with:
          version: "latest"

      - name: Setup Python environment
        run: |
          uv venv .venv
          echo "VIRTUAL_ENV=$PWD/.venv" >> $GITHUB_ENV
          echo "$PWD/.venv/bin" >> $GITHUB_PATH
          uv pip install numpy torch setuptools ninja pytest

      - name: Run script
        shell: bash
        run: pytest scripts/ci_test_python.py
        env:
          CUDA_VISIBLE_DEVICES: 0
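
Both jobs pin the tests to a single GPU via CUDA_VISIBLE_DEVICES. A quick sanity check one could run inside the container (not part of this commit; assumes torch is installed, as in the check-pytorch job):

    import os
    import torch

    # The env var must be set before CUDA is initialized for it to take effect.
    os.environ.setdefault("CUDA_VISIBLE_DEVICES", "0")
    assert torch.cuda.is_available()
    print(torch.cuda.device_count())  # expected: 1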

52 changes: 52 additions & 0 deletions scripts/ci_test_python.py
@@ -0,0 +1,52 @@
import os
import sys
from pathlib import Path

if Path().resolve().name == "scripts":
    os.chdir("..")

sys.path.append("src/discord-cluster-manager")

from leaderboard_eval import py_eval
from run_eval import run_pytorch_script

ref = Path("examples/identity_py/reference.py")


def test_does_not_import():
    # the submission is not valid Python, so it fails before it can even run
    sub = """
    this is a syntax error
    """

    run = run_pytorch_script(py_eval, ref.read_text(), sub, arch=None)
    assert run.success is False
    assert run.exit_code == 1
    assert "IndentationError: unexpected indent\n" in run.stderr


def test_error():
    # returns zeros instead of the input, so it runs fine but is incorrect
    sub = """
import torch
def custom_kernel(input):
    return [torch.zeros_like(i) for i in input]
"""
    run = run_pytorch_script(py_eval, ref.read_text(), sub, arch=None)
    assert run.success is True
    assert run.command == "python eval.py"
    # we never reach the benchmark part, because the correctness check fails
    assert "warming up..." not in run.stdout
    assert "mismatch found! custom implementation doesnt match reference." in run.stdout
    assert run.exit_code == 112
    assert run.result["check"] == "fail"


def test_correct():
    sub = Path("examples/identity_py/submission.py").read_text()

    run = run_pytorch_script(py_eval, ref.read_text(), sub, arch=None)
    assert run.success is True
    assert "warming up..." in run.stdout
    assert run.exit_code == 0
    assert run.result["check"] == "pass"
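
The assertions above pin down the result shape that run_pytorch_script returns. A sketch inferred from the attributes the tests access (hypothetical name; the real definition lives in run_eval.py):

    from dataclasses import dataclass, field

    @dataclass
    class RunResultSketch:
        success: bool   # True for exit codes 0 and 112, see run_eval.py below
        command: str    # e.g. "python eval.py"
        exit_code: int  # 0 pass, 1 interpreter error, 111 usage error, 112 check failed
        stdout: str
        stderr: str
        result: dict = field(default_factory=dict)  # parsed "key: value" lines, e.g. {"check": "fail"}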
9 changes: 4 additions & 5 deletions src/discord-cluster-manager/eval.cu
@@ -46,8 +46,7 @@ static void cuda_check(cudaError_t status, const char* expr, const char* file, i
                   << line << ") in `"
                   << function << "`: "
                   << cudaGetErrorString(status) << std::endl;
-        // following pytest convention, exit code 3 means internal error
-        std::exit(3);
+        std::exit(110);
     }
 }

@@ -83,7 +82,7 @@ void measure_runtime(PopcornOutput& logger) {
     auto reference_output = ref_kernel(copy);
     if (!check_implementation(submission_output, reference_output)) {
         logger.log("check", "fail");
-        std::exit(1);
+        std::exit(112);
     }

 }

@@ -122,7 +121,7 @@ int main() {
         int fd = std::stoi(output_fd);
         logger.File.reset(::fdopen(fd, "w"));
     } else {
-        return 4; // pytest: usage error
+        return 111;
     }

     auto data = generate_input();
@@ -131,7 +130,7 @@

     if (!check_implementation(submission_output, reference_output)) {
         logger.log("check", "fail");
-        return 1;
+        return 112;
     }

     measure_runtime(logger);
12 changes: 9 additions & 3 deletions src/discord-cluster-manager/eval.py
@@ -1,5 +1,6 @@
 import math
 import os
+import sys
 import time

 import torch
@@ -56,7 +57,7 @@ def metric(logger: PopcornLogger):
     torch.cuda.synchronize()
     if not check_implementation(custom_output, ref_output):
         logger.log("check", "fail")
-        exit(1)
+        exit(112)

     total_time = sum(times)
     average_duration = total_time / timed_runs
@@ -75,10 +76,15 @@ def metric(logger: PopcornLogger):


 def main():
-    logger = PopcornLogger(int(os.environ["POPCORN_FD"]))
+    try:
+        logger = PopcornLogger(int(os.environ["POPCORN_FD"]))
+    except Exception as e:
+        print(e, file=sys.stderr)
+        exit(111)

     if not correctness():
         logger.log("check", "fail")
-        exit(1)
+        exit(112)
     metric(logger)


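eval.py now exits with 111 when the POPCORN_FD descriptor cannot be opened. A hedged sketch of how a parent process might supply that descriptor (illustrative only; the real wiring lives in run_eval.py):

    import os
    import subprocess

    read_fd, write_fd = os.pipe()
    env = dict(os.environ, POPCORN_FD=str(write_fd))
    subprocess.run(["python", "eval.py"], env=env, pass_fds=(write_fd,))
    os.close(write_fd)
    with os.fdopen(read_fd) as pipe:
        for line in pipe:                        # e.g. "check: pass"
            key, _, value = line.partition(":")  # same parsing as run_program
            print(key.strip(), "->", value.strip())
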
6 changes: 3 additions & 3 deletions src/discord-cluster-manager/run_eval.py
@@ -141,10 +141,10 @@ def run_program(args: list[str]) -> RunResult:
         key, _, value = line.partition(":")
         result_dict[key.strip()] = value.strip()

+    # 0 everything was fine
+    # 112 program ran fine, but we detected a test failure
     return RunResult(
-        # TODO should we return 0 also on test failure?
-        # TODO check what return codes python uses, e.g. on uncaught exception
-        success=(run_process.returncode == 0 or run_process.returncode == 1),
+        success=(run_process.returncode == 0 or run_process.returncode == 112),
         command=_make_cmd(run_process.args),
         stdout=run_process.stdout,
         stderr=run_process.stderr,
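
With the old convention, exit code 1 was ambiguous between "the check failed" and "Python crashed with an uncaught exception"; moving test failures to 112 removes that ambiguity. An illustrative helper (not part of the repo) showing how a caller can now tell the two apart:

    def submission_passed(run) -> bool:
        # run.success only says the harness completed (exit 0 or 112);
        # the parsed check result says whether the output matched the reference.
        return run.success and run.result.get("check") == "pass"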
