From 542d814780d7c87a4c8aefb6959a2c1a0a7fd3af Mon Sep 17 00:00:00 2001
From: Erik Schultheis
Date: Tue, 14 Jan 2025 00:02:15 +0200
Subject: [PATCH] extended test for python implementation + adjusted exit codes for consistency

---
 .github/workflows/cuda_test.yml         | 32 ------------
 .github/workflows/runner_ci.yml         | 65 +++++++++++++++++++++++++
 scripts/ci_test_python.py               | 52 ++++++++++++++++++++
 src/discord-cluster-manager/eval.cu     |  9 ++--
 src/discord-cluster-manager/eval.py     | 12 +++--
 src/discord-cluster-manager/run_eval.py |  6 +--
 6 files changed, 133 insertions(+), 43 deletions(-)
 delete mode 100644 .github/workflows/cuda_test.yml
 create mode 100644 .github/workflows/runner_ci.yml
 create mode 100644 scripts/ci_test_python.py

diff --git a/.github/workflows/cuda_test.yml b/.github/workflows/cuda_test.yml
deleted file mode 100644
index d88aa102..00000000
--- a/.github/workflows/cuda_test.yml
+++ /dev/null
@@ -1,32 +0,0 @@
-name: CUDA Test
-
-on:
-  push:
-    branches: [ main ]
-  pull_request:
-    branches: [ main ]
-
-jobs:
-  check-identity:
-    runs-on: [gpumode-nvidia-arc]
-    timeout-minutes: 10
-    container:
-      image: nvidia/cuda:12.4.0-devel-ubuntu22.04
-    steps:
-      - uses: actions/checkout@v3
-
-      - name: Setup Python
-        uses: actions/setup-python@v5
-        with:
-          python-version: '3.10'
-
-      - name: Install pytest
-        shell: bash
-        run: pip install pytest
-
-      - name: Run script
-        shell: bash
-        run: pytest scripts/ci_test_cuda.py
-
-    env:
-      CUDA_VISIBLE_DEVICES: 0
diff --git a/.github/workflows/runner_ci.yml b/.github/workflows/runner_ci.yml
new file mode 100644
index 00000000..216926f6
--- /dev/null
+++ b/.github/workflows/runner_ci.yml
@@ -0,0 +1,65 @@
+name: Runner CI
+
+on:
+  push:
+    branches: [ main ]
+  pull_request:
+    branches: [ main ]
+
+jobs:
+  check-cuda:
+    runs-on: [gpumode-nvidia-arc]
+    timeout-minutes: 10
+    container:
+      image: nvidia/cuda:12.4.0-devel-ubuntu22.04
+    steps:
+      - uses: actions/checkout@v3
+
+      - name: Setup Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: '3.10'
+
+      - name: Install pytest
+        shell: bash
+        run: pip install pytest
+
+      - name: Run script
+        shell: bash
+        run: pytest scripts/ci_test_cuda.py
+
+    env:
+      CUDA_VISIBLE_DEVICES: 0
+
+  check-pytorch:
+    runs-on: [gpumode-nvidia-arc]
+    timeout-minutes: 10
+    container:
+      image: nvidia/cuda:12.4.0-devel-ubuntu22.04
+    steps:
+      - uses: actions/checkout@v3
+
+      - name: Setup Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: '3.10'
+
+      - name: Install uv
+        uses: astral-sh/setup-uv@v3
+        with:
+          version: "latest"
+
+      - name: Setup Python environment
+        run: |
+          uv venv .venv
+          echo "VIRTUAL_ENV=$PWD/.venv" >> $GITHUB_ENV
+          echo "$PWD/.venv/bin" >> $GITHUB_PATH
+          uv pip install numpy torch setuptools ninja pytest
+
+      - name: Run script
+        shell: bash
+        run: pytest scripts/ci_test_python.py
+
+    env:
+      CUDA_VISIBLE_DEVICES: 0
+
diff --git a/scripts/ci_test_python.py b/scripts/ci_test_python.py
new file mode 100644
index 00000000..791e2b1c
--- /dev/null
+++ b/scripts/ci_test_python.py
@@ -0,0 +1,52 @@
+import os
+import sys
+from pathlib import Path
+
+if Path().resolve().name == "scripts":
+    os.chdir("..")
+
+sys.path.append("src/discord-cluster-manager")
+
+from leaderboard_eval import py_eval
+from run_eval import run_pytorch_script
+
+ref = Path("examples/identity_py/reference.py")
+
+
+def test_does_not_import():
+    # not valid python (stray indentation), so the submission fails to import
+    sub = """
+    this is a syntax error
+    """
+
+    run = run_pytorch_script(py_eval, ref.read_text(), sub, arch=None)
+    assert run.success is False
+    assert run.exit_code == 1
+    assert "IndentationError: unexpected indent\n" in run.stderr
+
+
+def test_error():
+    # runs fine, but returns zeros instead of its input, so the result is wrong
+    sub = """
+import torch
+def custom_kernel(input):
+    return [torch.zeros_like(i) for i in input]
+    """
+    run = run_pytorch_script(py_eval, ref.read_text(), sub, arch=None)
+    assert run.success is True
+    assert run.command == "python eval.py"
+    # we never reach the benchmark part, because the correctness check fails
+    assert "warming up..." not in run.stdout
+    assert "mismatch found! custom implementation doesnt match reference." in run.stdout
+    assert run.exit_code == 112
+    assert run.result["check"] == "fail"
+
+
+def test_correct():
+    sub = Path("examples/identity_py/submission.py").read_text()
+
+    run = run_pytorch_script(py_eval, ref.read_text(), sub, arch=None)
+    assert run.success is True
+    assert "warming up..." in run.stdout
+    assert run.exit_code == 0
+    assert run.result["check"] == "pass"
diff --git a/src/discord-cluster-manager/eval.cu b/src/discord-cluster-manager/eval.cu
index a1b663a3..de2f909b 100644
--- a/src/discord-cluster-manager/eval.cu
+++ b/src/discord-cluster-manager/eval.cu
@@ -46,8 +46,7 @@ static void cuda_check(cudaError_t status, const char* expr, const char* file, i
                   << line << ") in `"
                   << function << "`: "
                   << cudaGetErrorString(status) << std::endl;
-        // following pytest convention, exit code 3 means internal error
-        std::exit(3);
+        std::exit(110);
     }
 }
 
@@ -83,7 +82,7 @@ void measure_runtime(PopcornOutput& logger) {
         auto reference_output = ref_kernel(copy);
         if (!check_implementation(submission_output, reference_output)) {
             logger.log("check", "fail");
-            std::exit(1);
+            std::exit(112);
         }
     }
 
@@ -122,7 +121,7 @@ int main() {
         int fd = std::stoi(output_fd);
         logger.File.reset(::fdopen(fd, "w"));
     } else {
-        return 4; // pytest: usage error
+        return 111;
     }
 
     auto data = generate_input();
@@ -131,7 +130,7 @@ int main() {
 
     if (!check_implementation(submission_output, reference_output)) {
         logger.log("check", "fail");
-        return 1;
+        return 112;
     }
 
     measure_runtime(logger);
diff --git a/src/discord-cluster-manager/eval.py b/src/discord-cluster-manager/eval.py
index 7928ce50..f3a9a63f 100644
--- a/src/discord-cluster-manager/eval.py
+++ b/src/discord-cluster-manager/eval.py
@@ -1,5 +1,6 @@
 import math
 import os
+import sys
 import time
 
 import torch
@@ -56,7 +57,7 @@ def metric(logger: PopcornLogger):
         torch.cuda.synchronize()
         if not check_implementation(custom_output, ref_output):
             logger.log("check", "fail")
-            exit(1)
+            exit(112)
 
     total_time = sum(times)
     average_duration = total_time / timed_runs
@@ -75,10 +76,15 @@ def metric(logger: PopcornLogger):
 
 
 def main():
-    logger = PopcornLogger(int(os.environ["POPCORN_FD"]))
+    try:
+        logger = PopcornLogger(int(os.environ["POPCORN_FD"]))
+    except Exception as e:
+        print(e, file=sys.stderr)
+        exit(111)
+
     if not correctness():
         logger.log("check", "fail")
-        exit(1)
+        exit(112)
     metric(logger)
 
 
diff --git a/src/discord-cluster-manager/run_eval.py b/src/discord-cluster-manager/run_eval.py
index 938827f3..41e34c6f 100644
--- a/src/discord-cluster-manager/run_eval.py
+++ b/src/discord-cluster-manager/run_eval.py
@@ -141,10 +141,10 @@ def run_program(args: list[str]) -> RunResult:
         key, _, value = line.partition(":")
         result_dict[key.strip()] = value.strip()
 
+    # exit code 0: everything was fine
+    # exit code 112: the program ran fine, but the correctness check failed
     return RunResult(
-        # TODO should we return 0 also on test failure?
-        # TODO check what return codes python uses, e.g. on uncaught exception
-        success=(run_process.returncode == 0 or run_process.returncode == 1),
+        success=(run_process.returncode == 0 or run_process.returncode == 112),
         command=_make_cmd(run_process.args),
         stdout=run_process.stdout,
        stderr=run_process.stderr,
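
Note (reviewer sketch, not part of the patch): eval.py and eval.cu now share a small exit-code protocol — 0 means the correctness check passed, 110 an internal CUDA error, 111 a usage error (e.g. POPCORN_FD not set), and 112 that the program ran but the submission's output was wrong. The snippet below is a minimal illustration of how a caller can interpret these codes; the constant names are hypothetical, since the patch itself hard-codes the numbers.

```python
# Hypothetical names for the exit codes introduced by this patch.
EXIT_SUCCESS = 0        # correctness check passed, benchmark ran
EXIT_CUDA_ERROR = 110   # internal CUDA error (eval.cu only)
EXIT_USAGE_ERROR = 111  # harness misuse, e.g. POPCORN_FD not set
EXIT_TEST_FAIL = 112    # program ran, but the submission produced wrong results


def run_succeeded(returncode: int) -> bool:
    # Mirrors the new run_eval.py logic: a wrong submission is still a
    # successful *run*; any code other than 0/112 means the harness broke.
    return returncode in (EXIT_SUCCESS, EXIT_TEST_FAIL)


assert run_succeeded(0) and run_succeeded(112)
assert not run_succeeded(1)    # e.g. uncaught exception in the submission
assert not run_succeeded(111)  # POPCORN_FD missing
```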