extended test for python implementation + adjusted exit codes for consistency
ngc92 committed Jan 13, 2025
1 parent f58a407 commit 542d814
Showing 6 changed files with 133 additions and 43 deletions.
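
The exit codes touched throughout this diff follow a single convention across the CUDA and Python runners. A minimal sketch with illustrative constant names (an assumption for readability; the sources below use the raw numbers directly):

    # Hypothetical names for the convention used in eval.cu, eval.py, run_eval.py.
    EXIT_OK = 0            # ran to completion, submission matched the reference
    EXIT_CUDA_ERROR = 110  # a CUDA API call failed inside cuda_check
    EXIT_USAGE_ERROR = 111 # POPCORN_FD missing or unusable
    EXIT_TEST_FAIL = 112   # program ran fine, but the correctness check failed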
32 changes: 0 additions & 32 deletions .github/workflows/cuda_test.yml

This file was deleted.

65 changes: 65 additions & 0 deletions .github/workflows/runner_ci.yml
@@ -0,0 +1,65 @@
name: Runner CI

on:
  push:
    branches: [ main ]
  pull_request:
    branches: [ main ]

jobs:
  check-cuda:
    runs-on: [gpumode-nvidia-arc]
    timeout-minutes: 10
    container:
      image: nvidia/cuda:12.4.0-devel-ubuntu22.04
    steps:
      - uses: actions/checkout@v3

      - name: Setup Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.10'

      - name: Install pytest
        shell: bash
        run: pip install pytest

      - name: Run script
        shell: bash
        run: pytest scripts/ci_test_cuda.py
        env:
          CUDA_VISIBLE_DEVICES: 0

  check-pytorch:
    runs-on: [gpumode-nvidia-arc]
    timeout-minutes: 10
    container:
      image: nvidia/cuda:12.4.0-devel-ubuntu22.04
    steps:
      - uses: actions/checkout@v3

      - name: Setup Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.10'

      - name: Install uv
        uses: astral-sh/setup-uv@v3
        with:
          version: "latest"

      - name: Setup Python environment
        run: |
          uv venv .venv
          echo "VIRTUAL_ENV=$PWD/.venv" >> $GITHUB_ENV
          echo "$PWD/.venv/bin" >> $GITHUB_PATH
          uv pip install numpy torch setuptools ninja pytest

      - name: Run script
        shell: bash
        run: pytest scripts/ci_test_python.py
        env:
          CUDA_VISIBLE_DEVICES: 0
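
Both jobs pin the tests to a single GPU via CUDA_VISIBLE_DEVICES. A quick sanity check one could run inside the container (not part of this commit; assumes torch is installed, as in the check-pytorch job):

    import os
    import torch

    # The env var must be set before CUDA is initialized for it to take effect.
    os.environ.setdefault("CUDA_VISIBLE_DEVICES", "0")
    assert torch.cuda.is_available()
    print(torch.cuda.device_count())  # expected: 1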

52 changes: 52 additions & 0 deletions scripts/ci_test_python.py
@@ -0,0 +1,52 @@
import os
import sys
from pathlib import Path

if Path().resolve().name == "scripts":
    os.chdir("..")

sys.path.append("src/discord-cluster-manager")

from leaderboard_eval import py_eval
from run_eval import run_pytorch_script

ref = Path("examples/identity_py/reference.py")


def test_does_not_import():
    # the submission is not valid Python, so it fails before it can even run
    sub = """
    this is a syntax error
    """

    run = run_pytorch_script(py_eval, ref.read_text(), sub, arch=None)
    assert run.success is False
    assert run.exit_code == 1
    assert "IndentationError: unexpected indent\n" in run.stderr


def test_error():
    # returns zeros instead of the input, so it runs fine but is incorrect
    sub = """
import torch
def custom_kernel(input):
    return [torch.zeros_like(i) for i in input]
"""
    run = run_pytorch_script(py_eval, ref.read_text(), sub, arch=None)
    assert run.success is True
    assert run.command == "python eval.py"
    # we never reach the benchmark part, because the correctness check fails
    assert "warming up..." not in run.stdout
    assert "mismatch found! custom implementation doesnt match reference." in run.stdout
    assert run.exit_code == 112
    assert run.result["check"] == "fail"


def test_correct():
    sub = Path("examples/identity_py/submission.py").read_text()

    run = run_pytorch_script(py_eval, ref.read_text(), sub, arch=None)
    assert run.success is True
    assert "warming up..." in run.stdout
    assert run.exit_code == 0
    assert run.result["check"] == "pass"
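
The assertions above pin down the result shape that run_pytorch_script returns. A sketch inferred from the attributes the tests access (hypothetical name; the real definition lives in run_eval.py):

    from dataclasses import dataclass, field

    @dataclass
    class RunResultSketch:
        success: bool   # True for exit codes 0 and 112, see run_eval.py below
        command: str    # e.g. "python eval.py"
        exit_code: int  # 0 pass, 1 interpreter error, 111 usage error, 112 check failed
        stdout: str
        stderr: str
        result: dict = field(default_factory=dict)  # parsed "key: value" lines, e.g. {"check": "fail"}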
9 changes: 4 additions & 5 deletions src/discord-cluster-manager/eval.cu
@@ -46,8 +46,7 @@ static void cuda_check(cudaError_t status, const char* expr, const char* file, i
                   << line << ") in `"
                   << function << "`: "
                   << cudaGetErrorString(status) << std::endl;
-        // following pytest convention, exit code 3 means internal error
-        std::exit(3);
+        std::exit(110);
     }
 }

@@ -83,7 +82,7 @@ void measure_runtime(PopcornOutput& logger) {
     auto reference_output = ref_kernel(copy);
     if (!check_implementation(submission_output, reference_output)) {
         logger.log("check", "fail");
-        std::exit(1);
+        std::exit(112);
     }

 }

@@ -122,7 +121,7 @@ int main() {
         int fd = std::stoi(output_fd);
         logger.File.reset(::fdopen(fd, "w"));
     } else {
-        return 4; // pytest: usage error
+        return 111;
     }

     auto data = generate_input();
@@ -131,7 +130,7 @@

     if (!check_implementation(submission_output, reference_output)) {
         logger.log("check", "fail");
-        return 1;
+        return 112;
     }

     measure_runtime(logger);
12 changes: 9 additions & 3 deletions src/discord-cluster-manager/eval.py
@@ -1,5 +1,6 @@
 import math
 import os
+import sys
 import time

 import torch
@@ -56,7 +57,7 @@ def metric(logger: PopcornLogger):
     torch.cuda.synchronize()
     if not check_implementation(custom_output, ref_output):
         logger.log("check", "fail")
-        exit(1)
+        exit(112)

     total_time = sum(times)
     average_duration = total_time / timed_runs
@@ -75,10 +76,15 @@ def metric(logger: PopcornLogger):


 def main():
-    logger = PopcornLogger(int(os.environ["POPCORN_FD"]))
+    try:
+        logger = PopcornLogger(int(os.environ["POPCORN_FD"]))
+    except Exception as e:
+        print(e, file=sys.stderr)
+        exit(111)

     if not correctness():
         logger.log("check", "fail")
-        exit(1)
+        exit(112)
     metric(logger)


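eval.py now exits with 111 when the POPCORN_FD descriptor cannot be opened. A hedged sketch of how a parent process might supply that descriptor (illustrative only; the real wiring lives in run_eval.py):

    import os
    import subprocess

    read_fd, write_fd = os.pipe()
    env = dict(os.environ, POPCORN_FD=str(write_fd))
    subprocess.run(["python", "eval.py"], env=env, pass_fds=(write_fd,))
    os.close(write_fd)
    with os.fdopen(read_fd) as pipe:
        for line in pipe:                        # e.g. "check: pass"
            key, _, value = line.partition(":")  # same parsing as run_program
            print(key.strip(), "->", value.strip())
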
6 changes: 3 additions & 3 deletions src/discord-cluster-manager/run_eval.py
@@ -141,10 +141,10 @@ def run_program(args: list[str]) -> RunResult:
         key, _, value = line.partition(":")
         result_dict[key.strip()] = value.strip()

+    # 0 everything was fine
+    # 112 program ran fine, but we detected a test failure
     return RunResult(
-        # TODO should we return 0 also on test failure?
-        # TODO check what return codes python uses, e.g. on uncaught exception
-        success=(run_process.returncode == 0 or run_process.returncode == 1),
+        success=(run_process.returncode == 0 or run_process.returncode == 112),
         command=_make_cmd(run_process.args),
         stdout=run_process.stdout,
         stderr=run_process.stderr,
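
With the old convention, exit code 1 was ambiguous between "the check failed" and "Python crashed with an uncaught exception"; moving test failures to 112 removes that ambiguity. An illustrative helper (not part of the repo) showing how a caller can now tell the two apart:

    def submission_passed(run) -> bool:
        # run.success only says the harness completed (exit 0 or 112);
        # the parsed check result says whether the output matched the reference.
        return run.success and run.result.get("check") == "pass"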
