Commit: Individual test cases and benchmarks for cuda
ngc92 committed Jan 21, 2025
1 parent e5cf813 commit 8faa2f9
Showing 15 changed files with 775 additions and 159 deletions.
407 changes: 335 additions & 72 deletions examples/identity_cuda/eval.cu

Large diffs are not rendered by default.

34 changes: 13 additions & 21 deletions examples/identity_cuda/reference.cuh
@@ -11,17 +11,15 @@

 #include "task.h"

-static input_t generate_input(int seed) {
+static input_t generate_input(int seed, int size) {
     std::mt19937 rng(seed);
     input_t data;

     std::uniform_real_distribution<float> dist(0, 1);

-    for (int i = 0; i < N_SIZES; ++i) {
-        data[i].resize(Ns[i]);
-        for (int j = 0; j < Ns[i]; ++j) {
-            data[i][j] = dist(rng);
-        }
+    data.resize(size);
+    for (int j = 0; j < size; ++j) {
+        data[j] = dist(rng);
     }

     return data;
@@ -32,28 +30,22 @@ static output_t ref_kernel(input_t data) {
     return (output_t) data;
 }

-static bool check_implementation(output_t out, output_t ref, float epsilon = 1e-5) {
+static void check_implementation(TestReporter& reporter, output_t out, output_t ref, float epsilon = 1e-5) {
     // input_t data = generate_input();
     // output_t reference_out = reference(data);

-    for (int i = 0; i < N_SIZES; ++i) {
-        auto ref_ptr = ref[i];
-        auto out_ptr = out[i];
-
-        if(out[i].size() != Ns[i]) {
-            std::cerr << "SIZE MISMATCH at " << i << ": " << Ns[i] << " " << out[i].size() << std::endl;
-            return false;
-        }
+    if(out.size() != ref.size()) {
+        if(!reporter.check_equal("size mismatch", out.size(), ref.size())) return;
+    }

-        for (int j = 0; j < Ns[i]; ++j) {
-            if (std::fabs(ref_ptr[j] - out_ptr[j]) > epsilon) {
-                std::cerr << "ERROR AT " << i << ", "<< j << ": " << ref_ptr[j] << " " << out_ptr[j] << std::endl;
-                return false;
-            }
+    for (int j = 0; j < ref.size(); ++j) {
+        if (std::fabs(ref[j] - out[j]) > epsilon) {
+            reporter.fail() << "error at " << j << ": " << ref[j] << " " << std::to_string(out[j]);
+            return;
         }
     }

-    return true;
+    reporter.pass();
 }

#endif
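
The eval.cu diff is collapsed above, so the driver side of this flow is not shown. As orientation, here is a minimal sketch, assuming only the interfaces visible in this commit, of how a single test case could be run end to end; TestCase and run_one are illustrative names, not the actual eval.cu code:

// Illustrative driver, not the actual eval.cu.
// custom_kernel comes from submission.cu; the rest is defined above.
struct TestCase { int seed; int size; };

static bool run_one(const TestCase& tc) {
    TestReporter reporter;
    input_t data = generate_input(tc.seed, tc.size);
    output_t ref = ref_kernel(data);
    output_t out = custom_kernel(data);   // submission under test
    check_implementation(reporter, out, ref);
    if (!reporter.has_passed()) {
        std::cerr << "test failed: " << reporter.message() << std::endl;
    }
    return reporter.has_passed();
}

A full driver would presumably loop over the cases listed in task.yml and exit with EXIT_TEST_FAIL from utils.h if any case fails.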
8 changes: 6 additions & 2 deletions examples/identity_cuda/submission.cu
@@ -14,8 +14,12 @@ __global__ void copy_kernel(float *input, float *output, int N)

 output_t custom_kernel(input_t data)
 {
+    if(data.size() > 256) {
+        data[0] = -1;
+    }
+    return data;
     output_t result;
-
+    /*
     for (int i = 0; i < N_SIZES; ++i)
     {
         int N = Ns[i];
@@ -41,6 +45,6 @@ output_t custom_kernel(input_t data)
         CUDA_CHECK(cudaFree(d_input));
         CUDA_CHECK(cudaFree(d_output));
     }
-
+    */
     return result;
 }
12 changes: 5 additions & 7 deletions examples/identity_cuda/task.h
@@ -1,14 +1,12 @@
-#ifndef __TASK_H__
-#define __TASK_H__
+#ifndef __POPCORN_TASK_H__
+#define __POPCORN_TASK_H__

 #include <vector>
 #include <array>

-#define N_SIZES 10
-const int Ns[N_SIZES] = {128, 256, 512, 1024, 2048,
-                         4096, 8192, 16384, 32768, 65536};
-
-using input_t = std::array<std::vector<float>, N_SIZES>;
+using input_t = std::vector<float>;
 using output_t = input_t;

+constexpr std::array<const char*, 2> ArgumentNames = {"seed", "size"};
+
 #endif
17 changes: 17 additions & 0 deletions examples/identity_cuda/task.yml
@@ -22,3 +22,20 @@ config:

   # additional include directories
   include_dirs: []
+
+# small test cases. should be cheap to run.
+tests:
+  - {"size": 127, "seed": 4242}
+  - {"size": 128, "seed": 5236}
+  - {"size": 129, "seed": 1001}
+  - {"size": 256, "seed": 5531}
+  - {"size": 512, "seed": 9173}
+
+benchmarks:
+  - {"size": 1024, "seed": 54352}
+  - {"size": 2048, "seed": 93246}
+  - {"size": 4096, "seed": 6256}
+  - {"size": 8192, "seed": 8841}
+  - {"size": 16384, "seed": 6252}
+  - {"size": 32768, "seed": 52624}
+  - {"size": 65536, "seed": 125432}
69 changes: 68 additions & 1 deletion examples/identity_cuda/utils.h
@@ -2,6 +2,16 @@
 #define POPCORN_UTILS_H

 #include <iostream>
+#include <string>
+#include <sstream>
+
+enum ExitCodes: int {
+    USAGE_ERROR = 2,      // (standard?) exit code for wrong command line
+    EXIT_CUDA_API = 110,  // cuda API call returned an error
+    EXIT_PIPE_FAIL = 111, // could not set up communication with runner
+    EXIT_TEST_FAIL = 112, // a test case failed
+    EXIT_TEST_SPEC = 113  // error when trying to construct a test case
+};

 // checks that a CUDA API call returned successfully, otherwise prints an error message and exits.
 static inline void cuda_check_(cudaError_t status, const char* expr, const char* file, int line, const char* function)
@@ -13,12 +23,69 @@ static inline void cuda_check_(cudaError_t status, const char* expr, const char*
             << line << ") in `"
             << function << "`: "
             << cudaGetErrorString(status) << std::endl;
-        std::exit(110);
+        std::exit(ExitCodes::EXIT_CUDA_API);
     }
 }

 // Convenience macro, automatically logs expression, file, line, and function name
 // of the error.
 #define CUDA_CHECK(expr) cuda_check_(expr, #expr, __FILE__, __LINE__, __FUNCTION__)

+
+struct TestVerdict {
+    bool Pass;
+    std::string Message = "";
+};
+
+class TestReporter {
+public:
+    TestReporter() = default;
+
+    void pass() {
+        if(m_State != NONE) {
+            std::cerr << "Trying to mark result of test twice."
+                         " This indicates an error in the task definition, please report."
+                      << std::endl;
+            std::exit(EXIT_TEST_SPEC);
+        }
+        m_State = PASS;
+    }
+    std::stringstream& fail() {
+        if(m_State != NONE) {
+            std::cerr << "Trying to mark result of test twice."
+                         " This indicates an error in the task definition, please report."
+                      << std::endl;
+            std::exit(EXIT_TEST_SPEC);
+        }
+        m_State = FAIL;
+        return m_Message;
+    }

+    bool has_passed() const {
+        if(m_State == NONE) {
+            std::cerr << "Trying to query result of unfinished test."
+                         " This indicates an error in the task definition, please report."
+                      << std::endl;
+            std::exit(EXIT_TEST_SPEC);
+        }
+        return m_State == PASS;
+    }
+
+    template<class T>
+    bool check_equal(const char* message, const T& value, const T& expected) {
+        if(value == expected) return true;
+        fail() << message << ": expected `" << expected << "`, got `" << value << "`";
+        return false;
+    }
+
+    std::string message() const {
+        return m_Message.str();
+    }
+private:
+    enum State {
+        NONE, PASS, FAIL
+    };
+    State m_State = NONE;
+    std::stringstream m_Message;
+};
#endif
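
The contract implied by this class: each test case gets a fresh TestReporter, the check routine records exactly one verdict via pass() or fail(), and only then does the harness query it. A small illustrative fragment, not part of the commit:

TestReporter reporter;
reporter.fail() << "expected `" << 7 << "`, got `" << 3 << "`"; // records FAIL plus a message
bool failed = !reporter.has_passed(); // safe to query once a verdict exists
std::string why = reporter.message(); // retrieves the streamed failure text
// reporter.pass();                   // a second verdict would exit(EXIT_TEST_SPEC)

Recording a verdict twice, or querying before one exists, deliberately aborts with EXIT_TEST_SPEC rather than silently misreporting a test.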
22 changes: 14 additions & 8 deletions scripts/local-test.py
@@ -1,19 +1,25 @@
+import pprint
 import sys
 from pathlib import Path

 sys.path.append("src/discord-cluster-manager")

-from leaderboard_eval import cu_eval
 from run_eval import run_cuda_script

 ref = Path("examples/identity_cuda/reference.cuh")
-sub = Path("examples/identity_cuda/submission.cuh")
+sub = Path("examples/identity_cuda/submission.cu")
+util = Path("examples/identity_cuda/utils.h")
+task = Path("examples/identity_cuda/task.h")

-cout, score = run_cuda_script(
-    {"eval.cu": cu_eval},
-    {"reference.cuh": ref.read_text(), "submission.cuh": sub.read_text()},
+result = run_cuda_script(
+    {
+        "eval.cu": Path("examples/identity_cuda/eval.cu").read_text(),
+        "submission.cu": sub.read_text(),
+    },
+    {"reference.cuh": ref.read_text(), "utils.h": util.read_text(), "task.h": task.read_text()},
+    arch=None,
+    tests="size: 128; seed: 45\nsize: 512; seed: 123",
+    mode="test",
 )
-print(cout)
-print(score)
-exit(0 if score > 0 else 1)

+pprint.pprint(result)
77 changes: 73 additions & 4 deletions src/discord-cluster-manager/cogs/leaderboard_cog.py
@@ -11,7 +11,7 @@
     GPU_SELECTION,
     AllGPU,
     GitHubGPU,
-    ModalGPU,
+    ModalGPU, SubmissionMode,
 )
 from discord import app_commands
 from discord.ext import commands, tasks
@@ -42,7 +42,8 @@ async def async_submit_cog_job(
     task: LeaderboardTask,
     submission_content,
     gpu: AllGPU,
-    runner_name: str = "GitHub",
+    runner_name: str,
+    mode: SubmissionMode,
 ):
     discord_thread, result = await command(
         interaction,
@@ -52,13 +53,24 @@
             value=gpu.value,
         ),
         task=task,
+        mode=mode,
     )

+    # no point going further if this already failed
+    if discord_thread is None:
+        return -1
+
+    if mode == SubmissionMode.LEADERBOARD:
+        # public leaderboard run
+        pass
+    elif mode == SubmissionMode.PRIVATE:
+        # private leaderboard run
+        pass
+    else:
+        return 0
+
     try:
         print(result)
         if result.success:
             score = float(result.run.result["duration.mean"]) / 1e9

@@ -176,6 +188,7 @@ async def on_submit_hook(
         command: Callable,
         GPUsEnum: Type[Enum],
         runner_name: str,
+        mode: SubmissionMode,
     ) -> int:
         """
         Called as the main body of a submission to route to the correct runner.
@@ -216,10 +229,28 @@ async def on_submit_hook(
                 submission_content,
                 AllGPU[gpu],
                 runner_name,
+                mode,
             )
             for gpu in selected_gpus
         ]

+        # also schedule secret run
+        if mode == SubmissionMode.LEADERBOARD:
+            tasks += [
+                self.async_submit_cog_job(
+                    interaction,
+                    leaderboard_name,
+                    script,
+                    command,
+                    task,
+                    submission_content,
+                    AllGPU[gpu],
+                    runner_name,
+                    SubmissionMode.PRIVATE,
+                )
+                for gpu in selected_gpus
+            ]
+
         await asyncio.gather(*tasks)
         return 0

@@ -238,6 +269,7 @@ async def submit(
         interaction: discord.Interaction,
         leaderboard_name: str,
         script: discord.Attachment,
+        mode: SubmissionMode,
     ):
         # Call Modal runner
         runner_cog = self.bot.get_cog(f"{runner_name}Cog")
@@ -256,6 +288,7 @@ async def submit(
                 runner_command,
                 GPU_SELECTION[runner_name],
                 runner_name,
+                mode,
             )
         except Exception as e:
             logger.error("Error handling leaderboard submission", exc_info=e)
@@ -280,7 +313,7 @@ async def submit_modal(
         leaderboard_name: str,
         script: discord.Attachment,
     ):
-        return await self.submit("Modal", interaction, leaderboard_name, script)
+        return await self.submit("Modal", interaction, leaderboard_name, script, mode=SubmissionMode.LEADERBOARD)

     @app_commands.command(name="github", description="Submit leaderboard data for GitHub")
     @app_commands.describe(
@@ -294,7 +327,43 @@ async def submit_github(
         leaderboard_name: str,
         script: discord.Attachment,
     ):
-        return await self.submit("GitHub", interaction, leaderboard_name, script)
+        return await self.submit(
+            "GitHub", interaction, leaderboard_name, script, mode=SubmissionMode.LEADERBOARD
+        )
+
+    @app_commands.command(name="test", description="Start a testing/debugging run")
+    @app_commands.describe(
+        leaderboard_name="Name of the competition / kernel to optimize",
+        runner="Name of the runner to run on",
+        script="The Python / CUDA script file to run",
+    )
+    @app_commands.autocomplete(leaderboard_name=leaderboard_name_autocomplete)
+    async def submit_test(
+        self,
+        interaction: discord.Interaction,
+        runner: str,
+        leaderboard_name: str,
+        script: discord.Attachment,
+    ):
+        runner = {"github": "GitHub", "modal": "Modal"}[runner.lower()]
+        return await self.submit(runner, interaction, leaderboard_name, script, mode=SubmissionMode.TEST)
+
+    @app_commands.command(name="benchmark", description="Start a benchmarking run")
+    @app_commands.describe(
+        leaderboard_name="Name of the competition / kernel to optimize",
+        runner="Name of the runner to run on",
+        script="The Python / CUDA script file to run",
+    )
+    @app_commands.autocomplete(leaderboard_name=leaderboard_name_autocomplete)
+    async def submit_bench(
+        self,
+        interaction: discord.Interaction,
+        runner: str,
+        leaderboard_name: str,
+        script: discord.Attachment,
+    ):
+        runner = {"github": "GitHub", "modal": "Modal"}[runner.lower()]
+        return await self.submit(runner, interaction, leaderboard_name, script, mode=SubmissionMode.BENCHMARK)


 class LeaderboardCog(commands.Cog):
