diff --git a/src/discord-cluster-manager/cogs/github_cog.py b/src/discord-cluster-manager/cogs/github_cog.py
index b4170d3..422123e 100644
--- a/src/discord-cluster-manager/cogs/github_cog.py
+++ b/src/discord-cluster-manager/cogs/github_cog.py
@@ -44,12 +44,12 @@ async def run_github(
         gpu_type: app_commands.Choice[str],
         reference_script: discord.Attachment = None,
         reference_code: str = None,
-    ) -> discord.Thread:
+    ) -> tuple[discord.Thread, FullResult]:
         if not script.filename.endswith((".py", ".cu", ".cuh", ".cpp")):
             await send_discord_message(
                 interaction, "Please provide a Python (.py) or CUDA (.cu / .cuh / .cpp) file"
             )
-            return None
+            return None, None

         thread = await self.bot.create_thread(interaction, gpu_type.name, "GitHub Job")
         await thread.send(f"Processing `{script.filename}` with {gpu_type.name}...")
@@ -98,7 +98,7 @@ async def run_github(
                     "Failed to trigger GitHub Action. Please check the configuration."
                 )

-            return thread
+            return thread, result

         except Exception as e:
             logger.error(f"Error processing request: {str(e)}", exc_info=True)
diff --git a/src/discord-cluster-manager/cogs/leaderboard_cog.py b/src/discord-cluster-manager/cogs/leaderboard_cog.py
index e0d021a..ee9718c 100644
--- a/src/discord-cluster-manager/cogs/leaderboard_cog.py
+++ b/src/discord-cluster-manager/cogs/leaderboard_cog.py
@@ -17,7 +17,6 @@
 from ui.misc import DeleteConfirmationModal, GPUSelectionView
 from ui.table import create_table
 from utils import (
-    extract_score,
     get_user_from_id,
     send_discord_message,
     setup_logging,
@@ -44,7 +43,7 @@ async def async_submit_cog_job(
         runner_name: str = "GitHub",
     ):
         try:
-            discord_thread = await command.callback(
+            discord_thread, result = await command.callback(
                 cog,
                 interaction,
                 script,
@@ -58,55 +57,40 @@ async def async_submit_cog_job(
             print(f"Webhook not found: {e}")
             await send_discord_message(interaction, "❌ The webhook was not found.")

-        message_contents = [msg.content async for msg in discord_thread.history(limit=None)]
-
         try:
-            # For CUDA leaderboards, make more robust
-            if "check_implementation failed" in message_contents:
-                await send_discord_message(
-                    interaction,
-                    "check_implementation failed. User kernel and reference kernel do not match.",
-                    ephemeral=True,
-                )
-                return
-
-            # TODO: Make this more robust later
-            score = extract_score("".join(message_contents))
+            if result.success:
+                score = float(result.run.result["duration.mean"]) / 1e9
+
+                with self.bot.leaderboard_db as db:
+                    db.create_submission(
+                        {
+                            "submission_name": script.filename,
+                            "submission_time": datetime.now(),
+                            "leaderboard_name": leaderboard_name,
+                            "code": submission_content,
+                            "user_id": interaction.user.id,
+                            "submission_score": score,
+                            "gpu_type": gpu.name,
+                        }
+                    )
-            with self.bot.leaderboard_db as db:
-                db.create_submission(
-                    {
-                        "submission_name": script.filename,
-                        "submission_time": datetime.now(),
-                        "leaderboard_name": leaderboard_name,
-                        "code": submission_content,
-                        "user_id": interaction.user.id,
-                        "submission_score": score,
-                        "gpu_type": gpu.name,
-                    }
+                user_id = (
+                    interaction.user.global_name
+                    if interaction.user.nick is None
+                    else interaction.user.nick
                 )
-            user_id = (
-                interaction.user.global_name
-                if interaction.user.nick is None
-                else interaction.user.nick
-            )
-
-            await send_discord_message(
-                interaction,
-                f"Successfully ran on {gpu.name} using {runner_name} runners!\n"
-                + f"Leaderboard '{leaderboard_name}'.\n"
-                + f"Submission title: {script.filename}.\n"
-                + f"Submission user: {user_id}.\n"
-                + f"Runtime: {score:.9f} seconds.",
-                ephemeral=True,
-            )
+                await discord_thread.send(
+                    f"Successfully ran on {gpu.name} using {runner_name} runners!\n"
+                    + f"Leaderboard '{leaderboard_name}'.\n"
+                    + f"Submission title: {script.filename}.\n"
+                    + f"Submission user: {user_id}.\n"
+                    + f"Runtime: {score:.9f} seconds.",
+                )
         except Exception:
-            await send_discord_message(
-                interaction,
+            await discord_thread.send(
                 f"Leaderboard submission to '{leaderboard_name}' on {gpu.name} "
                 + f"using {runner_name} runners failed!\n",
-                ephemeral=True,
             )

     async def select_gpu_view(
diff --git a/src/discord-cluster-manager/cogs/modal_cog.py b/src/discord-cluster-manager/cogs/modal_cog.py
index 46d5299..fdebe32 100644
--- a/src/discord-cluster-manager/cogs/modal_cog.py
+++ b/src/discord-cluster-manager/cogs/modal_cog.py
@@ -8,7 +8,8 @@
 from discord.ext import commands
 from leaderboard_eval import cu_eval, py_eval
 from report import generate_report
-from utils import send_discord_message, send_logs, setup_logging
+from run_eval import FullResult
+from utils import send_discord_message, setup_logging


 logger = setup_logging()
@@ -34,7 +35,7 @@ async def run_modal(
         gpu_type: app_commands.Choice[str],
         reference_script: Optional[discord.Attachment] = None,
         reference_code: str = None,
-    ) -> discord.Thread:
+    ) -> tuple[discord.Thread, FullResult]:
         thread = None
         status_msg = None
         try:
@@ -44,11 +45,11 @@ async def run_modal(
                     "Please provide a Python (.py) or CUDA (.cu / .cuh / .cpp) file",
                     ephemeral=True,
                 )
-                return None
+                return None, None

-            # TODO: Maybe find a better way?
             if not interaction.response.is_done():
                 await interaction.response.defer(ephemeral=True)
+
             channel = interaction.channel
             message = await channel.send(f"Starting Modal job with {gpu_type.name}...")
             thread = await message.create_thread(name=f"{gpu_type.name} Modal Job")
@@ -67,7 +68,7 @@ async def run_modal(
                 else (await reference_script.read()).decode("utf-8")
             )

-            await self.handle_modal_execution(
+            result = await self.handle_modal_execution(
                 interaction,
                 thread,
                 script_content,
@@ -76,7 +77,7 @@ async def run_modal(
                 reference_content,
                 status_msg,
             )
-            return thread
+            return thread, result

         except Exception as e:
             logger.error(f"Error processing request: {str(e)}", exc_info=True)
@@ -94,7 +95,7 @@ async def handle_modal_execution(
         gpu_type: str,
         reference_content: Optional[str],
         status_msg: discord.Message,
-    ):
+    ) -> FullResult:
         try:
             loop = asyncio.get_event_loop()
             func_type = "pytorch" if filename.endswith(".py") else "cuda"
@@ -113,9 +114,11 @@ async def handle_modal_execution(

                 # Send results
                 await thread.send(f"\n**Script size:** {len(script_content)} bytes")
                 await generate_report(thread, result)
+                return result
             else:
-                result, score = await loop.run_in_executor(
+                # Currently broken?
+                result = await loop.run_in_executor(
                     None,
                     lambda: modal.Function.lookup("discord-bot-runner", func_name).remote(
                         script_content,
@@ -127,18 +130,10 @@ async def handle_modal_execution(

                 # Send results
                 await thread.send(f"\n**Script size:** {len(script_content)} bytes")
-                await thread.send(f"**Execution time:** {score:.3f} s\n")
-
-                if "check_implementation failed" in result or "Error" in result:
-                    await thread.send("Modal run failed.\n")
-                    await send_logs(thread, result)
-                    await status_msg.edit(content="**Running on Modal...**\n> ❌ Job failed!")
-                    return result, 0
-
-                if result is not None:
-                    await thread.send(f"**score:{score:.9f}**\n```")
+                await thread.send(f"**Execution time:** {result.run.duration:.3f} s\n")

                 await status_msg.edit(content="**Running on Modal...**\n> ✅ Job completed!")
+                return result

         except Exception as e:
             logger.error(f"Error in handle_modal_execution: {str(e)}", exc_info=True)
diff --git a/src/discord-cluster-manager/cogs/verify_run_cog.py b/src/discord-cluster-manager/cogs/verify_run_cog.py
index b55836a..1d593ba 100644
--- a/src/discord-cluster-manager/cogs/verify_run_cog.py
+++ b/src/discord-cluster-manager/cogs/verify_run_cog.py
@@ -55,7 +55,7 @@ async def verify_github_run(
         )
         ref_code = Path("examples/identity_cuda/reference.cuh").read_text()

-        github_thread = await github_command.callback(
+        github_thread, result = await github_command.callback(
             github_cog, interaction, sub_code, choice, reference_code=ref_code
         )

@@ -89,7 +89,8 @@ async def verify_github_run(
         ]
         await send_discord_message(
             interaction,
-            f"❌ GitHub run ({choice.name}) for {lang} verification failed. Missing expected messages:\n"
+            f"❌ GitHub run ({choice.name}) for {lang} verification failed. "
" + + "Missing expected messages:\n" + "\n".join(f"- {pattern}" for pattern in missing_patterns), ) return False @@ -111,7 +112,7 @@ async def verify_modal_run( ) ref_code = Path("examples/identity_cuda/reference.cuh").read_text() - modal_thread = await modal_command.callback( + modal_thread, result = await modal_command.callback( modal_cog, interaction, sub_code, t4, reference_code=ref_code ) diff --git a/src/discord-cluster-manager/report.py b/src/discord-cluster-manager/report.py index 5737236..d911ff1 100644 --- a/src/discord-cluster-manager/report.py +++ b/src/discord-cluster-manager/report.py @@ -38,7 +38,7 @@ async def _send_split_log(thread: discord.Thread, partial_message: str, header: return "" -async def generate_report(thread: discord.Thread, result: FullResult): +async def generate_report(thread: discord.Thread, result: FullResult): # noqa: C901 message = "" if not result.success: message += "# Failure\n" @@ -106,7 +106,7 @@ async def generate_report(thread: discord.Thread, result: FullResult): if len(message) != 0: await thread.send(message) - # TODO dedicated "error" entry in our results dict that gets populated by check_implementation + # TODO dedicated "error" entry in our results that gets populated by check_implementation return # OK, we were successful