From 6215a759e64d85e9782450c8e3dc1774cd339d14 Mon Sep 17 00:00:00 2001 From: Junnan Liu Date: Mon, 6 Jan 2025 10:25:07 +0800 Subject: [PATCH] Update Leaderboard --- docs/LiveMathBench-A.csv | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/LiveMathBench-A.csv b/docs/LiveMathBench-A.csv index 897fa45..38512d0 100644 --- a/docs/LiveMathBench-A.csv +++ b/docs/LiveMathBench-A.csv @@ -5,8 +5,8 @@ Llama-3.3-70B-Instruct,40.3,36.2,28.9,19.1,27.5,https://github.com/facebookresea Qwen2.5-7B-Instruct,37.0,36.5,27.2,16.0,25.8,https://github.com/QwenLM/Qwen,TRUE,FALSE,FALSE Qwen2.5-32B-Instruct,50.8,48.3,39.5,28.6,38.1,https://github.com/QwenLM/Qwen,TRUE,FALSE,FALSE Qwen2.5-72B-Instruct,51.7,47.3,39.6,29.0,37.8,https://github.com/QwenLM/Qwen,TRUE,FALSE,FALSE -DeepSeek-V2.5-1210,38.7,38.9,27.9,17.3,26.7,https://github.com/deepseek-ai/DeepSeek-LLM,FALSE,FALSE,FALSE -DeepSeek-V3.0-Chat,55.0,59.5,49.9,35.0,47.9,https://github.com/deepseek-ai/DeepSeek-V3,FALSE,FALSE,FALSE +DeepSeek-V2.5-1210,38.7,38.9,27.9,17.3,26.7,https://github.com/deepseek-ai/DeepSeek-LLM,TRUE,FALSE,FALSE +DeepSeek-V3.0-Chat,55.0,59.5,49.9,35.0,47.9,https://github.com/deepseek-ai/DeepSeek-V3,TRUE,FALSE,FALSE Mistral-Large-Instruct-2411-123B,41.6,39.4,37.1,32.9,36.4,https://example.com/mistral,TRUE,FALSE,FALSE Gemini-1.5-Pro-Latest,59.1,55.9,47.3,31.0,44.3,https://example.com/gemini,FALSE,FALSE,FALSE Claude-3.5-Sonnet,46.7,44.1,36.2,26.6,35.3,https://docs.anthropic.com/claude/docs/models-overview,FALSE,FALSE,FALSE