Start epilogue

Signed-off-by: ElizaWszola <[email protected]>
neuralmagic · Dec 16, 2024 · ffe772b
1 parent c570c69
commit ffe772b
Show file tree

Hide file tree

Showing 9 changed files with 650 additions and 60 deletions.
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -264,7 +264,7 @@ if(VLLM_GPU_LANG STREQUAL "CUDA")
   if(${CMAKE_CUDA_COMPILER_VERSION} VERSION_GREATER 12.0 AND SCALED_MM_3X_ARCHS)
     set(SRCS
         "csrc/quantization/cutlass_w8a8/scaled_mm_c3x.cu"
-        "csrc/quantization/cutlass_w8a8/grouped_gemm_test.cu")
+        "csrc/quantization/cutlass_w8a8/grouped_gemm_c3x.cu")
     set_gencode_flags_for_srcs(
       SRCS "${SRCS}"
       CUDA_ARCHS "${SCALED_MM_3X_ARCHS}")

diff --git a/csrc/cpu/torch_bindings.cpp b/csrc/cpu/torch_bindings.cpp
@@ -123,7 +123,8 @@ TORCH_LIBRARY_EXPAND(TORCH_EXTENSION_NAME, ops) {
       "cutlass_grouped_mm(Tensor! out, Tensor a, Tensor b, Tensor a_scales, "
       "                   Tensor b_scales, Tensor problem_sizes, "
       "                   Tensor out_offsets, Tensor a_offsets, "
-      "                   Tensor b_offsets) -> ()");
+      "                   Tensor b_offsets, Tensor a_scales_offsets, "
+      "                   Tensor b_scales_offsets) -> ()");
   ops.impl("cutlass_grouped_mm", torch::kCUDA, &cutlass_grouped_mm);
   // w8a8 GEMM, supporting asymmetric per-tensor or per-row/column
   // quantization.