Skip to content

Commit

Permalink
Start epilogue
Browse files Browse the repository at this point in the history
Signed-off-by: ElizaWszola <[email protected]>
  • Loading branch information
ElizaWszola committed Dec 16, 2024
1 parent c570c69 commit ffe772b
Show file tree
Hide file tree
Showing 9 changed files with 650 additions and 60 deletions.
2 changes: 1 addition & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -264,7 +264,7 @@ if(VLLM_GPU_LANG STREQUAL "CUDA")
if(${CMAKE_CUDA_COMPILER_VERSION} VERSION_GREATER 12.0 AND SCALED_MM_3X_ARCHS)
set(SRCS
"csrc/quantization/cutlass_w8a8/scaled_mm_c3x.cu"
- "csrc/quantization/cutlass_w8a8/grouped_gemm_test.cu")
+ "csrc/quantization/cutlass_w8a8/grouped_gemm_c3x.cu")
set_gencode_flags_for_srcs(
SRCS "${SRCS}"
CUDA_ARCHS "${SCALED_MM_3X_ARCHS}")
Expand Down
3 changes: 2 additions & 1 deletion csrc/cpu/torch_bindings.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -123,7 +123,8 @@ TORCH_LIBRARY_EXPAND(TORCH_EXTENSION_NAME, ops) {
"cutlass_grouped_mm(Tensor! out, Tensor a, Tensor b, Tensor a_scales, "
" Tensor b_scales, Tensor problem_sizes, "
" Tensor out_offsets, Tensor a_offsets, "
- " Tensor b_offsets) -> ()");
+ " Tensor b_offsets, Tensor a_scales_offsets, "
+ " Tensor b_scales_offsets) -> ()");
ops.impl("cutlass_grouped_mm", torch::kCUDA, &cutlass_grouped_mm);
// w8a8 GEMM, supporting asymmetric per-tensor or per-row/column
// quantization.
Expand Down
Loading

0 comments on commit ffe772b

Please sign in to comment.