From 14154ec1d317915718550cedbe9b74a149cd257f Mon Sep 17 00:00:00 2001 From: Xiwen Yu <13230610+VALLIS-NERIA@users.noreply.github.com> Date: Mon, 1 Sep 2025 10:47:58 +0800 Subject: [PATCH] disable sm103 moe kernel Signed-off-by: Xiwen Yu <13230610+VALLIS-NERIA@users.noreply.github.com> --- .../moe_gemm/moe_gemm_template_dispatch_tma_ws.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/cpp/tensorrt_llm/kernels/cutlass_kernels/moe_gemm/moe_gemm_template_dispatch_tma_ws.h b/cpp/tensorrt_llm/kernels/cutlass_kernels/moe_gemm/moe_gemm_template_dispatch_tma_ws.h index de45ee18f5..ae75a96464 100644 --- a/cpp/tensorrt_llm/kernels/cutlass_kernels/moe_gemm/moe_gemm_template_dispatch_tma_ws.h +++ b/cpp/tensorrt_llm/kernels/cutlass_kernels/moe_gemm/moe_gemm_template_dispatch_tma_ws.h @@ -102,22 +102,22 @@ void dispatchMoeGemmSelectBiasTmaWarpSpecialized(TmaWarpSpecializedGroupedGemmIn TLLM_THROW("Please recompile with support for hopper by passing 90-real as an arch to build_wheel.py."); } #endif -#ifndef COMPILE_BLACKWELL_SM103_TMA_GROUPED_GEMMS + // #ifndef COMPILE_BLACKWELL_SM103_TMA_GROUPED_GEMMS else if constexpr (Arch::kMinComputeCapability == 103) { static bool first_time = true; if (first_time) { TLLM_LOG_WARNING( - "Falling back to sm100f version. For best performance please recompile with support for blackwell by " - "passing 103-real as an arch to build_wheel.py."); + "Falling back to sm100f version due to a bug in cutlass." /*"For best performance please recompile with support for blackwell by " + "passing 103-real as an arch to build_wheel.py."*/); first_time = false; } return dispatchMoeGemmSelectBiasTmaWarpSpecialized( hopper_input, num_experts, multi_processor_count, stream, occupancy, workspace_size); } -#endif +// #endif #ifndef COMPILE_BLACKWELL_TMA_GROUPED_GEMMS else if constexpr (Arch::kMinComputeCapability >= 100 && Arch::kMinComputeCapability < 120) {