diff --git a/3rdparty/cutlass b/3rdparty/cutlass index a49a78ffef..b2dd65dc86 160000 --- a/3rdparty/cutlass +++ b/3rdparty/cutlass @@ -1 +1 @@ -Subproject commit a49a78ffefc86a87160dfe0ccc3a3a2d1622c918 +Subproject commit b2dd65dc864e09688245b316ac46c4a6cd07e15c diff --git a/cpp/tensorrt_llm/kernels/cutlass_kernels/moe_gemm/moe_gemm_template_dispatch_tma_ws.h b/cpp/tensorrt_llm/kernels/cutlass_kernels/moe_gemm/moe_gemm_template_dispatch_tma_ws.h index 168c50a8a2..de45ee18f5 100644 --- a/cpp/tensorrt_llm/kernels/cutlass_kernels/moe_gemm/moe_gemm_template_dispatch_tma_ws.h +++ b/cpp/tensorrt_llm/kernels/cutlass_kernels/moe_gemm/moe_gemm_template_dispatch_tma_ws.h @@ -113,6 +113,9 @@ void dispatchMoeGemmSelectBiasTmaWarpSpecialized(TmaWarpSpecializedGroupedGemmIn "passing 103-real as an arch to build_wheel.py."); first_time = false; } + return dispatchMoeGemmSelectBiasTmaWarpSpecialized( + hopper_input, num_experts, multi_processor_count, stream, occupancy, workspace_size); } #endif #ifndef COMPILE_BLACKWELL_TMA_GROUPED_GEMMS