diff --git a/3rdparty/cutlass b/3rdparty/cutlass
index a49a78ffef..b2dd65dc86 160000
--- a/3rdparty/cutlass
+++ b/3rdparty/cutlass
@@ -1 +1 @@
-Subproject commit a49a78ffefc86a87160dfe0ccc3a3a2d1622c918
+Subproject commit b2dd65dc864e09688245b316ac46c4a6cd07e15c
diff --git a/cpp/tensorrt_llm/kernels/cutlass_kernels/moe_gemm/moe_gemm_template_dispatch_tma_ws.h b/cpp/tensorrt_llm/kernels/cutlass_kernels/moe_gemm/moe_gemm_template_dispatch_tma_ws.h
index 168c50a8a2..de45ee18f5 100644
--- a/cpp/tensorrt_llm/kernels/cutlass_kernels/moe_gemm/moe_gemm_template_dispatch_tma_ws.h
+++ b/cpp/tensorrt_llm/kernels/cutlass_kernels/moe_gemm/moe_gemm_template_dispatch_tma_ws.h
@@ -113,6 +113,9 @@ void dispatchMoeGemmSelectBiasTmaWarpSpecialized(TmaWarpSpecializedGroupedGemmIn
                 "passing 103-real as an arch to build_wheel.py.");
             first_time = false;
         }
+        return dispatchMoeGemmSelectBiasTmaWarpSpecialized<cutlass::arch::Sm100, T, WeightType, OutputType, EpilogueTag,
+            FUSION, TileShape, ClusterShape>(
+            hopper_input, num_experts, multi_processor_count, stream, occupancy, workspace_size);
     }
 #endif
 #ifndef COMPILE_BLACKWELL_TMA_GROUPED_GEMMS