Mirror of https://github.com/NVIDIA/TensorRT-LLM.git (synced 2026-02-16 07:53:55 +08:00)
[None][chore] Pass without_comm to cutlass and deepgemm (#11229)
Signed-off-by: xxi <xxi@nvidia.com>
parent 36cb5f8c93
commit 4c1d9d0c10
@@ -196,6 +196,7 @@ def create_moe_backend(
             swiglu_beta=swiglu_beta,
             swiglu_limit=swiglu_limit,
             init_load_balancer=init_load_balancer,
+            without_comm=without_comm,
             activation_type=activation_type,
         )
     elif moe_cls == WideEPMoE:
@@ -256,6 +257,7 @@ def create_moe_backend(
             weight_loading_mode=weight_loading_mode,
             apply_router_weight_on_input=apply_router_weight_on_input,
             layer_idx=layer_idx,
+            without_comm=without_comm,
         )
     elif moe_cls == TritonFusedMoE:
         assert not apply_router_weight_on_input, "apply_router_weight_on_input is not supported in TritonFusedMoE."
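The change is mechanical: create_moe_backend already receives a without_comm argument, and this commit forwards it to two more backend constructors (per the commit title, the CUTLASS and DeepGEMM paths). Below is a minimal, self-contained sketch of the forwarding pattern; the stub class names and the reduced argument lists are illustrative assumptions, not the actual TensorRT-LLM constructor signatures.

    # Minimal sketch of the forwarding pattern in this commit, using simplified
    # stand-in backend classes; names other than `without_comm` are placeholders
    # and do not reflect the real TensorRT-LLM API.
    from dataclasses import dataclass


    @dataclass
    class CutlassMoEStub:
        layer_idx: int
        without_comm: bool = False  # when True, the backend skips its own comm step


    @dataclass
    class DeepGemmMoEStub:
        layer_idx: int
        without_comm: bool = False


    def create_moe_backend_stub(moe_cls, layer_idx, without_comm=False):
        # The factory threads `without_comm` through to whichever backend it
        # builds, mirroring the two hunks above.
        if moe_cls is CutlassMoEStub:
            return CutlassMoEStub(layer_idx=layer_idx, without_comm=without_comm)
        elif moe_cls is DeepGemmMoEStub:
            return DeepGemmMoEStub(layer_idx=layer_idx, without_comm=without_comm)
        raise NotImplementedError(f"unsupported MoE backend: {moe_cls!r}")


    # Example: build a CUTLASS-style stub with communication disabled.
    backend = create_moe_backend_stub(CutlassMoEStub, layer_idx=0, without_comm=True)
    print(backend)  # CutlassMoEStub(layer_idx=0, without_comm=True)

Presumably without_comm tells the backend to skip its internal communication step so a caller that already manages it does not duplicate the work; the diff itself only shows the flag being forwarded.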