diff --git a/tensorrt_llm/_torch/modules/fused_moe/create_moe.py b/tensorrt_llm/_torch/modules/fused_moe/create_moe.py
index 1818daab7e..99a6847461 100644
--- a/tensorrt_llm/_torch/modules/fused_moe/create_moe.py
+++ b/tensorrt_llm/_torch/modules/fused_moe/create_moe.py
@@ -196,6 +196,7 @@ def create_moe_backend(
             swiglu_beta=swiglu_beta,
             swiglu_limit=swiglu_limit,
             init_load_balancer=init_load_balancer,
+            without_comm=without_comm,
             activation_type=activation_type,
         )
     elif moe_cls == WideEPMoE:
@@ -256,6 +257,7 @@ def create_moe_backend(
             weight_loading_mode=weight_loading_mode,
             apply_router_weight_on_input=apply_router_weight_on_input,
             layer_idx=layer_idx,
+            without_comm=without_comm,
         )
     elif moe_cls == TritonFusedMoE:
         assert not apply_router_weight_on_input, "apply_router_weight_on_input is not supported in TritonFusedMoE."
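
The diff threads a without_comm flag from create_moe_backend into two backend constructor calls. Below is a minimal, self-contained sketch of that forwarding pattern, not the actual TensorRT-LLM code: the class bodies, the without_comm semantics comment, and the dispatch logic are illustrative assumptions; only the names create_moe_backend, WideEPMoE, without_comm, and layer_idx come from the diff.

class WideEPMoE:
    def __init__(self, *, layer_idx, without_comm=False, **kwargs):
        self.layer_idx = layer_idx
        # Illustrative assumption: when True, the module skips its internal
        # communication step and leaves it to the caller.
        self.without_comm = without_comm

class OtherMoEBackend:  # hypothetical stand-in for the first hunk's backend
    def __init__(self, *, layer_idx, without_comm=False, **kwargs):
        self.layer_idx = layer_idx
        self.without_comm = without_comm

def create_moe_backend(moe_cls, *, layer_idx, without_comm=False, **kwargs):
    # The factory forwards the flag unchanged to whichever backend it builds,
    # mirroring the two call sites touched by the diff.
    if moe_cls is OtherMoEBackend:
        return OtherMoEBackend(layer_idx=layer_idx, without_comm=without_comm, **kwargs)
    elif moe_cls is WideEPMoE:
        return WideEPMoE(layer_idx=layer_idx, without_comm=without_comm, **kwargs)
    raise NotImplementedError(f"Unsupported MoE backend: {moe_cls}")

if __name__ == "__main__":
    moe = create_moe_backend(WideEPMoE, layer_idx=0, without_comm=True)
    print(moe.without_comm)  # True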