Mirror of https://github.com/NVIDIA/TensorRT-LLM.git (synced 2026-02-16 07:53:55 +08:00)
[None][chore] Pass without_comm to cutlass and deepgemm (#11229)
Signed-off-by: xxi <xxi@nvidia.com>
parent 36cb5f8c93
commit 4c1d9d0c10
@@ -196,6 +196,7 @@ def create_moe_backend(
             swiglu_beta=swiglu_beta,
             swiglu_limit=swiglu_limit,
             init_load_balancer=init_load_balancer,
+            without_comm=without_comm,
             activation_type=activation_type,
         )
     elif moe_cls == WideEPMoE:
@@ -256,6 +257,7 @@ def create_moe_backend(
             weight_loading_mode=weight_loading_mode,
             apply_router_weight_on_input=apply_router_weight_on_input,
             layer_idx=layer_idx,
+            without_comm=without_comm,
         )
     elif moe_cls == TritonFusedMoE:
         assert not apply_router_weight_on_input, "apply_router_weight_on_input is not supported in TritonFusedMoE."
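The change is mechanical: create_moe_backend already receives a without_comm argument, and this commit forwards it to two more backend constructors (per the commit title, the CUTLASS and DeepGEMM paths). Below is a minimal, self-contained sketch of the forwarding pattern; the stub class names and the reduced argument lists are illustrative assumptions, not the actual TensorRT-LLM constructor signatures.

    # Minimal sketch of the forwarding pattern in this commit, using simplified
    # stand-in backend classes; names other than `without_comm` are placeholders
    # and do not reflect the real TensorRT-LLM API.
    from dataclasses import dataclass


    @dataclass
    class CutlassMoEStub:
        layer_idx: int
        without_comm: bool = False  # when True, the backend skips its own comm step


    @dataclass
    class DeepGemmMoEStub:
        layer_idx: int
        without_comm: bool = False


    def create_moe_backend_stub(moe_cls, layer_idx, without_comm=False):
        # The factory threads `without_comm` through to whichever backend it
        # builds, mirroring the two hunks above.
        if moe_cls is CutlassMoEStub:
            return CutlassMoEStub(layer_idx=layer_idx, without_comm=without_comm)
        elif moe_cls is DeepGemmMoEStub:
            return DeepGemmMoEStub(layer_idx=layer_idx, without_comm=without_comm)
        raise NotImplementedError(f"unsupported MoE backend: {moe_cls!r}")


    # Example: build a CUTLASS-style stub with communication disabled.
    backend = create_moe_backend_stub(CutlassMoEStub, layer_idx=0, without_comm=True)
    print(backend)  # CutlassMoEStub(layer_idx=0, without_comm=True)

Presumably without_comm tells the backend to skip its internal communication step so a caller that already manages it does not duplicate the work; the diff itself only shows the flag being forwarded.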