From 78c0f31c002095f668947f0e70e77c9f2856b191 Mon Sep 17 00:00:00 2001
From: Ludwig Schneider
Date: Tue, 6 Jan 2026 09:45:25 -0800
Subject: [PATCH] activate NCCL_SYMMETRIC auto-tuning

Enable NCCL_SYMMETRIC as a candidate strategy during auto-tuning and use it
as the fallback tactic; drop the now-stale TODO comments that described the
old tuning hang.

Signed-off-by: Ludwig Schneider
---
 tensorrt_llm/_torch/custom_ops/torch_custom_ops.py | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/tensorrt_llm/_torch/custom_ops/torch_custom_ops.py b/tensorrt_llm/_torch/custom_ops/torch_custom_ops.py
index 2ee8d29ccc..74dd97b3fe 100644
--- a/tensorrt_llm/_torch/custom_ops/torch_custom_ops.py
+++ b/tensorrt_llm/_torch/custom_ops/torch_custom_ops.py
@@ -1690,8 +1690,7 @@ class AllReduceRunner(TunableRunner):
         **kwargs,
     ) -> List[int]:
         valid_strategies = [
-            # TODO: NCCL_SYMMETRIC will cause hang during tuning process
-            # AllReduceStrategy.NCCL_SYMMETRIC.value,
+            AllReduceStrategy.NCCL_SYMMETRIC.value,
             AllReduceStrategy.NCCL.value,
         ]
         # Fallback in allreduceOp is set to NCCL_SYMMETRIC as default
@@ -1720,7 +1719,6 @@ class AllReduceRunner(TunableRunner):
         input, residual, norm_weight, scale, bias, workspace = inputs
         if tactic == -1:
-            # TODO: Use NCCL instead of NCCL_SYMMETRIC to avoid hanging during tuning process
-            tactic = AllReduceStrategy.NCCL.value
+            tactic = AllReduceStrategy.NCCL_SYMMETRIC.value
         return torch.ops.trtllm.allreduce(
             input,