From 78c0f31c002095f668947f0e70e77c9f2856b191 Mon Sep 17 00:00:00 2001
From: Ludwig Schneider
Date: Tue, 6 Jan 2026 09:45:25 -0800
Subject: [PATCH] activate NCCL_SYMMETRIC auto-tuning

Enable NCCL_SYMMETRIC as a candidate strategy during auto-tuning and use it
as the fallback tactic; drop the now-stale TODO comments that described the
old tuning hang.

Signed-off-by: Ludwig Schneider
---
 tensorrt_llm/_torch/custom_ops/torch_custom_ops.py | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/tensorrt_llm/_torch/custom_ops/torch_custom_ops.py b/tensorrt_llm/_torch/custom_ops/torch_custom_ops.py
index 2ee8d29ccc..74dd97b3fe 100644
--- a/tensorrt_llm/_torch/custom_ops/torch_custom_ops.py
+++ b/tensorrt_llm/_torch/custom_ops/torch_custom_ops.py
@@ -1690,8 +1690,7 @@ class AllReduceRunner(TunableRunner):
         **kwargs,
     ) -> List[int]:
         valid_strategies = [
-            # TODO: NCCL_SYMMETRIC will cause hang during tuning process
-            # AllReduceStrategy.NCCL_SYMMETRIC.value,
+            AllReduceStrategy.NCCL_SYMMETRIC.value,
             AllReduceStrategy.NCCL.value,
         ]
         # Fallback in allreduceOp is set to NCCL_SYMMETRIC as default
@@ -1720,7 +1719,6 @@ class AllReduceRunner(TunableRunner):
         input, residual, norm_weight, scale, bias, workspace = inputs
         if tactic == -1:
-            # TODO: Use NCCL instead of NCCL_SYMMETRIC to avoid hanging during tuning process
-            tactic = AllReduceStrategy.NCCL.value
+            tactic = AllReduceStrategy.NCCL_SYMMETRIC.value
         return torch.ops.trtllm.allreduce(
             input,