From 44d7c3b245cfbd7de0fa056dfad40a0911cd7a73 Mon Sep 17 00:00:00 2001
From: "Tao Li @ NVIDIA" <tali@nvidia.com>
Date: Thu, 18 Sep 2025 03:34:05 +0800
Subject: [PATCH] [https://nvbugs/1234567][fix] Revert pre-Blackwell NCCL
 AllReduce override in LlamaDecoderLayer
 (https://github.com/NVIDIA/TensorRT-LLM/pull/7768) (#7813)

Signed-off-by: Tao Li <tali@nvidia.com>
Signed-off-by: Wangshanshan <30051912+dominicshanshan@users.noreply.github.com>
---
 tensorrt_llm/_torch/models/modeling_llama.py | 10 ++--------
 1 file changed, 2 insertions(+), 8 deletions(-)

diff --git a/tensorrt_llm/_torch/models/modeling_llama.py b/tensorrt_llm/_torch/models/modeling_llama.py
index 2b98800037..64246736a8 100644
--- a/tensorrt_llm/_torch/models/modeling_llama.py
+++ b/tensorrt_llm/_torch/models/modeling_llama.py
@@ -11,8 +11,7 @@ from transformers.modeling_utils import load_sharded_checkpoint
 from transformers.models.llama4.modeling_llama4 import Llama4MultiModalProjector
 
 from tensorrt_llm._torch.distributed import (AllReduce, AllReduceFusionOp,
-                                             AllReduceParams, AllReduceStrategy,
-                                             MoEAllReduce)
+                                             AllReduceParams, MoEAllReduce)
 from tensorrt_llm._torch.models.checkpoints.base_weight_mapper import \
     BaseWeightMapper
 from tensorrt_llm._utils import get_sm_version
@@ -650,12 +649,7 @@ class LlamaDecoderLayer(DecoderLayer):
                                                 eps=config.rms_norm_eps,
                                                 dtype=config.torch_dtype)
 
-        # TODO: This is a temporary fix to disable oneshot kernel for pre-Blackwell arch to avoid perf regressions
-        self.all_reduce = AllReduce(
-            strategy=model_config.allreduce_strategy
-            if get_sm_version() >= 100 else AllReduceStrategy.NCCL,
-            mapping=model_config.mapping,
-        )
+        self.all_reduce = AllReduce(mapping=model_config.mapping)
 
         self.next_layer_layernorm: RMSNorm = None
         self.next_attn: LlamaAttention = None