From afa55c12b6220cbdcab845682a77dfcf25fbc6ae Mon Sep 17 00:00:00 2001
From: Yuxian Qiu <142763828+yuxianq@users.noreply.github.com>
Date: Fri, 9 Jan 2026 10:50:04 +0800
Subject: [PATCH] [None][fix] revert
 https://github.com/NVIDIA/TensorRT-LLM/pull/10445. (#10547)

Signed-off-by: Yuxian Qiu <142763828+yuxianq@users.noreply.github.com>
---
 tensorrt_llm/_torch/models/modeling_speculative.py | 8 --------
 1 file changed, 8 deletions(-)

diff --git a/tensorrt_llm/_torch/models/modeling_speculative.py b/tensorrt_llm/_torch/models/modeling_speculative.py
index 312d5b1dca..dc4b3b1d54 100755
--- a/tensorrt_llm/_torch/models/modeling_speculative.py
+++ b/tensorrt_llm/_torch/models/modeling_speculative.py
@@ -953,14 +953,6 @@ class SpecDecOneEngineForCausalLM(DecoderModelForCausalLM[TModel, TConfig],
             hidden_states = hidden_states[:attn_metadata.num_tokens]
 
         if self.draft_model is not None:
-            # For one-model speculative decoding with PP, only the last PP rank
-            # has valid hidden_states from the target model. The spec_worker (which
-            # runs the draft model loop) should only run on the last PP rank.
-            # Non-last PP ranks return None and let the PP sync handle the results.
-            mapping = self.model.model_config.mapping
-            if mapping.has_pp() and not mapping.is_last_pp_rank():
-                return None
-
             # get logits
             logits = self.logits_processor.forward(
                 hidden_states[spec_metadata.gather_ids],