From afa55c12b6220cbdcab845682a77dfcf25fbc6ae Mon Sep 17 00:00:00 2001
From: Yuxian Qiu <142763828+yuxianq@users.noreply.github.com>
Date: Fri, 9 Jan 2026 10:50:04 +0800
Subject: [PATCH] [None][fix] revert https://github.com/NVIDIA/TensorRT-LLM/pull/10445. (#10547)

Signed-off-by: Yuxian Qiu <142763828+yuxianq@users.noreply.github.com>
---
 tensorrt_llm/_torch/models/modeling_speculative.py | 8 --------
 1 file changed, 8 deletions(-)

diff --git a/tensorrt_llm/_torch/models/modeling_speculative.py b/tensorrt_llm/_torch/models/modeling_speculative.py
index 312d5b1dca..dc4b3b1d54 100755
--- a/tensorrt_llm/_torch/models/modeling_speculative.py
+++ b/tensorrt_llm/_torch/models/modeling_speculative.py
@@ -953,14 +953,6 @@ class SpecDecOneEngineForCausalLM(DecoderModelForCausalLM[TModel, TConfig],
             hidden_states = hidden_states[:attn_metadata.num_tokens]
 
         if self.draft_model is not None:
-            # For one-model speculative decoding with PP, only the last PP rank
-            # has valid hidden_states from the target model. The spec_worker (which
-            # runs the draft model loop) should only run on the last PP rank.
-            # Non-last PP ranks return None and let the PP sync handle the results.
-            mapping = self.model.model_config.mapping
-            if mapping.has_pp() and not mapping.is_last_pp_rank():
-                return None
-
             # get logits
             logits = self.logits_processor.forward(
                 hidden_states[spec_metadata.gather_ids],