From c66941036ff01f2a7b8c3199379ddd66f3ed4506 Mon Sep 17 00:00:00 2001
From: Fanrong Li <23290157+lfr-0531@users.noreply.github.com>
Date: Mon, 14 Jul 2025 09:41:27 +0800
Subject: [PATCH] fix: fix index out of bounds error in spec decoding (#5954)

---
 tensorrt_llm/_torch/pyexecutor/model_engine.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tensorrt_llm/_torch/pyexecutor/model_engine.py b/tensorrt_llm/_torch/pyexecutor/model_engine.py
index 1a22caf2d7..3e364ac9a9 100644
--- a/tensorrt_llm/_torch/pyexecutor/model_engine.py
+++ b/tensorrt_llm/_torch/pyexecutor/model_engine.py
@@ -1216,7 +1216,8 @@ class PyTorchModelEngine(ModelEngine):
             if next_draft_tokens_device is None or request.is_dummy or request.py_batch_idx is None:
                 # get token ids, including input token ids and draft token ids. For these dummy requests,
                 # no need to copy the token ids.
-                if not request.is_dummy:
+                if not (request.is_attention_dp_dummy
+                        or request.is_cuda_graph_dummy):
                     input_ids.append(request.get_last_tokens(0))
                     input_ids.extend(request.py_draft_tokens)
                     draft_tokens.extend(request.py_draft_tokens)