From c66941036ff01f2a7b8c3199379ddd66f3ed4506 Mon Sep 17 00:00:00 2001 From: Fanrong Li <23290157+lfr-0531@users.noreply.github.com> Date: Mon, 14 Jul 2025 09:41:27 +0800 Subject: [PATCH] fix: fix index out of bounds error in spec decoding (#5954) --- tensorrt_llm/_torch/pyexecutor/model_engine.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tensorrt_llm/_torch/pyexecutor/model_engine.py b/tensorrt_llm/_torch/pyexecutor/model_engine.py index 1a22caf2d7..3e364ac9a9 100644 --- a/tensorrt_llm/_torch/pyexecutor/model_engine.py +++ b/tensorrt_llm/_torch/pyexecutor/model_engine.py @@ -1216,7 +1216,8 @@ class PyTorchModelEngine(ModelEngine): if next_draft_tokens_device is None or request.is_dummy or request.py_batch_idx is None: # get token ids, including input token ids and draft token ids. For these dummy requests, # no need to copy the token ids. - if not request.is_dummy: + if not (request.is_attention_dp_dummy + or request.is_cuda_graph_dummy): input_ids.append(request.get_last_tokens(0)) input_ids.extend(request.py_draft_tokens) draft_tokens.extend(request.py_draft_tokens)