fix: fix index out of bounds error in spec decoding (#5954)

2026-01-14 06:27:45 +08:00 · 2025-07-14 09:41:27 +08:00 · 2025-07-14 09:41:27 +08:00 · c66941036f
commit c66941036f
parent 9d26b7891a
1 changed file with 2 additions and 1 deletion
--- a/tensorrt_llm/_torch/pyexecutor/model_engine.py
+++ b/tensorrt_llm/_torch/pyexecutor/model_engine.py
@@ -1216,7 +1216,8 @@ class PyTorchModelEngine(ModelEngine):
            if next_draft_tokens_device is None or request.is_dummy or request.py_batch_idx is None:
                # get token ids, including input token ids and draft token ids. For these dummy requests,
                # no need to copy the token ids.
-                if not request.is_dummy:
+                if not (request.is_attention_dp_dummy
+                        or request.is_cuda_graph_dummy):
                    input_ids.append(request.get_last_tokens(0))
                    input_ids.extend(request.py_draft_tokens)
                    draft_tokens.extend(request.py_draft_tokens)