From af34c9713a7019111ffb0c967b27d237a2da802d Mon Sep 17 00:00:00 2001 From: HuiGao-NV Date: Tue, 9 Sep 2025 08:32:31 +0800 Subject: [PATCH] [https://nvbugs/5474169][fix] seq_len mismatch between kv cache manager and graph attn metadata (#7606) Signed-off-by: Hui Gao Signed-off-by: Wangshanshan <30051912+dominicshanshan@users.noreply.github.com> --- tensorrt_llm/_torch/pyexecutor/_util.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tensorrt_llm/_torch/pyexecutor/_util.py b/tensorrt_llm/_torch/pyexecutor/_util.py index 16986acaa1..e8d68a5938 100644 --- a/tensorrt_llm/_torch/pyexecutor/_util.py +++ b/tensorrt_llm/_torch/pyexecutor/_util.py @@ -486,6 +486,9 @@ class KvCacheCreator: # When SWA is enabled, max_seq_len is updated inside kv_cache_manager. if kv_cache_manager is not None: + if kv_cache_manager.max_seq_len < self._max_seq_len: + self._dummy_reqs = self._create_dummy_context_requests( + max(1, kv_cache_manager.max_seq_len - 1)) self._max_seq_len = kv_cache_manager.max_seq_len return kv_cache_manager