[https://nvbugs/5474169][fix] seq_len mismatch between kv cache manager and graph attn metadata (#7606)

Signed-off-by: Hui Gao <huig@nvidia.com>
Signed-off-by: Wangshanshan <30051912+dominicshanshan@users.noreply.github.com>
This commit is contained in:
HuiGao-NV 2025-09-09 08:32:31 +08:00 committed by Yanchao Lu
parent 3cc16c2438
commit af34c9713a

View File

@@ -486,6 +486,9 @@ class KvCacheCreator:
# When SWA is enabled, max_seq_len is updated inside kv_cache_manager.
if kv_cache_manager is not None:
if kv_cache_manager.max_seq_len < self._max_seq_len:
self._dummy_reqs = self._create_dummy_context_requests(
max(1, kv_cache_manager.max_seq_len - 1))
self._max_seq_len = kv_cache_manager.max_seq_len
return kv_cache_manager