[https://nvbugs/5474169][fix] seq_len mismatch between kv cache manager and graph attn metadata (#7606)

Signed-off-by: Hui Gao <huig@nvidia.com>
Signed-off-by: Wangshanshan <30051912+dominicshanshan@users.noreply.github.com>
commit af34c9713a
parent 3cc16c2438
```diff
@@ -486,6 +486,9 @@ class KvCacheCreator:
+        # When SWA is enabled, max_seq_len is updated inside kv_cache_manager.
         if kv_cache_manager is not None:
             if kv_cache_manager.max_seq_len < self._max_seq_len:
                 self._dummy_reqs = self._create_dummy_context_requests(
                     max(1, kv_cache_manager.max_seq_len - 1))
+                self._max_seq_len = kv_cache_manager.max_seq_len
+
         return kv_cache_manager
 
```
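For context, the sketch below illustrates the pattern the patch follows: when the KV cache manager lowers its effective `max_seq_len` (as can happen with sliding window attention), the creator re-syncs its own `_max_seq_len` and rebuilds its dummy context requests within the new bound, so the attention metadata captured for CUDA graphs agrees with the KV cache manager. This is a minimal, self-contained sketch, not TensorRT-LLM code; every class and method name here other than `max_seq_len`, `_max_seq_len`, `_dummy_reqs`, and `_create_dummy_context_requests` is hypothetical.

```python
# Minimal sketch of the clamping logic in the patch above.
# KvCacheManagerStub, KvCacheCreatorSketch, DummyRequest, and
# sync_with_kv_cache are hypothetical stand-ins, not real TensorRT-LLM APIs.
from dataclasses import dataclass
from typing import List, Optional


@dataclass
class DummyRequest:
    prompt_len: int  # token count used to warm up attention metadata


@dataclass
class KvCacheManagerStub:
    max_seq_len: int  # may be reduced internally, e.g. when SWA is enabled


class KvCacheCreatorSketch:
    def __init__(self, max_seq_len: int):
        self._max_seq_len = max_seq_len
        self._dummy_reqs: List[DummyRequest] = []

    def _create_dummy_context_requests(self, token_num: int) -> List[DummyRequest]:
        # Stand-in for building warmup requests of the given length.
        return [DummyRequest(prompt_len=token_num)]

    def sync_with_kv_cache(
        self, kv_cache_manager: Optional[KvCacheManagerStub]
    ) -> Optional[KvCacheManagerStub]:
        # Mirrors the patched logic: if the manager shrank max_seq_len,
        # rebuild the dummy requests within the new bound and keep the
        # creator's view of max_seq_len consistent with the manager's.
        if kv_cache_manager is not None:
            if kv_cache_manager.max_seq_len < self._max_seq_len:
                self._dummy_reqs = self._create_dummy_context_requests(
                    max(1, kv_cache_manager.max_seq_len - 1))
                self._max_seq_len = kv_cache_manager.max_seq_len
        return kv_cache_manager


creator = KvCacheCreatorSketch(max_seq_len=8192)
kv = KvCacheManagerStub(max_seq_len=4096)  # manager lowered the bound
creator.sync_with_kv_cache(kv)
assert creator._max_seq_len == kv.max_seq_len  # views now agree
```

The `max(1, ... - 1)` guard keeps the warmup request length at least one token even when the manager's bound is very small, while staying strictly inside the KV cache capacity.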