mirror of
https://github.com/NVIDIA/TensorRT-LLM.git
synced 2026-02-04 02:02:01 +08:00
[None][bug] AutoDeploy: fix regression in kv cache resize memory estimation (#10726)
Signed-off-by: Lucas Liebenwein <11156568+lucaslie@users.noreply.github.com>
This commit is contained in:
parent
5ad8cf6d5e
commit
49c6f73554
@@ -358,7 +358,9 @@ class ResizeKVCache(BaseTransform):
|
||||
f"Extra memory used during forward pass (MB): {mem_used_during_forward_pass_mb}"
|
||||
)
|
||||
|
||||
free_mem_post, _ = get_mem_info_in_mb(empty_cache=True)
|
||||
# TODO (lucaslie): logic needs overhaul, too much going on. For now, this is just reverting
|
||||
# to the original logic. Full overhaul will be done as part of #10013
|
||||
free_mem_post, _ = get_mem_info_in_mb(empty_cache=False)
|
||||
self._log_info(f"Free memory after forward pass (MB): {free_mem_post}")
|
||||
|
||||
memory_for_forward_pass = free_mem_pre - free_mem_post
|
||||
|
||||
Loading…
Reference in New Issue
Block a user