[None][bug] AutoDeploy: fix regression in kv cache resize memory estimation (#10726)

Signed-off-by: Lucas Liebenwein <11156568+lucaslie@users.noreply.github.com>
2026-02-04 02:02:01 +08:00 · 2026-01-15 20:52:03 -05:00 · 2026-01-15 20:52:03 -05:00 · 49c6f73554
commit 49c6f73554
parent 5ad8cf6d5e
1 changed files with 3 additions and 1 deletions
--- a/tensorrt_llm/_torch/auto_deploy/transform/library/kvcache.py
+++ b/tensorrt_llm/_torch/auto_deploy/transform/library/kvcache.py
@@ -358,7 +358,9 @@ class ResizeKVCache(BaseTransform):
            f"Extra memory used during forward pass (MB): {mem_used_during_forward_pass_mb}"
        )

-        free_mem_post, _ = get_mem_info_in_mb(empty_cache=True)
+        # TODO (lucaslie): logic needs overhaul, too much going on. For now, this is just reverting
+        # to the original logic. Full overhaul will be done as part of #10013
+        free_mem_post, _ = get_mem_info_in_mb(empty_cache=False)
        self._log_info(f"Free memory after forward pass (MB): {free_mem_post}")

        memory_for_forward_pass = free_mem_pre - free_mem_post