[https://nvbugs/5811697][fix] Fix buffer reuse. (#10716)

Signed-off-by: Yuxian Qiu <142763828+yuxianq@users.noreply.github.com>
Signed-off-by: Wangshanshan <30051912+dominicshanshan@users.noreply.github.com>
2026-02-05 02:31:33 +08:00 · 2026-01-19 10:11:18 +08:00 · 2026-01-19 10:11:18 +08:00 · 2b3bb2e9b0
commit 2b3bb2e9b0
parent 4b833492fb
1 changed files with 6 additions and 2 deletions
--- a/tensorrt_llm/_torch/memory_buffer_utils.py
+++ b/tensorrt_llm/_torch/memory_buffer_utils.py
@@ -79,16 +79,20 @@ class Buffers:
                best_fit_block = block
                smallest_sufficient_size = block.buffer.numel()

-        if reserve_buffer and best_fit_block is not None:
+        if best_fit_block is not None:
+            if reserve_buffer:
+                best_fit_block.is_reserved = True
            # A suitable buffer was found, so reuse it.
-            best_fit_block.is_reserved = True
            return self._view_as(best_fit_block.buffer, tensor_shape, dtype)

        for block in list(candidate_blocks):
            if not block.is_reserved:
                # Need to call del BufferBlock.buffer, otherwise memory isn't
                # released and OOM may happen.
+                buffer_size = block.buffer.numel()
                del block.buffer
+                if buffer_size >= 1024 * 1024 * 1024:
+                    torch.cuda.empty_cache()
                candidate_blocks.remove(block)

        # No suitable buffer was found, so allocate a new one.