From 2b3bb2e9b0a015549b6d87c2a092047c76f6f628 Mon Sep 17 00:00:00 2001 From: Yuxian Qiu <142763828+yuxianq@users.noreply.github.com> Date: Mon, 19 Jan 2026 10:11:18 +0800 Subject: [PATCH] [https://nvbugs/5811697][fix] Fix buffer reuse. (#10716) Signed-off-by: Yuxian Qiu <142763828+yuxianq@users.noreply.github.com> Signed-off-by: Wangshanshan <30051912+dominicshanshan@users.noreply.github.com> --- tensorrt_llm/_torch/memory_buffer_utils.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/tensorrt_llm/_torch/memory_buffer_utils.py b/tensorrt_llm/_torch/memory_buffer_utils.py index 8d75cd354a..3be29e30a7 100644 --- a/tensorrt_llm/_torch/memory_buffer_utils.py +++ b/tensorrt_llm/_torch/memory_buffer_utils.py @@ -79,16 +79,20 @@ class Buffers: best_fit_block = block smallest_sufficient_size = block.buffer.numel() - if reserve_buffer and best_fit_block is not None: + if best_fit_block is not None: + if reserve_buffer: + best_fit_block.is_reserved = True # A suitable buffer was found, so reuse it. - best_fit_block.is_reserved = True return self._view_as(best_fit_block.buffer, tensor_shape, dtype) for block in list(candidate_blocks): if not block.is_reserved: # Need to call del BufferBlock.buffer, otherwise memory isn't # released and OOM may happen. + buffer_size = block.buffer.numel() del block.buffer + if buffer_size >= 1024 * 1024 * 1024: + torch.cuda.empty_cache() candidate_blocks.remove(block) # No suitable buffer was found, so allocate a new one.