mirror of
https://github.com/NVIDIA/TensorRT-LLM.git
synced 2026-02-05 02:31:33 +08:00
[https://nvbugs/5811697][fix] Fix buffer reuse. (#10716)
Signed-off-by: Yuxian Qiu <142763828+yuxianq@users.noreply.github.com>
Signed-off-by: Wangshanshan <30051912+dominicshanshan@users.noreply.github.com>
This commit is contained in:
parent
4b833492fb
commit
2b3bb2e9b0
@@ -79,16 +79,20 @@ class Buffers:
|
||||
best_fit_block = block
|
||||
smallest_sufficient_size = block.buffer.numel()
|
||||
|
||||
if reserve_buffer and best_fit_block is not None:
|
||||
if best_fit_block is not None:
|
||||
if reserve_buffer:
|
||||
best_fit_block.is_reserved = True
|
||||
# A suitable buffer was found, so reuse it.
|
||||
best_fit_block.is_reserved = True
|
||||
return self._view_as(best_fit_block.buffer, tensor_shape, dtype)
|
||||
|
||||
for block in list(candidate_blocks):
|
||||
if not block.is_reserved:
|
||||
# Need to call del BufferBlock.buffer, otherwise memory isn't
|
||||
# released and OOM may happen.
|
||||
buffer_size = block.buffer.numel()
|
||||
del block.buffer
|
||||
if buffer_size >= 1024 * 1024 * 1024:
|
||||
torch.cuda.empty_cache()
|
||||
candidate_blocks.remove(block)
|
||||
|
||||
# No suitable buffer was found, so allocate a new one.
|
||||
|
||||
Loading…
Reference in New Issue
Block a user