mirror of
https://github.com/NVIDIA/TensorRT-LLM.git
synced 2026-02-05 02:31:33 +08:00
[https://nvbugs/5816267][fix] Remove weight tensor holder to release memory earlier (#10876)
Signed-off-by: Dongxu Yang <78518666+dongxuy04@users.noreply.github.com>
This commit is contained in:
parent
5450485bec
commit
635cbf01ba
@@ -256,6 +256,9 @@ class ModelLoader:
|
||||
f"Fallback to regular model init: {traceback.format_exc(limit=10)}\n"
|
||||
)
|
||||
model = AutoModelForCausalLM.from_config(config)
|
||||
finally:
|
||||
if 'memo' in locals():
|
||||
del memo
|
||||
|
||||
model.to("cuda")
|
||||
rank_model_storage = get_rank_model_storage(model)
|
||||
|
||||
Loading…
Reference in New Issue
Block a user