diff --git a/tensorrt_llm/_torch/pyexecutor/model_loader.py b/tensorrt_llm/_torch/pyexecutor/model_loader.py index cc44248ebc..9ca714afcc 100644 --- a/tensorrt_llm/_torch/pyexecutor/model_loader.py +++ b/tensorrt_llm/_torch/pyexecutor/model_loader.py @@ -256,6 +256,9 @@ class ModelLoader: f"Fallback to regular model init: {traceback.format_exc(limit=10)}\n" ) model = AutoModelForCausalLM.from_config(config) + finally: + if 'memo' in locals(): + del memo model.to("cuda") rank_model_storage = get_rank_model_storage(model)