Mirror of https://github.com/NVIDIA/TensorRT-LLM.git, synced 2026-01-14 06:27:45 +08:00
[TRTLLM-6835][fix] Fix potential hang caused by python multiprocessing when prefetching weights (#6927)
Signed-off-by: Lance Liao <108499334+lancelly@users.noreply.github.com>
parent 7f7a301f6e
commit d9b9b5d053
@@ -1,6 +1,7 @@
 import glob
 import multiprocessing
 import os
+from concurrent.futures import ThreadPoolExecutor
 from typing import Any, List
 
 import psutil
@@ -120,7 +121,7 @@ class HfWeightLoader(BaseWeightLoader):
         if len(local_file_names) == 0:
             return
 
-        max_processes = min(multiprocessing.cpu_count() * 2, 16,
-                            len(local_file_names))
-        with multiprocessing.Pool(processes=max_processes) as pool:
-            pool.map(self._prefetch_one_file, local_file_names)
+        max_workers = min(multiprocessing.cpu_count() * 2, 16,
+                          len(local_file_names))
+        with ThreadPoolExecutor(max_workers=max_workers) as executor:
+            list(executor.map(self._prefetch_one_file, local_file_names))
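For readers outside the codebase, below is a minimal, self-contained sketch of the pattern the patch moves to: warming the OS page cache for a set of weight files with a ThreadPoolExecutor instead of a fork-based multiprocessing.Pool. The helper names prefetch_files and _prefetch_one_file and the chunked-read body are illustrative assumptions, not the actual HfWeightLoader internals; only the worker-sizing heuristic (2x CPU count, capped at 16, never more workers than files) is taken from the diff above. Threads are sufficient here because file reads are I/O-bound and release the GIL, and they sidestep the classic fork-time hang: a forked Pool worker can inherit locks that other threads in the parent held at fork time, and those locks are never released in the child.

import multiprocessing
import os
from concurrent.futures import ThreadPoolExecutor
from typing import List


def _prefetch_one_file(file_name: str, chunk_size: int = 64 * 1024 * 1024) -> None:
    # Illustrative stand-in for the loader's prefetch helper: read the file
    # sequentially so the OS page cache is warm before the real weight load.
    with open(file_name, "rb") as f:
        while f.read(chunk_size):
            pass


def prefetch_files(local_file_names: List[str]) -> None:
    if len(local_file_names) == 0:
        return

    # Sizing heuristic from the patch: 2x CPUs, capped at 16, and never more
    # workers than there are files to touch.
    max_workers = min(multiprocessing.cpu_count() * 2, 16, len(local_file_names))
    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        # list() drains the iterator, so the call blocks until every file has
        # been read and re-raises any worker exception in the caller.
        list(executor.map(_prefetch_one_file, local_file_names))


if __name__ == "__main__":
    prefetch_files([f for f in os.listdir(".") if f.endswith(".safetensors")])

Wrapping executor.map in list() mirrors the blocking semantics of the old pool.map call while keeping exceptions visible, which is presumably why the patched code does the same.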