[TRTLLM-6835][fix] Fix potential hang caused by Python multiprocessing when prefetching weights (#6927)

Signed-off-by: Lance Liao <108499334+lancelly@users.noreply.github.com>
This commit is contained in:
Liao Lanyu 2025-08-18 10:20:09 +08:00 committed by GitHub
parent 7f7a301f6e
commit d9b9b5d053
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@@ -1,6 +1,7 @@
import glob
import multiprocessing
import os
+from concurrent.futures import ThreadPoolExecutor
from typing import Any, List
import psutil
@@ -120,7 +121,7 @@ class HfWeightLoader(BaseWeightLoader):
if len(local_file_names) == 0:
return
-max_processes = min(multiprocessing.cpu_count() * 2, 16,
-len(local_file_names))
-with multiprocessing.Pool(processes=max_processes) as pool:
-pool.map(self._prefetch_one_file, local_file_names)
+max_workers = min(multiprocessing.cpu_count() * 2, 16,
+len(local_file_names))
+with ThreadPoolExecutor(max_workers=max_workers) as executor:
+list(executor.map(self._prefetch_one_file, local_file_names))