Mirror of https://github.com/NVIDIA/TensorRT-LLM.git, synced 2026-01-14 06:27:45 +08:00
fix: fix hang in mgmn with trtllm-llmapi-launch command (#3119)
* init

Signed-off-by: Superjomn <328693+Superjomn@users.noreply.github.com>

* restore

Signed-off-by: Superjomn <328693+Superjomn@users.noreply.github.com>

---------

Signed-off-by: Superjomn <328693+Superjomn@users.noreply.github.com>
parent 0976360204
commit 87ab794aa2
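The hang addressed here shows up in multi-GPU multi-node (MGMN) runs launched through trtllm-llmapi-launch. As the hunks below show, the fix adds mpi_comm().barrier() calls in worker_main so every MPI rank reaches the same point before any of them continues. As a minimal, hedged sketch of that rendezvous pattern (this uses mpi4py directly, which is an assumption; the actual code goes through tensorrt_llm's mpi_comm()/mpi_rank() wrappers):

# Minimal rendezvous sketch using mpi4py (an assumption; the diff below uses
# tensorrt_llm's mpi_comm()/mpi_rank() helpers over the same communicator).
from mpi4py import MPI

comm = MPI.COMM_WORLD
rank = comm.Get_rank()

# ... per-rank setup that can take a different amount of time on each rank ...

# Every rank blocks here until all ranks have arrived, so no rank races ahead
# into later collective calls that its peers are not ready for.
comm.Barrier()

print(f"rank {rank}: all ranks reached the barrier, continuing")

Run with something like `mpirun -n 2 python sketch.py`. Without the rendezvous, a rank that finishes its setup early can enter a later collective alone and wait indefinitely for peers that never arrive, which is the kind of hang the commit title describes.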
@@ -435,13 +435,14 @@ class ExecutorBindingsWorker(GenerationExecutor):
         self._client_id_to_request_id.pop(client_id, None)
 
     def shutdown(self):
-        print_colored_debug(f'Worker {mpi_rank()} shutdown...\n', "yellow")
 
         if self.doing_shutdown:
             return
         else:
             self.doing_shutdown = True
 
+        print_colored_debug(f'Worker {mpi_rank()} shutdown...\n', "yellow")
+
         if self.engine is not None:
             if self.engine.can_enqueue_requests():
 
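This hunk also moves the shutdown debug message below the doing_shutdown guard, so it is emitted at most once per worker even if shutdown() is called again while a shutdown is in progress. A minimal sketch of that guard pattern (the Worker class and plain print are simplified stand-ins, not the repo's ExecutorBindingsWorker or print_colored_debug):

# Idempotent-shutdown sketch; names are simplified stand-ins.
class Worker:
    def __init__(self):
        self.doing_shutdown = False

    def shutdown(self):
        # Bail out if a shutdown is already in progress, so the message and
        # the teardown below run at most once.
        if self.doing_shutdown:
            return
        else:
            self.doing_shutdown = True

        print("worker shutdown...")
        # ... release the engine, queues, and background threads here ...

w = Worker()
w.shutdown()
w.shutdown()  # no-op: the guard returns immediately, nothing runs twice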
@@ -500,6 +501,10 @@ def worker_main(
         is_llm_executor: Optional[
             bool] = True,  # whether it's the main executor instance
 ) -> None:
+    mpi_comm().barrier()
+    print_colored_debug(f"Worker {mpi_rank()} entering worker_main...\n",
+                        "green")
+
     pid = os.getpid()
     cpus = os.sched_getaffinity(pid)
     if cpus:
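The context lines of this hunk show the CPU-affinity check that runs right after the new barrier. For reference, a small standalone sketch of that probe (os.sched_getaffinity is a Linux-only API and is not available on macOS or Windows):

# Standalone sketch of the affinity probe seen above (Linux-only).
import os

pid = os.getpid()
cpus = os.sched_getaffinity(pid)  # set of CPU ids this process may run on
if cpus:
    print(f"pid {pid} may run on CPUs {sorted(cpus)}")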
@@ -608,6 +613,10 @@ def worker_main(
     # and handled by future.done_callback, that will propagate the
     # error to the error_queue in the main thread.
 
+    mpi_comm().barrier()
+    print_colored_debug(f"Worker {mpi_rank()} ready to setup backend...\n",
+                        "green")
+
     try:
         worker: ExecutorBindingsWorker = worker_cls(
             engine,
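The comment kept in this hunk describes how a failure during backend setup reaches the main thread: the exception is captured by a future's done-callback and pushed onto an error queue. A generic sketch of that pattern (the queue, executor, and function names here are illustrative, not the repo's actual error_queue plumbing):

# Generic sketch of the "propagate the error to the error_queue in the main
# thread" pattern mentioned in the comment above; all names are illustrative.
import queue
from concurrent.futures import ThreadPoolExecutor

error_queue: "queue.Queue[BaseException]" = queue.Queue()

def setup_backend():
    raise RuntimeError("backend setup failed")  # hypothetical failure

def on_done(future):
    # Runs when the future finishes; forward any exception to the main thread.
    exc = future.exception()
    if exc is not None:
        error_queue.put(exc)

with ThreadPoolExecutor(max_workers=1) as pool:
    fut = pool.submit(setup_backend)
    fut.add_done_callback(on_done)

# Main thread picks the error up from the queue.
print("propagated:", error_queue.get())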