mirror of
https://github.com/NVIDIA/TensorRT-LLM.git
synced 2026-01-14 06:27:45 +08:00
fix: fix hang in mgmn with trtllm-llmapi-launch command (#3119)
* init Signed-off-by: Superjomn <328693+Superjomn@users.noreply.github.com> * restore Signed-off-by: Superjomn <328693+Superjomn@users.noreply.github.com> --------- Signed-off-by: Superjomn <328693+Superjomn@users.noreply.github.com>
This commit is contained in:
parent
0976360204
commit
87ab794aa2
@ -435,13 +435,14 @@ class ExecutorBindingsWorker(GenerationExecutor):
|
|||||||
self._client_id_to_request_id.pop(client_id, None)
|
self._client_id_to_request_id.pop(client_id, None)
|
||||||
|
|
||||||
def shutdown(self):
|
def shutdown(self):
|
||||||
print_colored_debug(f'Worker {mpi_rank()} shutdown...\n', "yellow")
|
|
||||||
|
|
||||||
if self.doing_shutdown:
|
if self.doing_shutdown:
|
||||||
return
|
return
|
||||||
else:
|
else:
|
||||||
self.doing_shutdown = True
|
self.doing_shutdown = True
|
||||||
|
|
||||||
|
print_colored_debug(f'Worker {mpi_rank()} shutdown...\n', "yellow")
|
||||||
|
|
||||||
if self.engine is not None:
|
if self.engine is not None:
|
||||||
if self.engine.can_enqueue_requests():
|
if self.engine.can_enqueue_requests():
|
||||||
|
|
||||||
@ -500,6 +501,10 @@ def worker_main(
|
|||||||
is_llm_executor: Optional[
|
is_llm_executor: Optional[
|
||||||
bool] = True, # whether it's the main executor instance
|
bool] = True, # whether it's the main executor instance
|
||||||
) -> None:
|
) -> None:
|
||||||
|
mpi_comm().barrier()
|
||||||
|
print_colored_debug(f"Worker {mpi_rank()} entering worker_main...\n",
|
||||||
|
"green")
|
||||||
|
|
||||||
pid = os.getpid()
|
pid = os.getpid()
|
||||||
cpus = os.sched_getaffinity(pid)
|
cpus = os.sched_getaffinity(pid)
|
||||||
if cpus:
|
if cpus:
|
||||||
@ -608,6 +613,10 @@ def worker_main(
|
|||||||
# and handled by future.done_callback, that will propagate the
|
# and handled by future.done_callback, that will propagate the
|
||||||
# error to the error_queue in the main thread.
|
# error to the error_queue in the main thread.
|
||||||
|
|
||||||
|
mpi_comm().barrier()
|
||||||
|
print_colored_debug(f"Worker {mpi_rank()} ready to setup backend...\n",
|
||||||
|
"green")
|
||||||
|
|
||||||
try:
|
try:
|
||||||
worker: ExecutorBindingsWorker = worker_cls(
|
worker: ExecutorBindingsWorker = worker_cls(
|
||||||
engine,
|
engine,
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user