diff --git a/tensorrt_llm/executor/worker.py b/tensorrt_llm/executor/worker.py index 2504f6e5cb..36eb4bf0a8 100644 --- a/tensorrt_llm/executor/worker.py +++ b/tensorrt_llm/executor/worker.py @@ -435,13 +435,14 @@ class ExecutorBindingsWorker(GenerationExecutor): self._client_id_to_request_id.pop(client_id, None) def shutdown(self): - print_colored_debug(f'Worker {mpi_rank()} shutdown...\n', "yellow") if self.doing_shutdown: return else: self.doing_shutdown = True + print_colored_debug(f'Worker {mpi_rank()} shutdown...\n', "yellow") + if self.engine is not None: if self.engine.can_enqueue_requests(): @@ -500,6 +501,10 @@ def worker_main( is_llm_executor: Optional[ bool] = True, # whether it's the main executor instance ) -> None: + mpi_comm().barrier() + print_colored_debug(f"Worker {mpi_rank()} entering worker_main...\n", + "green") + pid = os.getpid() cpus = os.sched_getaffinity(pid) if cpus: @@ -608,6 +613,10 @@ def worker_main( # and handled by future.done_callback, that will propagate the # error to the error_queue in the main thread. + mpi_comm().barrier() + print_colored_debug(f"Worker {mpi_rank()} ready to setup backend...\n", + "green") + try: worker: ExecutorBindingsWorker = worker_cls( engine,