[https://nvbugs/5791830][fix] fix pp loop hang caused by i-sending new requests (#10665)

Signed-off-by: Lizhi Zhou <1432185+reasonsolo@users.noreply.github.com>
2026-02-04 02:02:01 +08:00 · 2026-01-15 16:33:55 +08:00 · 2026-01-15 16:33:55 +08:00 · ff277b591e
commit ff277b591e
parent cd55fb4551
1 changed files with 10 additions and 1 deletions
--- a/tensorrt_llm/_torch/pyexecutor/executor_request_queue.py
+++ b/tensorrt_llm/_torch/pyexecutor/executor_request_queue.py
@ -1,6 +1,7 @@
 import dataclasses
 import datetime
 import heapq
+import os
 import queue
 import threading
 import time
@ -611,12 +612,20 @@ class ExecutorRequestQueue:
            with nvtx_range("recv_requests_from_prev_pp"):
                payloads = self.dist.recv_object(self.dist.prev_pp_rank, tag)

+        # isend new requests may cause deadlock, when CUDA_LAUNCH_BLOCKING=1 or PP microbatches can't overlap,
+        # the deadlock will happen deterministicly:
+        # 1. rank1 will wait on nccl.send(rank2), without invoking mpi.wait(isend-handle)
+        # 2. rank2 will wait on mpi.recv(rank1) but never receive the new requests.
+        # 3. rank1 will hang on nccl.send because rank2 will never reach nccl.recv(rank1).
+        pp_send_func = self.dist.isend_object if os.environ.get(
+            "TRTLLM_PP_REQ_SEND_ASYNC", "0") == "1" else self.dist.send_object
+
        if not self.dist.is_last_pp_rank:
            if self.send_requests_handler is not None:
                with nvtx_range("wait_prev_send_requests_handler"):
                    self.send_requests_handler.wait()
            with nvtx_range("send_requests_to_next_pp"):
-                self.send_requests_handler = self.dist.isend_object(
+                self.send_requests_handler = pp_send_func(
                    payloads, self.dist.next_pp_rank, tag)

        return payloads