Add sleep function for disagg gen-only benchmarking (#5398)

Signed-off-by: Xianjie <5410381+qiaoxj07@users.noreply.github.com>
This commit is contained in:
Xianjie Qiao 2025-06-26 07:32:16 +08:00 committed by GitHub
parent feaf789342
commit 1e4fa13d33
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -236,6 +236,8 @@ class PyExecutor:
self.ctx_in_transmission_requests = []
self.previous_batch: Optional[BatchState] = None
self.num_scheduled_requests: int = 0
self.benchmark_req_queues_size = int(
os.environ.get("TLLM_BENCHMARK_REQ_QUEUES_SIZE", 0))
# list of requests in each PP micro batch
self.num_micro_batches = self.dist.pp_size
@ -996,6 +998,13 @@ class PyExecutor:
def _executor_loop_overlap(self):
torch.cuda.set_device(self.device_id)
if self.dist.rank == 0 and not self.is_warmup and self.benchmark_req_queues_size > 0 and self.kv_cache_transceiver:
while self.request_queue.qsize() < self.benchmark_req_queues_size:
logger.info(
f"sleep 5 seconds, num_request_queue: {self.request_queue.qsize()}"
)
time.sleep(5)
with self._profiler() as profile_step:
iter_start_time = time.time()
iter_stats = None