mirror of
https://github.com/NVIDIA/TensorRT-LLM.git
synced 2026-01-14 06:27:45 +08:00
Add sleep function for disagg gen-only benchmarking (#5398)
Signed-off-by: Xianjie <5410381+qiaoxj07@users.noreply.github.com>
This commit is contained in:
parent
feaf789342
commit
1e4fa13d33
@@ -236,6 +236,8 @@ class PyExecutor:
         self.ctx_in_transmission_requests = []
         self.previous_batch: Optional[BatchState] = None
         self.num_scheduled_requests: int = 0
+        self.benchmark_req_queues_size = int(
+            os.environ.get("TLLM_BENCHMARK_REQ_QUEUES_SIZE", 0))

         # list of requests in each PP micro batch
         self.num_micro_batches = self.dist.pp_size
@@ -996,6 +998,13 @@ class PyExecutor:

     def _executor_loop_overlap(self):
         torch.cuda.set_device(self.device_id)
+        if self.dist.rank == 0 and not self.is_warmup and self.benchmark_req_queues_size > 0 and self.kv_cache_transceiver:
+            while self.request_queue.qsize() < self.benchmark_req_queues_size:
+                logger.info(
+                    f"sleep 5 seconds, num_request_queue: {self.request_queue.qsize()}"
+                )
+                time.sleep(5)
+
         with self._profiler() as profile_step:
             iter_start_time = time.time()
             iter_stats = None
Loading…
Reference in New Issue
Block a user