mirror of
https://github.com/NVIDIA/TensorRT-LLM.git
synced 2026-01-13 22:18:36 +08:00
Merge 6de8d90b31 into 6df2c8a074
This commit is contained in:
commit
4deec9182f
@ -2789,9 +2789,23 @@ class PyExecutor:
|
||||
def _pause_requests(self, requests_to_pause):
    """Pause and terminate a bounded number of the given requests.

    Walks ``requests_to_pause`` in order. A request is skipped when it
    already carries a truthy ``_paused`` attribute, or when its
    ``request_id`` is found in ``self.inflight_req_ids``. Every other
    request is paused with ``self.max_input_len``, tagged with
    ``_paused = True`` and then passed to ``self._terminate_request``.

    At most ``MAX_PAUSES_PER_STEP`` requests are paused per call; once that
    budget is spent the loop stops and the remaining requests are left
    untouched. Skipped requests do not consume the budget.

    Args:
        requests_to_pause: Iterable of request objects exposing
            ``request_id`` and ``pause(max_input_len)``.
    """
    # Upper bound on how many requests a single call may pause.
    MAX_PAUSES_PER_STEP = 8
    max_input_len = self.max_input_len
    pauses_remaining = MAX_PAUSES_PER_STEP

    for req in requests_to_pause:
        # Budget exhausted: leave every remaining request as it is.
        if pauses_remaining <= 0:
            break

        # Already tagged by an earlier call; do not pause it twice.
        # NOTE(review): `_paused` is never cleared in this method - confirm
        # it is reset elsewhere if a request can be paused again later.
        if getattr(req, "_paused", False):
            continue

        # Requests whose id is tracked in self.inflight_req_ids are skipped
        # rather than paused.
        if req.request_id in self.inflight_req_ids:
            continue

        req.pause(max_input_len)
        req._paused = True
        pauses_remaining -= 1
        self._terminate_request(req)
|
||||
|
||||
def _add_inflight_ids(self, scheduled_requests):
|
||||
|
||||
Loading…
Reference in New Issue
Block a user