From 6de8d90b31d20c59bef4f6af2648380e55173338 Mon Sep 17 00:00:00 2001
From: Srihari Unnikrishnan
Date: Mon, 15 Dec 2025 11:43:39 +0530
Subject: [PATCH] Fix crash under MAX_UTILIZATION by making pause logic idempotent and bounded

Signed-off-by: Srihari Unnikrishnan

Developer Certificate of Origin
Version 1.1

Copyright (C) 2004, 2006 The Linux Foundation and its contributors.
1 Letterman Drive
Suite D4700
San Francisco, CA, 94129

Everyone is permitted to copy and distribute verbatim copies of this
license document, but changing it is not allowed.

Developer's Certificate of Origin 1.1

By making a contribution to this project, I certify that:

(a) The contribution was created in whole or in part by me and I
    have the right to submit it under the open source license
    indicated in the file; or

(b) The contribution is based upon previous work that, to the best
    of my knowledge, is covered under an appropriate open source
    license and I have the right under that license to submit that
    work with modifications, whether created in whole or in part
    by me, under the same open source license (unless I am
    permitted to submit under a different license), as indicated
    in the file; or

(c) The contribution was provided directly to me by some other
    person who certified (a), (b) or (c) and I have not modified
    it.

(d) I understand and agree that this project and the contribution
    are public and that a record of the contribution (including all
    personal information I submit with it, including my sign-off) is
    maintained indefinitely and may be redistributed consistent with
    this project or the open source license(s) involved.
---
 tensorrt_llm/_torch/pyexecutor/py_executor.py | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/tensorrt_llm/_torch/pyexecutor/py_executor.py b/tensorrt_llm/_torch/pyexecutor/py_executor.py
index 4f8bc8820e..5a8787e9df 100644
--- a/tensorrt_llm/_torch/pyexecutor/py_executor.py
+++ b/tensorrt_llm/_torch/pyexecutor/py_executor.py
@@ -2628,9 +2628,25 @@ class PyExecutor:
     def _pause_requests(self, requests_to_pause):
         # todo: support work with self.inflight_req_ids.
         # Currently, self.inflight_req_ids is not.
+        # Cap on how many requests a single call may pause.
+        MAX_PAUSES_PER_STEP = 8
         max_input_len = self.max_input_len
+        # Ids paused by this call. Tracked locally rather than as a flag on
+        # the request: a flag that is never cleared would stop the request
+        # from being paused again on any later call.
+        paused_ids = set()
+
         for req in requests_to_pause:
+            if len(paused_ids) >= MAX_PAUSES_PER_STEP:
+                break
+
+            req_id = req.request_id
+            # Skip duplicates within this call and ids in self.inflight_req_ids.
+            if req_id in paused_ids or req_id in self.inflight_req_ids:
+                continue
+
             req.pause(max_input_len)
+            paused_ids.add(req_id)
             self._terminate_request(req)
 
     def _add_inflight_ids(self, scheduled_requests):