mirror of https://github.com/NVIDIA/TensorRT-LLM.git
[https://nvbugs/5868038][fix] Gracefully terminate disagg serving servers to prevent leftover subprocess warnings (#11395)
Signed-off-by: peihu-nv <259410613+peihu-nv@users.noreply.github.com>
parent 860054c859
commit a982554190
@@ -450,6 +450,30 @@ def launch_disaggregated_llm(
         _show_kvcache_time(kv_cache_perf_dir)
         _get_perf_metrics()
+
+        # Gracefully shut down all server processes
+        all_processes = list(
+            itertools.chain(ctx_processes, gen_processes, server_processes))
+
+        # SIGTERM triggers llm.shutdown() inside each trtllm-serve, cleaning up the executor and MPI workers.
+        for process in all_processes:
+            if process.poll() is None:
+                process.terminate()
+
+        # Wait up to 5s total, then SIGKILL any process that doesn't exit.
+        # This is a safety net for when llm.shutdown() hangs.
+        deadline = time.monotonic() + 5
+        for process in all_processes:
+            remaining = max(0, deadline - time.monotonic())
+            try:
+                process.wait(timeout=remaining)
+            except subprocess.TimeoutExpired:
+                try:
+                    process.kill()
+                except ProcessLookupError:
+                    pass  # already exited between timeout and kill
+                except OSError:
+                    pass  # process already gone
 
 
 def run_parallel_test(model_name: str,
                       model_path: str,
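For reference, the two-phase shutdown the diff adds can be exercised standalone. The sketch below is a minimal reconstruction of the same pattern under the assumption that the ctx/gen/server entries are plain subprocess.Popen handles; the sleep children are placeholders, not real trtllm-serve invocations.

import itertools
import subprocess
import time

# Hypothetical stand-ins for the ctx/gen/server lists in the test harness;
# plain sleep children here (POSIX), not actual trtllm-serve processes.
ctx_processes = [subprocess.Popen(["sleep", "30"])]
gen_processes = [subprocess.Popen(["sleep", "30"])]
server_processes = [subprocess.Popen(["sleep", "30"])]

all_processes = list(
    itertools.chain(ctx_processes, gen_processes, server_processes))

# Phase 1: SIGTERM every child that is still running.
for process in all_processes:
    if process.poll() is None:
        process.terminate()

# Phase 2: one shared 5 s deadline, then SIGKILL stragglers. A per-process
# timeout would instead allow up to 5 s for each child.
deadline = time.monotonic() + 5
for process in all_processes:
    remaining = max(0, deadline - time.monotonic())
    try:
        process.wait(timeout=remaining)
    except subprocess.TimeoutExpired:
        try:
            process.kill()
        except OSError:  # includes ProcessLookupError: child already gone
            pass

Sharing one monotonic deadline across the wait loop is what bounds total shutdown latency near 5 s however many servers were launched: once the deadline has passed, remaining is 0 and wait(timeout=0) returns immediately for exited children or raises TimeoutExpired for stragglers, which are then killed without further delay.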
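On the child side, the diff's comment relies on each trtllm-serve turning SIGTERM into llm.shutdown(). A hypothetical sketch of that convention follows; the DummyLLM class and handler are illustrative only, not TensorRT-LLM's actual server code.

import signal
import sys

class DummyLLM:
    """Hypothetical stand-in for the engine object a serving process owns."""

    def shutdown(self):
        # Real code would tear down the executor and MPI workers here.
        print("executor and workers released")

llm = DummyLLM()

def _on_sigterm(signum, frame):
    llm.shutdown()
    # Exit with the conventional 128 + signal-number status (143 for SIGTERM).
    sys.exit(128 + signum)

signal.signal(signal.SIGTERM, _on_sigterm)
signal.pause()  # stand-in for the server's serve loop (POSIX)

Because the child exits promptly once cleanup finishes, the parent's 5 s grace window is normally enough and the SIGKILL path stays a rare fallback.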