chore: remove support for llmapi + TRT backend in Triton (#5856)

Signed-off-by: Aurelien Chartier <2567591+achartier@users.noreply.github.com>
Aurelien Chartier 2025-07-09 21:30:34 -07:00 committed by GitHub
parent e289a98d5a
commit 3ec3ff1d82
2 changed files with 3 additions and 27 deletions

@@ -42,9 +42,7 @@ from helpers import (get_input_tensor_by_name, get_output_config_from_request,
 from mpi4py.futures import MPICommExecutor
 from mpi4py.MPI import COMM_WORLD
-from tensorrt_llm import LLM as PyTorchLLM
-from tensorrt_llm import SamplingParams
-from tensorrt_llm._tensorrt_engine import LLM
+from tensorrt_llm import LLM, SamplingParams
 from tensorrt_llm._utils import global_mpi_rank, global_mpi_size
 from tensorrt_llm.llmapi.llm_utils import update_llm_args_with_extra_dict
@@ -200,15 +198,8 @@ class TritonPythonModel:
         # Create LLM in a thread to avoid blocking
         loop = asyncio.get_running_loop()
         try:
-            backend = self.llm_engine_args.get("backend", None)
-            # Update LLM engine args with disaggregated config if present
-            if backend == "pytorch":
-                llm = await loop.run_in_executor(
-                    None, lambda: PyTorchLLM(**self.llm_engine_args))
-            else:
-                self.llm_engine_args["pytorch_backend_config"] = None
-                llm = await loop.run_in_executor(
-                    None, lambda: LLM(**self.llm_engine_args))
+            llm = await loop.run_in_executor(
+                None, lambda: LLM(**self.llm_engine_args))
             yield llm
         finally:
             if 'llm' in locals():

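For context, a minimal standalone sketch of the pattern the simplified hunk above leaves in place: build the (now PyTorch-backend) LLM in a worker thread so the event loop is not blocked while the engine loads, and tear it down when the context exits. The managed_llm helper, the placeholder model path, and the shutdown call are illustrative assumptions, not code taken from this commit.

import asyncio
from contextlib import asynccontextmanager

from tensorrt_llm import LLM, SamplingParams

# Placeholder engine args; point "model" at a real HF checkpoint or engine dir.
llm_engine_args = {"model": "/path/to/model"}


@asynccontextmanager
async def managed_llm(engine_args: dict):
    # Construct the LLM in a worker thread so the running event loop stays responsive.
    loop = asyncio.get_running_loop()
    llm = None
    try:
        llm = await loop.run_in_executor(None, lambda: LLM(**engine_args))
        yield llm
    finally:
        if llm is not None:
            # Assumed cleanup call; mirrors the `if 'llm' in locals():` teardown above.
            llm.shutdown()


async def main():
    async with managed_llm(llm_engine_args) as llm:
        params = SamplingParams(max_tokens=32)
        for output in llm.generate(["Hello from the llmapi backend"], params):
            print(output.outputs[0].text)


if __name__ == "__main__":
    asyncio.run(main())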

@ -1,15 +0,0 @@
#!/bin/bash
set -ex
cd /code/
function serve {
export UCX_UD_TIMEOUT=120s
export PMIX_MCA_gds=hash # Required
/opt/tritonserver/bin/tritonserver --model-repo llmapi_repo
}
# task
nvidia-smi
serve