chore: remove support for llmapi + TRT backend in Triton (#5856)
Signed-off-by: Aurelien Chartier <2567591+achartier@users.noreply.github.com>
parent e289a98d5a
commit 3ec3ff1d82
@@ -42,9 +42,7 @@ from helpers import (get_input_tensor_by_name, get_output_config_from_request,
 from mpi4py.futures import MPICommExecutor
 from mpi4py.MPI import COMM_WORLD

-from tensorrt_llm import LLM as PyTorchLLM
-from tensorrt_llm import SamplingParams
-from tensorrt_llm._tensorrt_engine import LLM
+from tensorrt_llm import LLM, SamplingParams
 from tensorrt_llm._utils import global_mpi_rank, global_mpi_size
 from tensorrt_llm.llmapi.llm_utils import update_llm_args_with_extra_dict

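After this change only the consolidated import is used. As a hedged usage sketch of that public API (the checkpoint path and sampling values below are placeholders, not taken from this repository):

from tensorrt_llm import LLM, SamplingParams

# Placeholder checkpoint; any Hugging Face or local model supported by the
# LLM API can be used here.
llm = LLM(model="TinyLlama/TinyLlama-1.1B-Chat-v1.0")

sampling_params = SamplingParams(max_tokens=32, temperature=0.8)
for output in llm.generate(["Hello, my name is"], sampling_params):
    print(output.outputs[0].text)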
@@ -200,15 +198,8 @@ class TritonPythonModel:
         # Create LLM in a thread to avoid blocking
         loop = asyncio.get_running_loop()
         try:
-            backend = self.llm_engine_args.get("backend", None)
-            # Update LLM engine args with disaggregated config if present
-            if backend == "pytorch":
-                llm = await loop.run_in_executor(
-                    None, lambda: PyTorchLLM(**self.llm_engine_args))
-            else:
-                self.llm_engine_args["pytorch_backend_config"] = None
-                llm = await loop.run_in_executor(
-                    None, lambda: LLM(**self.llm_engine_args))
+            llm = await loop.run_in_executor(
+                None, lambda: LLM(**self.llm_engine_args))
             yield llm
         finally:
             if 'llm' in locals():
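For context, a standalone sketch of the pattern the surviving branch uses: constructing the LLM in a thread-pool executor behind an async context manager so the model's event loop is never blocked. The helper name, the contents of llm_engine_args, and the shutdown() cleanup are illustrative assumptions, not the exact code in the Triton model file:

import asyncio
from contextlib import asynccontextmanager

from tensorrt_llm import LLM


@asynccontextmanager
async def managed_llm(llm_engine_args: dict):
    # llm_engine_args is assumed to hold keyword arguments parsed from the
    # model repository config, e.g. {"model": "/path/to/hf_checkpoint"}.
    loop = asyncio.get_running_loop()
    llm = None
    try:
        # LLM.__init__ loads weights and builds or loads the engine, which is
        # blocking work; run it in the default thread-pool executor instead
        # of on the event loop.
        llm = await loop.run_in_executor(None, lambda: LLM(**llm_engine_args))
        yield llm
    finally:
        if llm is not None:
            llm.shutdown()  # assumption: release engine resources on exit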
@@ -1,15 +0,0 @@
-#!/bin/bash
-set -ex
-
-cd /code/
-
-function serve {
-    export UCX_UD_TIMEOUT=120s
-    export PMIX_MCA_gds=hash # Required
-
-    /opt/tritonserver/bin/tritonserver --model-repo llmapi_repo
-}
-
-# task
-nvidia-smi
-serve