chore: remove support for llmapi + TRT backend in Triton (#5856)
Signed-off-by: Aurelien Chartier <2567591+achartier@users.noreply.github.com>
parent e289a98d5a
commit 3ec3ff1d82
@@ -42,9 +42,7 @@ from helpers import (get_input_tensor_by_name, get_output_config_from_request,
 from mpi4py.futures import MPICommExecutor
 from mpi4py.MPI import COMM_WORLD

-from tensorrt_llm import LLM as PyTorchLLM
-from tensorrt_llm import SamplingParams
-from tensorrt_llm._tensorrt_engine import LLM
+from tensorrt_llm import LLM, SamplingParams
 from tensorrt_llm._utils import global_mpi_rank, global_mpi_size
 from tensorrt_llm.llmapi.llm_utils import update_llm_args_with_extra_dict

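After this change only the consolidated import is used. As a hedged usage sketch of that public API (the checkpoint path and sampling values below are placeholders, not taken from this repository):

from tensorrt_llm import LLM, SamplingParams

# Placeholder checkpoint; any Hugging Face or local model supported by the
# LLM API can be used here.
llm = LLM(model="TinyLlama/TinyLlama-1.1B-Chat-v1.0")

sampling_params = SamplingParams(max_tokens=32, temperature=0.8)
for output in llm.generate(["Hello, my name is"], sampling_params):
    print(output.outputs[0].text)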
@@ -200,15 +198,8 @@ class TritonPythonModel:
         # Create LLM in a thread to avoid blocking
         loop = asyncio.get_running_loop()
         try:
-            backend = self.llm_engine_args.get("backend", None)
-            # Update LLM engine args with disaggregated config if present
-            if backend == "pytorch":
-                llm = await loop.run_in_executor(
-                    None, lambda: PyTorchLLM(**self.llm_engine_args))
-            else:
-                self.llm_engine_args["pytorch_backend_config"] = None
-                llm = await loop.run_in_executor(
-                    None, lambda: LLM(**self.llm_engine_args))
+            llm = await loop.run_in_executor(
+                None, lambda: LLM(**self.llm_engine_args))
             yield llm
         finally:
             if 'llm' in locals():
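For context, a standalone sketch of the pattern the surviving branch uses: constructing the LLM in a thread-pool executor behind an async context manager so the model's event loop is never blocked. The helper name, the contents of llm_engine_args, and the shutdown() cleanup are illustrative assumptions, not the exact code in the Triton model file:

import asyncio
from contextlib import asynccontextmanager

from tensorrt_llm import LLM


@asynccontextmanager
async def managed_llm(llm_engine_args: dict):
    # llm_engine_args is assumed to hold keyword arguments parsed from the
    # model repository config, e.g. {"model": "/path/to/hf_checkpoint"}.
    loop = asyncio.get_running_loop()
    llm = None
    try:
        # LLM.__init__ loads weights and builds or loads the engine, which is
        # blocking work; run it in the default thread-pool executor instead
        # of on the event loop.
        llm = await loop.run_in_executor(None, lambda: LLM(**llm_engine_args))
        yield llm
    finally:
        if llm is not None:
            llm.shutdown()  # assumption: release engine resources on exit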
@@ -1,15 +0,0 @@
-#!/bin/bash
-set -ex
-
-cd /code/
-
-function serve {
-    export UCX_UD_TIMEOUT=120s
-    export PMIX_MCA_gds=hash # Required
-
-    /opt/tritonserver/bin/tritonserver --model-repo llmapi_repo
-}
-
-# task
-nvidia-smi
-serve