mirror of
https://github.com/NVIDIA/TensorRT-LLM.git
synced 2026-01-13 22:18:36 +08:00
[TRTLLM-9079][infra] upgrade tritonserver DLFW 25.10 (#8929)
Signed-off-by: ZhanruiSunCh <184402041+ZhanruiSunCh@users.noreply.github.com>
This commit is contained in:
parent
83122bfd64
commit
bdcf837784
@ -2,8 +2,7 @@
|
||||
ARG BASE_IMAGE=nvcr.io/nvidia/pytorch
|
||||
ARG TRITON_IMAGE=nvcr.io/nvidia/tritonserver
|
||||
ARG BASE_TAG=25.10-py3
|
||||
# [TODO] Update to NVIDIA Triton 25.10 when it's available
|
||||
ARG TRITON_BASE_TAG=25.09-py3
|
||||
ARG TRITON_BASE_TAG=25.10-py3
|
||||
ARG DEVEL_IMAGE=devel
|
||||
|
||||
FROM ${BASE_IMAGE}:${BASE_TAG} AS base
|
||||
|
||||
@ -454,7 +454,7 @@ def runLLMBuild(pipeline, buildFlags, tarName, is_linux_x86_64)
|
||||
def llmPath = sh (script: "realpath ${LLM_ROOT}",returnStdout: true).trim()
|
||||
// TODO: Remove after the cmake version is upgraded to 3.31.8
|
||||
// Get triton tag from docker/dockerfile.multi
|
||||
def tritonShortTag = "r25.09"
|
||||
def tritonShortTag = "r25.10"
|
||||
sh "cd ${LLM_ROOT}/triton_backend/inflight_batcher_llm && mkdir build && cd build && cmake .. -DTRTLLM_DIR=${llmPath} -DTRITON_COMMON_REPO_TAG=${tritonShortTag} -DTRITON_CORE_REPO_TAG=${tritonShortTag} -DTRITON_THIRD_PARTY_REPO_TAG=${tritonShortTag} -DTRITON_BACKEND_REPO_TAG=${tritonShortTag} -DUSE_CXX11_ABI=ON && make -j${buildJobs} install"
|
||||
|
||||
// Step 3: packaging wheels into tarfile
|
||||
|
||||
@ -1527,7 +1527,7 @@ def launchTestListCheck(pipeline)
|
||||
sh "tar -zxf ${tarName}"
|
||||
def llmPath = sh (script: "realpath .", returnStdout: true).trim()
|
||||
def llmSrc = "${llmPath}/TensorRT-LLM/src"
|
||||
sh "NVIDIA_TRITON_SERVER_VERSION=25.09 LLM_ROOT=${llmSrc} LLM_BACKEND_ROOT=${llmSrc}/triton_backend python3 ${llmSrc}/scripts/check_test_list.py --l0 --qa --waive"
|
||||
sh "NVIDIA_TRITON_SERVER_VERSION=25.10 LLM_ROOT=${llmSrc} LLM_BACKEND_ROOT=${llmSrc}/triton_backend python3 ${llmSrc}/scripts/check_test_list.py --l0 --qa --waive"
|
||||
} catch (InterruptedException e) {
|
||||
throw e
|
||||
} catch (Exception e) {
|
||||
|
||||
@ -13,7 +13,7 @@
|
||||
# images are adopted from PostMerge pipelines, the abbreviated commit hash is used instead.
|
||||
IMAGE_NAME=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm
|
||||
|
||||
LLM_DOCKER_IMAGE=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:pytorch-25.10-py3-x86_64-ubuntu24.04-trt10.13.3.9-skip-tritondevel-202511110140-8447
|
||||
LLM_SBSA_DOCKER_IMAGE=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:pytorch-25.10-py3-aarch64-ubuntu24.04-trt10.13.3.9-skip-tritondevel-202511110140-8447
|
||||
LLM_ROCKYLINUX8_PY310_DOCKER_IMAGE=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:cuda-13.0.2-devel-rocky8-x86_64-rocky8-py310-trt10.13.3.9-skip-tritondevel-202511110140-8447
|
||||
LLM_ROCKYLINUX8_PY312_DOCKER_IMAGE=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:cuda-13.0.2-devel-rocky8-x86_64-rocky8-py312-trt10.13.3.9-skip-tritondevel-202511110140-8447
|
||||
LLM_DOCKER_IMAGE=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:pytorch-25.10-py3-x86_64-ubuntu24.04-trt10.13.3.9-skip-tritondevel-202511131803-8929
|
||||
LLM_SBSA_DOCKER_IMAGE=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:pytorch-25.10-py3-aarch64-ubuntu24.04-trt10.13.3.9-skip-tritondevel-202511131803-8929
|
||||
LLM_ROCKYLINUX8_PY310_DOCKER_IMAGE=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:cuda-13.0.2-devel-rocky8-x86_64-rocky8-py310-trt10.13.3.9-skip-tritondevel-202511131803-8929
|
||||
LLM_ROCKYLINUX8_PY312_DOCKER_IMAGE=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:cuda-13.0.2-devel-rocky8-x86_64-rocky8-py312-trt10.13.3.9-skip-tritondevel-202511131803-8929
|
||||
|
||||
@ -19,7 +19,7 @@ pandas
|
||||
h5py==3.12.1
|
||||
StrEnum
|
||||
sentencepiece>=0.1.99
|
||||
tensorrt~=10.13.0
|
||||
tensorrt~=10.13.3
|
||||
# https://docs.nvidia.com/deeplearning/frameworks/pytorch-release-notes/rel-25-10.html#rel-25-10 uses 2.9.0a0.
|
||||
torch>=2.9.0a0,<=2.9.0
|
||||
torchvision
|
||||
|
||||
Loading…
Reference in New Issue
Block a user