[TRTLLM-6224][infra] Upgrade dependencies to DLFW 25.06 and CUDA 12.9.1 (#5678)
Signed-off-by: Yiqing Yan <yiqingy@nvidia.com>
parent 4da5cfc511
commit 3f7abf87bc
```diff
@@ -7,7 +7,7 @@ TensorRT-LLM
 [](https://nvidia.github.io/TensorRT-LLM/)
 [](https://www.python.org/downloads/release/python-3123/)
 [](https://www.python.org/downloads/release/python-31012/)
-[](https://developer.nvidia.com/cuda-downloads)
+[](https://developer.nvidia.com/cuda-downloads)
 [](https://developer.nvidia.com/tensorrt)
 [](./tensorrt_llm/version.py)
 [](./LICENSE)
```
```diff
@@ -1,13 +1,5 @@
-# These vulnerabilities were inherited from the base image (pytorch:25.05-py3) and should be removed when the base image
+# These vulnerabilities were inherited from the base image (pytorch:25.06-py3) and should be removed when the base image
 # is updated.
 
-# WAR against https://github.com/advisories/GHSA-vqfr-h8mv-ghfj
-h11>=0.16.0
-# WAR against https://github.com/advisories/GHSA-7cx3-6m66-7c5m
-tornado>=6.5.0
-# WAR against https://github.com/advisories/GHSA-5rjg-fvgr-3xxf
-setuptools>=78.1.1
 # WAR against https://github.com/advisories/GHSA-8qvm-5x2c-j2w7
 protobuf>=4.25.8
-# WAR against https://github.com/advisories/GHSA-33p9-3p43-82vq
-jupyter-core>=5.8.1
```
```diff
@@ -1,8 +1,8 @@
 # Multi-stage Dockerfile
 ARG BASE_IMAGE=nvcr.io/nvidia/pytorch
 ARG TRITON_IMAGE=nvcr.io/nvidia/tritonserver
-ARG BASE_TAG=25.05-py3
-ARG TRITON_BASE_TAG=25.05-py3
+ARG BASE_TAG=25.06-py3
+ARG TRITON_BASE_TAG=25.06-py3
 ARG DEVEL_IMAGE=devel
 
 FROM ${BASE_IMAGE}:${BASE_TAG} AS base
```
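These `ARG` defaults can also be overridden per build without editing the Dockerfile; a minimal sketch (the tag values merely repeat the new defaults, and the stage name `base` comes from the `FROM ... AS base` line above):

```bash
docker build -f docker/Dockerfile.multi \
    --build-arg BASE_TAG=25.06-py3 \
    --build-arg TRITON_BASE_TAG=25.06-py3 \
    --target base \
    .
```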
```diff
@@ -74,18 +74,10 @@ ENV PYTORCH_CUDA_ALLOC_CONF="garbage_collection_threshold:0.99999"
 RUN pip3 uninstall -y opencv && rm -rf /usr/local/lib/python3*/dist-packages/cv2/
 RUN pip3 install opencv-python-headless --force-reinstall --no-deps --no-cache-dir
 
-# WARs against security issues inherited from pytorch:25.04
-# * https://github.com/advisories/GHSA-vqfr-h8mv-ghfj
-# * https://github.com/advisories/GHSA-7cx3-6m66-7c5m
-# * https://github.com/advisories/GHSA-5rjg-fvgr-3xxf
+# WARs against security issues inherited from pytorch:25.06
 # * https://github.com/advisories/GHSA-8qvm-5x2c-j2w7
 # * https://github.com/advisories/GHSA-33p9-3p43-82vq
 RUN pip3 install --upgrade --no-cache-dir \
-    "h11>=0.16" \
-    "tornado>=6.5.0" \
-    "setuptools>=78.1.1,<80" \
-    "protobuf>=4.25.8" \
-    "jupyter-core>=5.8.1"
+    "protobuf>=4.25.8"
 
 FROM ${TRITON_IMAGE}:${TRITON_BASE_TAG} AS triton
```
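Not part of the commit, but a quick hedged way to confirm inside the built image that the one remaining WAR pin took effect:

```bash
pip3 show protobuf | grep ^Version                                 # expect a version >= 4.25.8
python3 -c "import google.protobuf as pb; print(pb.__version__)"   # same check from Python
```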
```diff
@@ -186,16 +186,16 @@ jenkins-rockylinux8_%: PYTHON_VERSION_TAG_ID = $(if $(findstring 3.12,${PYTHON_V
 jenkins-rockylinux8_%: IMAGE_WITH_TAG = $(shell . ../jenkins/current_image_tags.properties && echo $$LLM_ROCKYLINUX8_${PYTHON_VERSION_TAG_ID}_DOCKER_IMAGE)
 jenkins-rockylinux8_%: STAGE = tritondevel
 jenkins-rockylinux8_%: BASE_IMAGE = nvidia/cuda
-jenkins-rockylinux8_%: BASE_TAG = 12.9.0-devel-rockylinux8
+jenkins-rockylinux8_%: BASE_TAG = 12.9.1-devel-rockylinux8
 
 rockylinux8_%: STAGE = tritondevel
 rockylinux8_%: BASE_IMAGE = nvidia/cuda
-rockylinux8_%: BASE_TAG = 12.9.0-devel-rockylinux8
+rockylinux8_%: BASE_TAG = 12.9.1-devel-rockylinux8
 
 # For x86_64 and aarch64
 ubuntu22_%: STAGE = tritondevel
 ubuntu22_%: BASE_IMAGE = nvidia/cuda
-ubuntu22_%: BASE_TAG = 12.9.0-devel-ubuntu22.04
+ubuntu22_%: BASE_TAG = 12.9.1-devel-ubuntu22.04
 
 trtllm_%: STAGE = release
 trtllm_%: PUSH_TO_STAGING := 0
```
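The `BASE_TAG` lines above are GNU Make target-specific variables attached to pattern rules: every target matching the pattern sees its own value. A minimal self-contained sketch of the mechanism (the rule body here is illustrative, not from this Makefile):

```make
# Any target matching ubuntu22_% sees this BASE_TAG; other targets do not.
ubuntu22_%: BASE_TAG = 12.9.1-devel-ubuntu22.04
ubuntu22_%:
	@echo "stage '$*' builds from tag $(BASE_TAG)"
```

Running `make ubuntu22_build` against this sketch would print `stage 'build' builds from tag 12.9.1-devel-ubuntu22.04`.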
```diff
@@ -44,6 +44,8 @@ cleanup() {
 
 init_ubuntu() {
     apt-get update
+    # libibverbs-dev is installed but libmlx5.so is missing, reinstall the package
+    apt-get --reinstall install -y libibverbs-dev
     apt-get install -y --no-install-recommends \
         ccache \
         gdb \
@@ -53,7 +55,6 @@ init_ubuntu() {
         llvm \
         libclang-rt-dev \
         libffi-dev \
-        libibverbs-dev \
         libnuma1 \
         libnuma-dev \
         python3-dev \
```
```diff
@@ -5,7 +5,7 @@ set -ex
 # This script is used for reinstalling CUDA on Rocky Linux 8 with the run file.
 # CUDA version is usually aligned with the latest NGC CUDA image tag.
 # Only use when public CUDA image is not ready.
-CUDA_VER="12.9.0_575.51.03"
+CUDA_VER="12.9.1_575.57.08"
 CUDA_VER_SHORT="${CUDA_VER%_*}"
 
 NVCC_VERSION_OUTPUT=$(nvcc --version)
```
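The `${CUDA_VER%_*}` expansion above strips the shortest `_*` suffix, splitting the toolkit version off the driver build; a quick illustration:

```bash
CUDA_VER="12.9.1_575.57.08"
echo "${CUDA_VER%_*}"   # 12.9.1    (toolkit version; driver suffix removed)
echo "${CUDA_VER#*_}"   # 575.57.08 (the complementary prefix strip, for contrast)
```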
```diff
@@ -4,7 +4,7 @@ set -ex
 
 # Use latest stable version from https://pypi.org/project/torch/#history
 # and closest to the version specified in
-# https://docs.nvidia.com/deeplearning/frameworks/pytorch-release-notes/rel-25-05.html#rel-25-05
+# https://docs.nvidia.com/deeplearning/frameworks/pytorch-release-notes/rel-25-06.html#rel-25-06
 TORCH_VERSION="2.7.1"
 SYSTEM_ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"')
 
```
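The `SYSTEM_ID` assignment above uses a PCRE lookbehind to read the distro ID; a small sketch of what it extracts (output assumes an Ubuntu host; some distros quote the value, hence the `tr -d '"'`):

```bash
# /etc/os-release contains e.g.  ID=ubuntu   (or ID="rocky" on Rocky Linux)
grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"'
# -> ubuntu
```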
```diff
@@ -4,21 +4,22 @@ set -ex
 
 TRT_VER="10.11.0.33"
 # Align with the pre-installed cuDNN / cuBLAS / NCCL versions from
-# https://docs.nvidia.com/deeplearning/frameworks/pytorch-release-notes/rel-25-05.html#rel-25-05
-CUDA_VER="12.9" # 12.9.0
+# https://docs.nvidia.com/deeplearning/frameworks/pytorch-release-notes/rel-25-06.html#rel-25-06
+CUDA_VER="12.9" # 12.9.1
 # Keep the installation for cuDNN if users want to install PyTorch with source codes.
 # PyTorch 2.x can compile with cuDNN v9.
-CUDNN_VER="9.10.1.4-1"
-# NCCL version 2.26.x used in the NGC PyTorch 25.05 image but has a performance regression issue.
-# Use NCCL version 2.27.5 which has the fixes.
+CUDNN_VER="9.10.2.21-1"
+# NGC PyTorch 25.06 image uses NCCL 2.27.3, while NCCL 2.27.5 resolves a perf regression issue.
+# Use NCCL version 2.27.5 instead.
 NCCL_VER="2.27.5-1+cuda12.9"
-# Use cuBLAS version 12.9.0.13 instead.
+# NGC PyTorch 25.06 image uses cuBLAS 12.9.1.4, which leads to failures with MoE Lora (see https://nvbugs/5376270).
+# Continue using cuBLAS 12.9.0.13 until this issue is resolved.
 CUBLAS_VER="12.9.0.13-1"
 # Align with the pre-installed CUDA / NVCC / NVRTC versions from
 # https://docs.nvidia.com/cuda/cuda-toolkit-release-notes/index.html
-NVRTC_VER="12.9.41-1"
-CUDA_RUNTIME="12.9.37-1"
-CUDA_DRIVER_VERSION="575.51.03-1.el8"
+NVRTC_VER="12.9.86-1"
+CUDA_RUNTIME="12.9.79-1"
+CUDA_DRIVER_VERSION="575.57.08-1.el8"
 
 for i in "$@"; do
     case $i in
```
```diff
@@ -153,7 +153,7 @@ The following table shows the supported software for TensorRT-LLM.
 * -
   - Software Compatibility
 * - Container
-  - [25.05](https://docs.nvidia.com/deeplearning/frameworks/support-matrix/index.html)
+  - [25.06](https://docs.nvidia.com/deeplearning/frameworks/support-matrix/index.html)
 * - TensorRT
   - [10.11](https://docs.nvidia.com/deeplearning/tensorrt/release-notes/index.html)
 * - Precision
```
```diff
@@ -446,7 +446,9 @@ def runLLMBuild(pipeline, buildFlags, tarName, is_linux_x86_64)
     // Build tritonserver artifacts
     def llmPath = sh (script: "realpath ${LLM_ROOT}", returnStdout: true).trim()
-    // TODO: Remove after the cmake version is upgraded to 3.31.8
-    sh "cd ${LLM_ROOT}/triton_backend/inflight_batcher_llm && mkdir build && cd build && cmake .. -DTRTLLM_DIR=${llmPath} -DTRITON_COMMON_REPO_TAG=r25.05 -DTRITON_CORE_REPO_TAG=r25.05 -DTRITON_THIRD_PARTY_REPO_TAG=r25.05 -DTRITON_BACKEND_REPO_TAG=r25.05 -DUSE_CXX11_ABI=ON && make -j${BUILD_JOBS} install"
+    // Get the Triton tag from docker/Dockerfile.multi
+    def tritonShortTag = sh(script: "${LLM_ROOT}/jenkins/scripts/get_triton_tag.sh ${LLM_ROOT}", returnStdout: true).trim()
+    sh "cd ${LLM_ROOT}/triton_backend/inflight_batcher_llm && mkdir build && cd build && cmake .. -DTRTLLM_DIR=${llmPath} -DTRITON_COMMON_REPO_TAG=${tritonShortTag} -DTRITON_CORE_REPO_TAG=${tritonShortTag} -DTRITON_THIRD_PARTY_REPO_TAG=${tritonShortTag} -DTRITON_BACKEND_REPO_TAG=${tritonShortTag} -DUSE_CXX11_ABI=ON && make -j${BUILD_JOBS} install"
 
     // Step 3: packaging wheels into tarfile
     sh "cp ${LLM_ROOT}/build/tensorrt_llm-*.whl TensorRT-LLM/"
```
```diff
@@ -39,7 +39,7 @@ LLM_ROCKYLINUX8_PY310_DOCKER_IMAGE = env.wheelDockerImagePy310
 LLM_ROCKYLINUX8_PY312_DOCKER_IMAGE = env.wheelDockerImagePy312
 
 // DLFW torch image
-DLFW_IMAGE = "nvcr.io/nvidia/pytorch:25.05-py3"
+DLFW_IMAGE = "nvcr.io/nvidia/pytorch:25.06-py3"
 
 // Ubuntu base image
 UBUNTU_22_04_IMAGE = "urm.nvidia.com/docker/ubuntu:22.04"
```
```diff
@@ -1,7 +1,7 @@
 
 import java.lang.InterruptedException
 
-DOCKER_IMAGE = "urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:pytorch-25.05-py3-x86_64-ubuntu24.04-trt10.11.0.33-skip-tritondevel-202507071100-5534"
+DOCKER_IMAGE = "urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:pytorch-25.06-py3-x86_64-ubuntu24.04-trt10.11.0.33-skip-tritondevel-202507251001-5678"
 
 def createKubernetesPodConfig(image, arch = "amd64")
 {
```
```diff
@@ -11,7 +11,7 @@
 #
 # NB: Typically, the suffix indicates the PR whose CI pipeline generated the images. In case that
 # images are adopted from PostMerge pipelines, the abbreviated commit hash is used instead.
-LLM_DOCKER_IMAGE=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:pytorch-25.05-py3-x86_64-ubuntu24.04-trt10.11.0.33-skip-tritondevel-202507162011-ec3ebae
-LLM_SBSA_DOCKER_IMAGE=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:pytorch-25.05-py3-aarch64-ubuntu24.04-trt10.11.0.33-skip-tritondevel-202507162011-ec3ebae
-LLM_ROCKYLINUX8_PY310_DOCKER_IMAGE=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:cuda-12.9.0-devel-rocky8-x86_64-rocky8-py310-trt10.11.0.33-skip-tritondevel-202507162011-ec3ebae
-LLM_ROCKYLINUX8_PY312_DOCKER_IMAGE=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:cuda-12.9.0-devel-rocky8-x86_64-rocky8-py312-trt10.11.0.33-skip-tritondevel-202507162011-ec3ebae
+LLM_DOCKER_IMAGE=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:pytorch-25.06-py3-x86_64-ubuntu24.04-trt10.11.0.33-skip-tritondevel-202507251001-5678
+LLM_SBSA_DOCKER_IMAGE=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:pytorch-25.06-py3-aarch64-ubuntu24.04-trt10.11.0.33-skip-tritondevel-202507251001-5678
+LLM_ROCKYLINUX8_PY310_DOCKER_IMAGE=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:cuda-12.9.1-devel-rocky8-x86_64-rocky8-py310-trt10.11.0.33-skip-tritondevel-202507251001-5678
+LLM_ROCKYLINUX8_PY312_DOCKER_IMAGE=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:cuda-12.9.1-devel-rocky8-x86_64-rocky8-py312-trt10.11.0.33-skip-tritondevel-202507251001-5678
```
jenkins/scripts/get_triton_tag.sh (new executable file, 37 lines)

```diff
@@ -0,0 +1,37 @@
+#!/bin/bash
+
+# Script to get triton short tag from docker/Dockerfile.multi
+# Usage: ./get_triton_tag.sh [llm_root_path]
+# Output: triton short tag to stdout
+
+set -e
+
+# Default to current directory if no path provided
+LLM_ROOT="${1:-.}"
+
+# Default triton tag
+TRITON_SHORT_TAG="main"
+
+# Path to Dockerfile.multi
+DOCKERFILE_MULTI_PATH="${LLM_ROOT}/docker/Dockerfile.multi"
+
+# Check if Dockerfile.multi exists
+if [[ -f "$DOCKERFILE_MULTI_PATH" ]]; then
+    # Extract TRITON_BASE_TAG from Dockerfile.multi
+    TRITON_BASE_TAG_LINE=$(grep -E '^ARG TRITON_BASE_TAG=' "$DOCKERFILE_MULTI_PATH" | tail -n1)
+
+    if [[ -n "$TRITON_BASE_TAG_LINE" ]]; then
+        TRITON_BASE_TAG=$(echo "$TRITON_BASE_TAG_LINE" | cut -d'=' -f2)
+
+        if [[ -n "$TRITON_BASE_TAG" ]]; then
+            # Remove -py3 suffix and add r prefix
+            TRITON_SHORT_TAG="r${TRITON_BASE_TAG%-py3}"
+            echo "Using triton tag from Dockerfile.multi: $TRITON_SHORT_TAG" >&2
+        fi
+    fi
+else
+    echo "Dockerfile.multi not found at $DOCKERFILE_MULTI_PATH" >&2
+fi
+
+# Output the triton short tag to stdout
+echo "$TRITON_SHORT_TAG"
```
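Given the `ARG TRITON_BASE_TAG=25.06-py3` default set in docker/Dockerfile.multi above, the script derives the Triton repo tag like so (a sketch; the checkout path is illustrative):

```bash
$ ./jenkins/scripts/get_triton_tag.sh /workspace/TensorRT-LLM
Using triton tag from Dockerfile.multi: r25.06
r25.06
```

The first line is the progress message on stderr; the final `r25.06` on stdout is what callers capture.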
```diff
@@ -22,7 +22,7 @@ h5py==3.12.1
 StrEnum
 sentencepiece>=0.1.99
 tensorrt~=10.11.0
-# https://docs.nvidia.com/deeplearning/frameworks/pytorch-release-notes/rel-25-05.html#rel-25-05 uses 2.8.0a0.
+# https://docs.nvidia.com/deeplearning/frameworks/pytorch-release-notes/rel-25-06.html#rel-25-06 uses 2.8.0a0.
 torch>=2.7.1,<=2.8.0a0
 torchvision
 nvidia-modelopt[torch]~=0.33.0
```
```diff
@@ -1984,8 +1984,6 @@ def test_ptp_quickstart_multimodal(llm_root, llm_venv, model_name, model_path,
                                    modality, use_cuda_graph):
     # NOTE: individual tests need to be enabled in
     # tests/integration/test_lists/qa/examples_test_list.txt
-    llm_venv.run_cmd(
-        ['-m', 'pip', 'install', 'flash-attn==2.7.3', '--no-build-isolation'])
 
     example_root = Path(os.path.join(llm_root, "examples", "llm-api"))
     test_data_root = Path(
```
```diff
@@ -506,9 +506,20 @@ def test_cpp_unit_tests(tritonserver_test_root, test_name, llm_root):
         "rm -rf build && "
         "mkdir -p build", llm_root)
 
+    # Get the value of TRITON_SHORT_TAG from docker/Dockerfile.multi
+    import subprocess
+    triton_short_tag = subprocess.check_output(
+        [f"{llm_root}/jenkins/scripts/get_triton_tag.sh", llm_root],
+        text=True).strip()
+    print(f"using triton tag from docker/Dockerfile.multi: {triton_short_tag}")
     run_shell_command(
         f"cd {llm_root}/triton_backend/inflight_batcher_llm/build && "
-        f"cmake .. -DTRTLLM_DIR={llm_root} -DCMAKE_INSTALL_PREFIX=install/ -DBUILD_TESTS=ON -DUSE_CXX11_ABI=ON -DTRITON_COMMON_REPO_TAG=r25.05 -DTRITON_CORE_REPO_TAG=r25.05 -DTRITON_THIRD_PARTY_REPO_TAG=r25.05 -DTRITON_BACKEND_REPO_TAG=r25.05 "
+        f"cmake .. -DTRTLLM_DIR={llm_root} -DCMAKE_INSTALL_PREFIX=install/ "
+        f"-DBUILD_TESTS=ON -DUSE_CXX11_ABI=ON "
+        f"-DTRITON_COMMON_REPO_TAG={triton_short_tag} "
+        f"-DTRITON_CORE_REPO_TAG={triton_short_tag} "
+        f"-DTRITON_THIRD_PARTY_REPO_TAG={triton_short_tag} "
+        f"-DTRITON_BACKEND_REPO_TAG={triton_short_tag} "
         "&& make -j8 install", llm_root)
 
     # Run the cpp unit tests
```
```diff
@@ -52,7 +52,10 @@ if [[ "$BUILD_UNIT_TESTS" == "true" ]]; then
 fi
 
-# TODO: Remove specifying Triton version after cmake version is upgraded to 3.31.8
-cmake -DCMAKE_INSTALL_PREFIX:PATH=`pwd`/install ${BUILD_TESTS_ARG} -DTRITON_COMMON_REPO_TAG=r25.05 -DTRITON_CORE_REPO_TAG=r25.05 -DTRITON_THIRD_PARTY_REPO_TAG=r25.05 -DTRITON_BACKEND_REPO_TAG=r25.05 ..
+# Get TRITON_SHORT_TAG from docker/Dockerfile.multi
+LLM_ROOT="$(dirname $0)/../../../.."
+TRITON_SHORT_TAG=$("$LLM_ROOT/jenkins/scripts/get_triton_tag.sh" "$LLM_ROOT")
+cmake -DCMAKE_INSTALL_PREFIX:PATH=`pwd`/install ${BUILD_TESTS_ARG} -DTRITON_COMMON_REPO_TAG=${TRITON_SHORT_TAG} -DTRITON_CORE_REPO_TAG=${TRITON_SHORT_TAG} -DTRITON_THIRD_PARTY_REPO_TAG=${TRITON_SHORT_TAG} -DTRITON_BACKEND_REPO_TAG=${TRITON_SHORT_TAG} ..
 make install
 
 mkdir -p /opt/tritonserver/backends/tensorrtllm
```