infra: upgrade to DLFW 25.08-pre and TRT 10.13.2.4

Signed-off-by: Zhanrui Sun <zhanruis@nvidia.com>
Zhanrui Sun 2025-08-11 19:27:09 -07:00
parent 97a3788dcf
commit ebec4ea5ee
10 changed files with 134 additions and 98 deletions

View File

@@ -1,46 +1,49 @@
-ARCH=$(uname -m)
-if [ $ARCH == "x86_64" ]; then
+# ARCH=$(uname -m)
+# if [ $ARCH == "x86_64" ]; then
-wget https://urm.nvidia.com/artifactory/sw-gpu-cuda-installer-generic-local/packaging/r13.0/cuda_nvrtc/linux-x86_64/13.0.48/cuda-nvrtc-dev-13-0_13.0.48-1_amd64.deb && \
-dpkg -i cuda-nvrtc-dev-13-0_13.0.48-1_amd64.deb && \
-rm cuda-nvrtc-dev-13-0_13.0.48-1_amd64.deb
+# wget https://urm.nvidia.com/artifactory/sw-gpu-cuda-installer-generic-local/packaging/r13.0/cuda_nvrtc/linux-x86_64/13.0.48/cuda-nvrtc-dev-13-0_13.0.48-1_amd64.deb && \
+# dpkg -i cuda-nvrtc-dev-13-0_13.0.48-1_amd64.deb && \
+# rm cuda-nvrtc-dev-13-0_13.0.48-1_amd64.deb
-wget https://github.com/Kitware/CMake/releases/download/v4.0.3/cmake-4.0.3-linux-x86_64.sh && \
-bash cmake-4.0.3-linux-x86_64.sh --skip-license --prefix=/usr/local/cmake --exclude-subdir
+# wget https://github.com/Kitware/CMake/releases/download/v4.0.3/cmake-4.0.3-linux-x86_64.sh && \
+# bash cmake-4.0.3-linux-x86_64.sh --skip-license --prefix=/usr/local/cmake --exclude-subdir
-apt update
-apt install -y libstdc++-14-dev
+# apt update
+# apt install -y libstdc++-14-dev
-elif [ $ARCH == "aarch64" ]; then
+# elif [ $ARCH == "aarch64" ]; then
-# to be moved to docker/common/ scripts
-wget https://urm.nvidia.com/artifactory/sw-gpu-cuda-installer-generic-local/packaging/r13.0/cuda_nvrtc/linux-sbsa/13.0.48/cuda-nvrtc-dev-13-0_13.0.48-1_arm64.deb && \
-dpkg -i cuda-nvrtc-dev-13-0_13.0.48-1_arm64.deb && \
-rm cuda-nvrtc-dev-13-0_13.0.48-1_arm64.deb
+# # to be moved to docker/common/ scripts
+# wget https://urm.nvidia.com/artifactory/sw-gpu-cuda-installer-generic-local/packaging/r13.0/cuda_nvrtc/linux-sbsa/13.0.48/cuda-nvrtc-dev-13-0_13.0.48-1_arm64.deb && \
+# dpkg -i cuda-nvrtc-dev-13-0_13.0.48-1_arm64.deb && \
+# rm cuda-nvrtc-dev-13-0_13.0.48-1_arm64.deb
-wget https://github.com/Kitware/CMake/releases/download/v4.0.3/cmake-4.0.3-linux-aarch64.sh && \
-bash cmake-4.0.3-linux-aarch64.sh --skip-license --prefix=/usr/local/cmake --exclude-subdir
+# wget https://github.com/Kitware/CMake/releases/download/v4.0.3/cmake-4.0.3-linux-aarch64.sh && \
+# bash cmake-4.0.3-linux-aarch64.sh --skip-license --prefix=/usr/local/cmake --exclude-subdir
-apt update
-# fix LLVM build
-apt install -y libstdc++-14-dev
+# apt update
+# # fix LLVM build
+# apt install -y libstdc++-14-dev
-else
-echo "Unsupported architecture: $ARCH"
-exit 1
-fi
+# else
+# echo "Unsupported architecture: $ARCH"
+# exit 1
+# fi
+# # wait for https://github.com/NVIDIA/TensorRT-LLM/pull/6588
+# pip install deep_gemm@git+https://github.com/VALLIS-NERIA/DeepGEMM.git@97d97a20c2ecd53a248ab64242219d780cf822b8 --no-build-isolation
-# wait for new triton to be published
-cd /usr/local/lib/python3.12/dist-packages/ && \
-ls -la | grep pytorch_triton && \
-mv pytorch_triton-3.3.1+gitc8757738.dist-info triton-3.3.1+gitc8757738.dist-info && \
-cd triton-3.3.1+gitc8757738.dist-info && \
-echo "Current directory: $(pwd)" && \
-echo "Files in directory:" && \
-ls -la && \
-sed -i 's/^Name: pytorch-triton/Name: triton/' METADATA && \
-sed -i 's|pytorch_triton-3.3.1+gitc8757738.dist-info/|triton-3.3.1+gitc8757738.dist-info/|g' RECORD && \
-echo "METADATA after update:" && \
-grep "^Name:" METADATA
-# pip install git+https://github.com/triton-lang/triton.git@main
+# # wait for new triton to be published
+# cd /usr/local/lib/python3.12/dist-packages/ && \
+# ls -la | grep pytorch_triton && \
+# mv pytorch_triton-3.3.1+gitc8757738.dist-info triton-3.3.1+gitc8757738.dist-info && \
+# cd triton-3.3.1+gitc8757738.dist-info && \
+# echo "Current directory: $(pwd)" && \
+# echo "Files in directory:" && \
+# ls -la && \
+# sed -i 's/^Name: pytorch-triton/Name: triton/' METADATA && \
+# sed -i 's|pytorch_triton-3.3.1+gitc8757738.dist-info/|triton-3.3.1+gitc8757738.dist-info/|g' RECORD && \
+# echo "METADATA after update:" && \
+# grep "^Name:" METADATA
+# # pip install git+https://github.com/triton-lang/triton.git@main
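For context, the effect of the dist-info rename above (now disabled along with the rest of the script) is to make pip report the NGC image's pytorch_triton build under the distribution name triton, so packages that declare a triton dependency resolve against the preinstalled copy. A minimal post-rename sanity check, as a sketch assuming the Python 3.12 dist-packages layout used by the script:

# Confirm pip resolves the renamed distribution as "triton"
pip3 show triton | grep -E '^(Name|Version):'
# Confirm the module itself still imports and reports its version
python3 -c "import triton; print(triton.__version__)"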

View File

@@ -1,8 +1,8 @@
 # Multi-stage Dockerfile
 ARG BASE_IMAGE=gitlab-master.nvidia.com:5005/dl/dgx/pytorch
 ARG TRITON_IMAGE=gitlab-master.nvidia.com:5005/dl/dgx/tritonserver
-ARG BASE_TAG=25.08-py3.32224057-base
-ARG TRITON_BASE_TAG=25.08-RC-py3.32078257
+ARG BASE_TAG=25.08-py3.32674667-devel
+ARG TRITON_BASE_TAG=25.08-py3.32978230
 ARG DEVEL_IMAGE=devel
 FROM ${BASE_IMAGE}:${BASE_TAG} AS base
@@ -74,13 +74,29 @@ ENV PYTORCH_CUDA_ALLOC_CONF="garbage_collection_threshold:0.99999"
 RUN pip3 uninstall -y opencv && rm -rf /usr/local/lib/python3*/dist-packages/cv2/
 RUN pip3 install opencv-python-headless --force-reinstall --no-deps --no-cache-dir
-COPY bringup_fix.sh bringup_fix.sh
-RUN bash ./bringup_fix.sh && rm bringup_fix.sh
+# COPY bringup_fix.sh bringup_fix.sh
+# RUN bash ./bringup_fix.sh && rm bringup_fix.sh
 # WARs against security issues inherited from pytorch:25.06
 # * https://github.com/advisories/GHSA-8qvm-5x2c-j2w7
 RUN pip3 install --upgrade --no-cache-dir \
 "protobuf>=4.25.8"
+# wait for new triton to be published
+# Rename pytorch_triton package to triton
+RUN cd /usr/local/lib/python3.12/dist-packages/ && \
+ls -la | grep pytorch_triton && \
+mv pytorch_triton-3.3.1+gitc8757738.dist-info triton-3.3.1+gitc8757738.dist-info && \
+cd triton-3.3.1+gitc8757738.dist-info && \
+echo "Current directory: $(pwd)" && \
+echo "Files in directory:" && \
+ls -la && \
+sed -i 's/^Name: pytorch-triton/Name: triton/' METADATA && \
+sed -i 's|pytorch_triton-3.3.1+gitc8757738.dist-info/|triton-3.3.1+gitc8757738.dist-info/|g' RECORD && \
+echo "METADATA after update:" && \
+grep "^Name:" METADATA
 FROM ${TRITON_IMAGE}:${TRITON_BASE_TAG} AS triton
 FROM devel AS tritondevel
@@ -90,6 +106,8 @@ COPY --from=triton /opt/tritonserver/lib /opt/tritonserver/lib
 COPY --from=triton /opt/tritonserver/include /opt/tritonserver/include
 COPY --from=triton /opt/tritonserver/bin /opt/tritonserver/bin
 COPY --from=triton /opt/tritonserver/caches /opt/tritonserver/caches
+# WAR: datacenter-gpu-manager 4.4.0 is not available in the apt repository
+COPY --from=triton /usr/lib/*-linux-gnu/libdcgm.so* /tmp/
 COPY docker/common/install_triton.sh install_triton.sh
 RUN bash ./install_triton.sh && rm install_triton.sh
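A quick way to confirm the protobuf WAR took effect in a built image is to assert the installed version against the >=4.25.8 floor pinned above; a sketch, not part of the Dockerfile:

# Assert the GHSA-8qvm-5x2c-j2w7 floor from the Dockerfile is satisfied
python3 - <<'EOF'
from importlib.metadata import version
v = version("protobuf")
assert tuple(map(int, v.split(".")[:3])) >= (4, 25, 8), f"protobuf too old: {v}"
print(f"protobuf {v} OK")
EOF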

View File

@@ -56,6 +56,7 @@ init_ubuntu() {
 llvm \
 libclang-rt-dev \
 libffi-dev \
+libstdc++-14-dev \
 libnuma1 \
 libnuma-dev \
 python3-dev \
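The new libstdc++-14-dev entry mirrors the "fix LLVM build" workaround in the bring-up script: clang picks the highest-versioned GCC installation it can find, so the matching libstdc++ headers have to be present. A quick check, as a sketch that assumes clang is installed in the image:

# Confirm the package landed
dpkg -s libstdc++-14-dev | grep -E '^(Package|Version):'
# clang reports which GCC installation it selected; it should now find GCC 14
clang++ -v -E -x c++ /dev/null 2>&1 | grep 'Selected GCC installation'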

View File

@@ -3,7 +3,7 @@
 set -ex
 ARCH=$(uname -m)
-CMAKE_VERSION="3.30.2"
+CMAKE_VERSION="4.0.3"
 GITHUB_URL="https://github.com"
 if [ -n "${GITHUB_MIRROR}" ]; then
 GITHUB_URL=${GITHUB_MIRROR}
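CMake 4.x drops compatibility with cmake_minimum_required(VERSION) values below 3.5, so the jump from 3.30.2 can surface errors in older third-party subprojects, and a post-install version check is a cheap guard. A sketch, where the /usr/local/cmake prefix is borrowed from the bring-up script above and is an assumption for this installer:

# Confirm the expected CMake is first on PATH (prefix assumed, see lead-in)
export PATH=/usr/local/cmake/bin:$PATH
cmake --version | head -n1   # expect: cmake version 4.0.3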

View File

@@ -5,7 +5,7 @@ set -ex
 # This script is used for reinstalling CUDA on Rocky Linux 8 with the run file.
 # CUDA version is usually aligned with the latest NGC CUDA image tag.
 # Only use when public CUDA image is not ready.
-CUDA_VER="12.9.1_575.57.08"
+CUDA_VER="13.0.0_580.65.06"
 CUDA_VER_SHORT="${CUDA_VER%_*}"
 NVCC_VERSION_OUTPUT=$(nvcc --version)
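The ${CUDA_VER%_*} expansion just below the bumped line strips the driver suffix from the combined version string, which is the toolkit version the run-file path needs; for example:

# %_* removes the shortest trailing "_..." suffix
CUDA_VER="13.0.0_580.65.06"
echo "${CUDA_VER%_*}"   # prints 13.0.0, the toolkit version without the 580.65.06 driver part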

View File

@@ -2,24 +2,23 @@
 set -ex
-TRT_VER="10.11.0.33"
+TRT_VER="10.13.2.6"
 # Align with the pre-installed cuDNN / cuBLAS / NCCL versions from
-# https://docs.nvidia.com/deeplearning/frameworks/pytorch-release-notes/rel-25-06.html#rel-25-06
-CUDA_VER="12.9" # 12.9.1
+# https://docs.nvidia.com/deeplearning/frameworks/pytorch-release-notes/rel-25-08.html#rel-25-08
+CUDA_VER="13.0" # 13.0.0
 # Keep the installation for cuDNN if users want to install PyTorch with source codes.
 # PyTorch 2.x can compile with cuDNN v9.
-CUDNN_VER="9.10.2.21-1"
-# NGC PyTorch 25.06 image uses NCCL 2.27.3, while NCCL 2.27.5 resolves a perf regression issue.
-# Use NCCL version 2.27.5 instead.
-NCCL_VER="2.27.5-1+cuda12.9"
-# NGC PyTorch 25.06 image uses cuBLAS 12.9.1.4, but which leads to failures with MoE Lora (see https://nvbugs/5376270).
-# Continue using cuBLAS 12.9.0.13 until this issue is resolved.
-CUBLAS_VER="12.9.0.13-1"
+CUDNN_VER="9.12.0.42-1"
+# The NGC PyTorch 25.05 image shipped NCCL 2.26.x, which has a performance regression.
+# Use NCCL 2.27.6, which carries the fix.
+NCCL_VER="2.27.6-1+cuda13.0"
+# Use cuBLAS 13.0.0.19 to match CUDA 13.0.
+CUBLAS_VER="13.0.0.19-1"
 # Align with the pre-installed CUDA / NVCC / NVRTC versions from
 # https://docs.nvidia.com/cuda/cuda-toolkit-release-notes/index.html
-NVRTC_VER="12.9.86-1"
-CUDA_RUNTIME="12.9.79-1"
-CUDA_DRIVER_VERSION="575.57.08-1.el8"
+NVRTC_VER="13.0.48-1"
+CUDA_RUNTIME="13.0.37-1"
+CUDA_DRIVER_VERSION="580.65.06-1.el8"
 for i in "$@"; do
 case $i in
@@ -41,39 +40,44 @@ fi
 install_ubuntu_requirements() {
 apt-get update && apt-get install -y --no-install-recommends gnupg2 curl ca-certificates
 ARCH=$(uname -m)
-if [ "$ARCH" = "amd64" ];then ARCH="x86_64";fi
-if [ "$ARCH" = "aarch64" ];then ARCH="sbsa";fi
+ARCH2="amd64"
+if [ "$ARCH" = "amd64" ];then ARCH="x86_64" && ARCH2="amd64";fi
+if [ "$ARCH" = "aarch64" ];then ARCH="sbsa" && ARCH2="arm64";fi
 curl -fsSLO https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2404/${ARCH}/cuda-keyring_1.1-1_all.deb
 dpkg -i cuda-keyring_1.1-1_all.deb
 rm cuda-keyring_1.1-1_all.deb
-apt-get update
-# if [[ $(apt list --installed | grep libcudnn9) ]]; then
-# apt-get remove --purge -y libcudnn9*
-# fi
-# if [[ $(apt list --installed | grep libnccl) ]]; then
-# apt-get remove --purge -y --allow-change-held-packages libnccl*
-# fi
-# if [[ $(apt list --installed | grep libcublas) ]]; then
-# apt-get remove --purge -y --allow-change-held-packages libcublas*
-# fi
-# if [[ $(apt list --installed | grep cuda-nvrtc-dev) ]]; then
-# apt-get remove --purge -y --allow-change-held-packages cuda-nvrtc-dev*
-# fi
+wget https://urm.nvidia.com/artifactory/sw-gpu-cuda-installer-generic-local/packaging/r13.0/cuda_nvrtc/linux-${ARCH}/13.0.48/cuda-nvrtc-dev-13-0_13.0.48-1_${ARCH2}.deb && \
+dpkg -i cuda-nvrtc-dev-13-0_13.0.48-1_${ARCH2}.deb && \
+rm cuda-nvrtc-dev-13-0_13.0.48-1_${ARCH2}.deb
-# CUBLAS_CUDA_VERSION=$(echo $CUDA_VER | sed 's/\./-/g')
-# NVRTC_CUDA_VERSION=$(echo $CUDA_VER | sed 's/\./-/g')
+# apt-get update
+# if [[ $(apt list --installed | grep libcudnn9) ]]; then
+# apt-get remove --purge -y libcudnn9*
+# fi
+# if [[ $(apt list --installed | grep libnccl) ]]; then
+# apt-get remove --purge -y --allow-change-held-packages libnccl*
+# fi
+# if [[ $(apt list --installed | grep libcublas) ]]; then
+# apt-get remove --purge -y --allow-change-held-packages libcublas*
+# fi
+# if [[ $(apt list --installed | grep cuda-nvrtc-dev) ]]; then
+# apt-get remove --purge -y --allow-change-held-packages cuda-nvrtc-dev*
+# fi
-# apt-get install -y --no-install-recommends \
-# libcudnn9-cuda-12=${CUDNN_VER} \
-# libcudnn9-dev-cuda-12=${CUDNN_VER} \
-# libcudnn9-headers-cuda-12=${CUDNN_VER} \
-# libnccl2=${NCCL_VER} \
-# libnccl-dev=${NCCL_VER} \
-# libcublas-${CUBLAS_CUDA_VERSION}=${CUBLAS_VER} \
-# libcublas-dev-${CUBLAS_CUDA_VERSION}=${CUBLAS_VER} \
-# cuda-nvrtc-dev-${NVRTC_CUDA_VERSION}=${NVRTC_VER}
+# CUBLAS_CUDA_VERSION=$(echo $CUDA_VER | sed 's/\./-/g')
+# NVRTC_CUDA_VERSION=$(echo $CUDA_VER | sed 's/\./-/g')
+# apt-get install -y --no-install-recommends \
+# libcudnn9-cuda-13=${CUDNN_VER} \
+# libcudnn9-dev-cuda-13=${CUDNN_VER} \
+# libcudnn9-headers-cuda-13=${CUDNN_VER} \
+# libnccl2=${NCCL_VER} \
+# libnccl-dev=${NCCL_VER} \
+# libcublas-${CUBLAS_CUDA_VERSION}=${CUBLAS_VER} \
+# libcublas-dev-${CUBLAS_CUDA_VERSION}=${CUBLAS_VER} \
+# cuda-nvrtc-dev-${NVRTC_CUDA_VERSION}=${NVRTC_VER}
 apt-get clean
 rm -rf /var/lib/apt/lists/*
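The new ARCH/ARCH2 pair exists because the same machine is named differently in each namespace this function touches: uname -m reports x86_64 or aarch64, NVIDIA's repo paths use x86_64 or sbsa, and Debian package filenames use amd64 or arm64. A standalone sketch of that mapping:

# Map uname -m output onto the two naming schemes used above:
#   ARCH  -> NVIDIA repo directory name (x86_64 / sbsa)
#   ARCH2 -> Debian package architecture (amd64 / arm64)
case "$(uname -m)" in
x86_64|amd64)  ARCH="x86_64"; ARCH2="amd64" ;;
aarch64|arm64) ARCH="sbsa";   ARCH2="arm64" ;;
*) echo "Unsupported architecture: $(uname -m)" >&2; exit 1 ;;
esac
echo "repo arch: ${ARCH}, deb arch: ${ARCH2}"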
@@ -92,7 +96,7 @@ install_rockylinux_requirements() {
 "libnccl-devel-${NCCL_VER}.${ARCH1}" \
 "cuda-compat-${CUBLAS_CUDA_VERSION}-${CUDA_DRIVER_VERSION}.${ARCH1}" \
 "cuda-toolkit-${CUBLAS_CUDA_VERSION}-config-common-${CUDA_RUNTIME}.noarch" \
-"cuda-toolkit-12-config-common-${CUDA_RUNTIME}.noarch" \
+"cuda-toolkit-13-config-common-${CUDA_RUNTIME}.noarch" \
 "cuda-toolkit-config-common-${CUDA_RUNTIME}.noarch" \
 "libcublas-${CUBLAS_CUDA_VERSION}-${CUBLAS_VER}.${ARCH1}" \
 "libcublas-devel-${CUBLAS_CUDA_VERSION}-${CUBLAS_VER}.${ARCH1}"; do
@@ -108,7 +112,7 @@ install_rockylinux_requirements() {
 libnccl-devel-${NCCL_VER}.${ARCH1}.rpm \
 cuda-compat-${CUBLAS_CUDA_VERSION}-${CUDA_DRIVER_VERSION}.${ARCH1}.rpm \
 cuda-toolkit-${CUBLAS_CUDA_VERSION}-config-common-${CUDA_RUNTIME}.noarch.rpm \
-cuda-toolkit-12-config-common-${CUDA_RUNTIME}.noarch.rpm \
+cuda-toolkit-13-config-common-${CUDA_RUNTIME}.noarch.rpm \
 cuda-toolkit-config-common-${CUDA_RUNTIME}.noarch.rpm \
 libcublas-${CUBLAS_CUDA_VERSION}-${CUBLAS_VER}.${ARCH1}.rpm \
 libcublas-devel-${CUBLAS_CUDA_VERSION}-${CUBLAS_VER}.${ARCH1}.rpm
@@ -130,15 +134,16 @@ install_tensorrt() {
 if [ -z "$ARCH" ];then ARCH=$(uname -m);fi
 if [ "$ARCH" = "arm64" ];then ARCH="aarch64";fi
 if [ "$ARCH" = "amd64" ];then ARCH="x86_64";fi
+if [ "$ARCH" = "x86_64" ]; then
+RELEASE_URL_TRT="http://cuda-repo/release-candidates/Libraries/TensorRT/v10.14/10.14.0.19-6374d0f7/13.0-r580/Linux-x64-manylinux_2_28/tar/TensorRT-10.14.0.19.Linux.x86_64-gnu.cuda-13.0.tar.gz"
+else
+RELEASE_URL_TRT="http://cuda-repo/release-candidates/Libraries/TensorRT/v10.14/10.14.0.19-6374d0f7/13.0-r580/Linux-aarch64-manylinux_2_35/tar/TensorRT-10.14.0.19.Ubuntu-22.04.aarch64-gnu.cuda-13.0.tar.gz"
+fi
-RELEASE_URL_TRT="https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/${TRT_VER_SHORT}/tars/TensorRT-${TRT_VER}.Linux.${ARCH}-gnu.cuda-${TRT_CUDA_VERSION}.tar.gz"
 fi
-wget --no-verbose ${RELEASE_URL_TRT} -O /tmp/TensorRT.tar
+# Download TensorRT (6GB file, needs longer timeout)
+echo "Downloading TensorRT from: ${RELEASE_URL_TRT}"
+if [ "$ARCH" = "x86_64" ];then
+curl -L --insecure --connect-timeout 600 --max-time 3600 --retry 3 -o /tmp/TensorRT.tar "${RELEASE_URL_TRT}"
+else
+wget --no-verbose ${RELEASE_URL_TRT} -O /tmp/TensorRT.tar
+fi
 tar -xf /tmp/TensorRT.tar -C /usr/local/
 mv /usr/local/TensorRT-* /usr/local/tensorrt
 pip3 install --no-cache-dir /usr/local/tensorrt/python/tensorrt-*-cp${PARSED_PY_VERSION}-*.whl
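The x86_64 path switches to curl mainly for its timeout and retry knobs: --connect-timeout 600 bounds connection setup, --max-time 3600 bounds the whole multi-gigabyte transfer, and --retry 3 re-attempts transient failures. A variant that can also resume an interrupted transfer; the -C - and --retry-delay flags are additions for illustration, not part of the script above:

# Resume-capable download sketch; -C - continues a partial /tmp/TensorRT.tar
curl -L --insecure --connect-timeout 600 --max-time 3600 \
--retry 3 --retry-delay 10 -C - \
-o /tmp/TensorRT.tar "${RELEASE_URL_TRT}"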
@@ -162,7 +167,7 @@ case "$ID" in
 install_tensorrt
 ;;
 rocky)
-install_rockylinux_requirements
+# install_rockylinux_requirements
 install_tensorrt
 ;;
 *)

View File

@@ -21,10 +21,17 @@ install_triton_deps() {
 python3-build \
 libb64-dev \
 libarchive-dev \
-datacenter-gpu-manager=1:3.3.6 \
 && install_boost \
 && apt-get clean \
 && rm -rf /var/lib/apt/lists/*
+# Copy /tmp/libdcgm.so* files back to /usr/lib/<arch>-linux-gnu/
+if [ -d /usr/lib/x86_64-linux-gnu ]; then
+cp -f /tmp/libdcgm.so* /usr/lib/x86_64-linux-gnu/ || true
+elif [ -d /usr/lib/aarch64-linux-gnu ]; then
+cp -f /tmp/libdcgm.so* /usr/lib/aarch64-linux-gnu/ || true
+else
+echo "Target /usr/lib directory for architecture not found, skipping libdcgm.so* copy"
+fi
 }
# Install Triton only if base image is Ubuntu
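Since datacenter-gpu-manager can no longer be apt-installed here (the 4.4.0 WAR noted in the Dockerfile), the library travels through /tmp between build stages instead. A post-copy check that the dynamic loader can actually resolve it, as a sketch:

# Refresh the linker cache and confirm libdcgm is resolvable
ldconfig
ldconfig -p | grep libdcgm || echo "libdcgm not registered with the loader" >&2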

View File

@@ -2097,7 +2097,7 @@ def launchTestJobs(pipeline, testFilter, dockerNode=null)
 trtllm_utils.llmExecStepWithRetry(pipeline, script: "wget https://developer.download.nvidia.com/compute/cuda/repos/${ubuntu_version}/${platform}/cuda-keyring_1.1-1_all.deb")
 trtllm_utils.llmExecStepWithRetry(pipeline, script: "dpkg -i cuda-keyring_1.1-1_all.deb")
 trtllm_utils.llmExecStepWithRetry(pipeline, script: "apt-get update")
-trtllm_utils.llmExecStepWithRetry(pipeline, script: "apt-get -y install cuda-toolkit-12-9")
+trtllm_utils.llmExecStepWithRetry(pipeline, script: "apt-get -y install cuda-toolkit-13-0")
 }
 // Extra PyTorch CUDA 12.8 install for SBSA platform and Blackwell GPUs bare-metal environments
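Before the retry wrapper spends attempts on the install step, it can be worth confirming the keyring-configured repo actually serves the new toolkit metapackage for the platform; a manual pre-flight on a test node might look like this sketch:

# Does the configured CUDA repo offer the 13.0 toolkit for this platform?
apt-get update
apt-cache policy cuda-toolkit-13-0   # a Candidate other than "(none)" means the repo serves it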

View File

@@ -11,7 +11,9 @@
 #
 # NB: Typically, the suffix indicates the PR whose CI pipeline generated the images. In case that
 # images are adopted from PostMerge pipelines, the abbreviated commit hash is used instead.
-LLM_DOCKER_IMAGE=gitlab-master.nvidia.com:5005/xiweny/images:gb110_bringup_x86_64
-LLM_SBSA_DOCKER_IMAGE=gitlab-master.nvidia.com:5005/xiweny/images:gb110_bringup_sbsa
+LLM_DOCKER_IMAGE_12_9=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:pytorch-25.06-py3-x86_64-ubuntu24.04-trt10.11.0.33-skip-tritondevel-202508051130-6090
+LLM_SBSA_DOCKER_IMAGE_12_9=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:pytorch-25.06-py3-aarch64-ubuntu24.04-trt10.11.0.33-skip-tritondevel-202508051130-6090
+LLM_DOCKER_IMAGE=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm-staging/tritondevel:x86_64-tritondevel-torch_skip-a9bc5c5-user_zhanruis_update_dlfw_and_cu13-656
+LLM_SBSA_DOCKER_IMAGE=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm-staging/tritondevel:sbsa-tritondevel-torch_skip-a9bc5c5-user_zhanruis_update_dlfw_and_cu13-656
 LLM_ROCKYLINUX8_PY310_DOCKER_IMAGE=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:cuda-12.9.1-devel-rocky8-x86_64-rocky8-py310-trt10.11.0.33-skip-tritondevel-202508051130-6090
 LLM_ROCKYLINUX8_PY312_DOCKER_IMAGE=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:cuda-12.9.1-devel-rocky8-x86_64-rocky8-py312-trt10.11.0.33-skip-tritondevel-202508051130-6090

View File

@@ -3,7 +3,7 @@
 accelerate>=0.25.0
 build
 colored
-cuda-python # Do not override the custom version of cuda-python installed in the NGC PyTorch image.
+cuda-python>=12,<13
 diffusers>=0.27.0
 lark
 mpi4py
@@ -26,8 +26,8 @@ tensorrt
 torch>=2.7.1,<=2.8.0a0
 torchvision
 nvidia-modelopt[torch]~=0.33.0
-nvidia-nccl-cu12
-nvidia-cuda-nvrtc-cu12
+nvidia-nccl-cu13
+nvidia-cuda-nvrtc-cu13
 transformers==4.53.1
 pydantic>=2.9.1
 pydantic-settings[yaml]
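The -cu12 to -cu13 suffix change follows NVIDIA's convention of publishing separate wheels per CUDA major version, so an upgraded environment can silently keep stale cu12 wheels alongside the new ones. A small audit sketch:

# Flag any CUDA-12-flavored NVIDIA wheels left behind after the cu13 migration
pip3 list 2>/dev/null | grep -E 'nvidia-.*-cu12' \
&& echo "stale cu12 wheels found" >&2 \
|| echo "no cu12 wheels present"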