From ebec4ea5eeef4ae8fd46be145a263ddba6f836ed Mon Sep 17 00:00:00 2001
From: Zhanrui Sun
Date: Mon, 11 Aug 2025 19:27:09 -0700
Subject: [PATCH] infra: upgrade to DLFW 25.08-pre and TRT 10.13.2.6

Signed-off-by: Zhanrui Sun
---
 bringup_fix.sh                        |  75 +++++++++---------
 docker/Dockerfile.multi               |  26 ++++++-
 docker/common/install_base.sh         |   1 +
 docker/common/install_cmake.sh        |   2 +-
 docker/common/install_cuda_toolkit.sh |   2 +-
 docker/common/install_tensorrt.sh     | 103 ++++++++++++----------
 docker/common/install_triton.sh       |   9 ++-
 jenkins/L0_Test.groovy                |   2 +-
 jenkins/current_image_tags.properties |   6 +-
 requirements.txt                      |   6 +-
 10 files changed, 134 insertions(+), 98 deletions(-)

diff --git a/bringup_fix.sh b/bringup_fix.sh
index 543ea44bf9..3ebd6eb441 100644
--- a/bringup_fix.sh
+++ b/bringup_fix.sh
@@ -1,46 +1,49 @@
-ARCH=$(uname -m)
-if [ $ARCH == "x86_64" ]; then
+# ARCH=$(uname -m)
+# if [ $ARCH == "x86_64" ]; then
 
-wget https://urm.nvidia.com/artifactory/sw-gpu-cuda-installer-generic-local/packaging/r13.0/cuda_nvrtc/linux-x86_64/13.0.48/cuda-nvrtc-dev-13-0_13.0.48-1_amd64.deb && \
-    dpkg -i cuda-nvrtc-dev-13-0_13.0.48-1_amd64.deb && \
-    rm cuda-nvrtc-dev-13-0_13.0.48-1_amd64.deb
+# wget https://urm.nvidia.com/artifactory/sw-gpu-cuda-installer-generic-local/packaging/r13.0/cuda_nvrtc/linux-x86_64/13.0.48/cuda-nvrtc-dev-13-0_13.0.48-1_amd64.deb && \
+#     dpkg -i cuda-nvrtc-dev-13-0_13.0.48-1_amd64.deb && \
+#     rm cuda-nvrtc-dev-13-0_13.0.48-1_amd64.deb
 
-wget https://github.com/Kitware/CMake/releases/download/v4.0.3/cmake-4.0.3-linux-x86_64.sh && \
-    bash cmake-4.0.3-linux-x86_64.sh --skip-license --prefix=/usr/local/cmake --exclude-subdir
+# wget https://github.com/Kitware/CMake/releases/download/v4.0.3/cmake-4.0.3-linux-x86_64.sh && \
+#     bash cmake-4.0.3-linux-x86_64.sh --skip-license --prefix=/usr/local/cmake --exclude-subdir
 
-apt update
-apt install -y libstdc++-14-dev
+# apt update
+# apt install -y libstdc++-14-dev
 
-elif [ $ARCH == "aarch64" ]; then
+# elif [ $ARCH == "aarch64" ]; then
 
-# to be moved to docker/common/ scripts
-wget https://urm.nvidia.com/artifactory/sw-gpu-cuda-installer-generic-local/packaging/r13.0/cuda_nvrtc/linux-sbsa/13.0.48/cuda-nvrtc-dev-13-0_13.0.48-1_arm64.deb && \
-    dpkg -i cuda-nvrtc-dev-13-0_13.0.48-1_arm64.deb && \
-    rm cuda-nvrtc-dev-13-0_13.0.48-1_arm64.deb
+# # to be moved to docker/common/ scripts
+# wget https://urm.nvidia.com/artifactory/sw-gpu-cuda-installer-generic-local/packaging/r13.0/cuda_nvrtc/linux-sbsa/13.0.48/cuda-nvrtc-dev-13-0_13.0.48-1_arm64.deb && \
+#     dpkg -i cuda-nvrtc-dev-13-0_13.0.48-1_arm64.deb && \
+#     rm cuda-nvrtc-dev-13-0_13.0.48-1_arm64.deb
 
-wget https://github.com/Kitware/CMake/releases/download/v4.0.3/cmake-4.0.3-linux-aarch64.sh && \
-    bash cmake-4.0.3-linux-aarch64.sh --skip-license --prefix=/usr/local/cmake --exclude-subdir
+# wget https://github.com/Kitware/CMake/releases/download/v4.0.3/cmake-4.0.3-linux-aarch64.sh && \
+#     bash cmake-4.0.3-linux-aarch64.sh --skip-license --prefix=/usr/local/cmake --exclude-subdir
 
-apt update
-# fix LLVM build
-apt install -y libstdc++-14-dev
+# apt update
+# # fix LLVM build
+# apt install -y libstdc++-14-dev
 
-else
-    echo "Unsupported architecture: $ARCH"
-    exit 1
-fi
+# # wait for https://github.com/NVIDIA/TensorRT-LLM/pull/6588
+# pip install deep_gemm@git+https://github.com/VALLIS-NERIA/DeepGEMM.git@97d97a20c2ecd53a248ab64242219d780cf822b8 --no-build-isolation
 
-# wait for new triton to be published
-cd /usr/local/lib/python3.12/dist-packages/ && \
-    ls -la | grep pytorch_triton && \
-    mv pytorch_triton-3.3.1+gitc8757738.dist-info triton-3.3.1+gitc8757738.dist-info && \
-    cd triton-3.3.1+gitc8757738.dist-info && \
-    echo "Current directory: $(pwd)" && \
-    echo "Files in directory:" && \
-    ls -la && \
-    sed -i 's/^Name: pytorch-triton/Name: triton/' METADATA && \
-    sed -i 's|pytorch_triton-3.3.1+gitc8757738.dist-info/|triton-3.3.1+gitc8757738.dist-info/|g' RECORD && \
-    echo "METADATA after update:" && \
-    grep "^Name:" METADATA
+# else
+#     echo "Unsupported architecture: $ARCH"
+#     exit 1
+# fi
 
-# pip install git+https://github.com/triton-lang/triton.git@main
+# # wait for new triton to be published
+# cd /usr/local/lib/python3.12/dist-packages/ && \
+#     ls -la | grep pytorch_triton && \
+#     mv pytorch_triton-3.3.1+gitc8757738.dist-info triton-3.3.1+gitc8757738.dist-info && \
+#     cd triton-3.3.1+gitc8757738.dist-info && \
+#     echo "Current directory: $(pwd)" && \
+#     echo "Files in directory:" && \
+#     ls -la && \
+#     sed -i 's/^Name: pytorch-triton/Name: triton/' METADATA && \
+#     sed -i 's|pytorch_triton-3.3.1+gitc8757738.dist-info/|triton-3.3.1+gitc8757738.dist-info/|g' RECORD && \
+#     echo "METADATA after update:" && \
+#     grep "^Name:" METADATA
+
+# # pip install git+https://github.com/triton-lang/triton.git@main
diff --git a/docker/Dockerfile.multi b/docker/Dockerfile.multi
index bd9f927182..ed66d49235 100644
--- a/docker/Dockerfile.multi
+++ b/docker/Dockerfile.multi
@@ -1,8 +1,8 @@
 # Multi-stage Dockerfile
 ARG BASE_IMAGE=gitlab-master.nvidia.com:5005/dl/dgx/pytorch
 ARG TRITON_IMAGE=gitlab-master.nvidia.com:5005/dl/dgx/tritonserver
-ARG BASE_TAG=25.08-py3.32224057-base
-ARG TRITON_BASE_TAG=25.08-RC-py3.32078257
+ARG BASE_TAG=25.08-py3.32674667-devel
+ARG TRITON_BASE_TAG=25.08-py3.32978230
 ARG DEVEL_IMAGE=devel
 
 FROM ${BASE_IMAGE}:${BASE_TAG} AS base
@@ -74,13 +74,29 @@ ENV PYTORCH_CUDA_ALLOC_CONF="garbage_collection_threshold:0.99999"
 RUN pip3 uninstall -y opencv && rm -rf /usr/local/lib/python3*/dist-packages/cv2/
 RUN pip3 install opencv-python-headless --force-reinstall --no-deps --no-cache-dir
 
-COPY bringup_fix.sh bringup_fix.sh
-RUN bash ./bringup_fix.sh && rm bringup_fix.sh
+# COPY bringup_fix.sh bringup_fix.sh
+# RUN bash ./bringup_fix.sh && rm bringup_fix.sh
 
 # WARs against security issues inherited from pytorch:25.06
+# * https://github.com/advisories/GHSA-8qvm-5x2c-j2w7
 RUN pip3 install --upgrade --no-cache-dir \
     "protobuf>=4.25.8"
 
+# Wait for new triton to be published
+# Rename the pytorch_triton package to triton
+RUN cd /usr/local/lib/python3.12/dist-packages/ && \
+    ls -la | grep pytorch_triton && \
+    mv pytorch_triton-3.3.1+gitc8757738.dist-info triton-3.3.1+gitc8757738.dist-info && \
+    cd triton-3.3.1+gitc8757738.dist-info && \
+    echo "Current directory: $(pwd)" && \
+    echo "Files in directory:" && \
+    ls -la && \
+    sed -i 's/^Name: pytorch-triton/Name: triton/' METADATA && \
+    sed -i 's|pytorch_triton-3.3.1+gitc8757738.dist-info/|triton-3.3.1+gitc8757738.dist-info/|g' RECORD && \
+    echo "METADATA after update:" && \
+    grep "^Name:" METADATA
+
+
 FROM ${TRITON_IMAGE}:${TRITON_BASE_TAG} AS triton
 FROM devel AS tritondevel
 
@@ -90,6 +106,8 @@ COPY --from=triton /opt/tritonserver/lib /opt/tritonserver/lib
 COPY --from=triton /opt/tritonserver/include /opt/tritonserver/include
 COPY --from=triton /opt/tritonserver/bin /opt/tritonserver/bin
 COPY --from=triton /opt/tritonserver/caches /opt/tritonserver/caches
+# WAR: datacenter-gpu-manager 4.4.0 is not available in the apt repository
+COPY --from=triton /usr/lib/*-linux-gnu/libdcgm.so* /tmp/
 COPY docker/common/install_triton.sh install_triton.sh
 RUN bash ./install_triton.sh && rm install_triton.sh
 
diff --git a/docker/common/install_base.sh b/docker/common/install_base.sh
index 548e9c1b27..1546434692 100644
--- a/docker/common/install_base.sh
+++ b/docker/common/install_base.sh
@@ -56,6 +56,7 @@ init_ubuntu() {
         llvm \
         libclang-rt-dev \
         libffi-dev \
+        libstdc++-14-dev \
         libnuma1 \
         libnuma-dev \
         python3-dev \
diff --git a/docker/common/install_cmake.sh b/docker/common/install_cmake.sh
index 6bbe955b38..6272cf29dc 100644
--- a/docker/common/install_cmake.sh
+++ b/docker/common/install_cmake.sh
@@ -3,7 +3,7 @@
 set -ex
 
 ARCH=$(uname -m)
-CMAKE_VERSION="3.30.2"
+CMAKE_VERSION="4.0.3"
 
 GITHUB_URL="https://github.com"
 if [ -n "${GITHUB_MIRROR}" ]; then
     GITHUB_URL=${GITHUB_MIRROR}
diff --git a/docker/common/install_cuda_toolkit.sh b/docker/common/install_cuda_toolkit.sh
index c257315819..5d3ce166d6 100644
--- a/docker/common/install_cuda_toolkit.sh
+++ b/docker/common/install_cuda_toolkit.sh
@@ -5,7 +5,7 @@ set -ex
 # This script is used for reinstalling CUDA on Rocky Linux 8 with the run file.
 # CUDA version is usually aligned with the latest NGC CUDA image tag.
 # Only use when public CUDA image is not ready.
-CUDA_VER="12.9.1_575.57.08"
+CUDA_VER="13.0.0_580.65.06"
 CUDA_VER_SHORT="${CUDA_VER%_*}"
 
 NVCC_VERSION_OUTPUT=$(nvcc --version)
diff --git a/docker/common/install_tensorrt.sh b/docker/common/install_tensorrt.sh
index e2e3c6218d..bac35c7262 100644
--- a/docker/common/install_tensorrt.sh
+++ b/docker/common/install_tensorrt.sh
@@ -2,24 +2,23 @@
 
 set -ex
 
-TRT_VER="10.11.0.33"
+TRT_VER="10.13.2.6"
 # Align with the pre-installed cuDNN / cuBLAS / NCCL versions from
-# https://docs.nvidia.com/deeplearning/frameworks/pytorch-release-notes/rel-25-06.html#rel-25-06
-CUDA_VER="12.9" # 12.9.1
+# https://docs.nvidia.com/deeplearning/frameworks/pytorch-release-notes/rel-25-08.html#rel-25-08
+CUDA_VER="13.0" # 13.0.0
 # Keep the installation for cuDNN if users want to install PyTorch with source codes.
 # PyTorch 2.x can compile with cuDNN v9.
-CUDNN_VER="9.10.2.21-1"
-# NGC PyTorch 25.06 image uses NCCL 2.27.3, while NCCL 2.27.5 resolves a perf regression issue.
-# Use NCCL version 2.27.5 instead.
-NCCL_VER="2.27.5-1+cuda12.9"
-# NGC PyTorch 25.06 image uses cuBLAS 12.9.1.4, but which leads to failures with MoE Lora (see https://nvbugs/5376270).
-# Continue using cuBLAS 12.9.0.13 until this issue is resolved.
-CUBLAS_VER="12.9.0.13-1"
+CUDNN_VER="9.12.0.42-1"
+# NCCL 2.26.x, used in the NGC PyTorch 25.05 image, has a performance regression issue.
+# Use NCCL version 2.27.6, which includes the fixes.
+NCCL_VER="2.27.6-1+cuda13.0"
+# Use cuBLAS version 13.0.0.19.
+CUBLAS_VER="13.0.0.19-1" # Align with the pre-installed CUDA / NVCC / NVRTC versions from # https://docs.nvidia.com/cuda/cuda-toolkit-release-notes/index.html -NVRTC_VER="12.9.86-1" -CUDA_RUNTIME="12.9.79-1" -CUDA_DRIVER_VERSION="575.57.08-1.el8" +NVRTC_VER="13.0.48-1" +CUDA_RUNTIME="13.0.37-1" +CUDA_DRIVER_VERSION="580.65.06-1.el8" for i in "$@"; do case $i in @@ -41,39 +40,44 @@ fi install_ubuntu_requirements() { apt-get update && apt-get install -y --no-install-recommends gnupg2 curl ca-certificates ARCH=$(uname -m) - if [ "$ARCH" = "amd64" ];then ARCH="x86_64";fi - if [ "$ARCH" = "aarch64" ];then ARCH="sbsa";fi + ARCH2="amd64" + if [ "$ARCH" = "amd64" ];then ARCH="x86_64" && ARCH2="amd64";fi + if [ "$ARCH" = "aarch64" ];then ARCH="sbsa" && ARCH2="arm64";fi curl -fsSLO https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2404/${ARCH}/cuda-keyring_1.1-1_all.deb dpkg -i cuda-keyring_1.1-1_all.deb rm cuda-keyring_1.1-1_all.deb - apt-get update - # if [[ $(apt list --installed | grep libcudnn9) ]]; then - # apt-get remove --purge -y libcudnn9* - # fi - # if [[ $(apt list --installed | grep libnccl) ]]; then - # apt-get remove --purge -y --allow-change-held-packages libnccl* - # fi - # if [[ $(apt list --installed | grep libcublas) ]]; then - # apt-get remove --purge -y --allow-change-held-packages libcublas* - # fi - # if [[ $(apt list --installed | grep cuda-nvrtc-dev) ]]; then - # apt-get remove --purge -y --allow-change-held-packages cuda-nvrtc-dev* - # fi + wget https://urm.nvidia.com/artifactory/sw-gpu-cuda-installer-generic-local/packaging/r13.0/cuda_nvrtc/linux-${ARCH}/13.0.48/cuda-nvrtc-dev-13-0_13.0.48-1_${ARCH2}.deb && \ + dpkg -i cuda-nvrtc-dev-13-0_13.0.48-1_${ARCH2}.deb && \ + rm cuda-nvrtc-dev-13-0_13.0.48-1_${ARCH2}.deb - # CUBLAS_CUDA_VERSION=$(echo $CUDA_VER | sed 's/\./-/g') - # NVRTC_CUDA_VERSION=$(echo $CUDA_VER | sed 's/\./-/g') + # apt-get update + # if [[ $(apt list --installed | grep libcudnn9) ]]; then + # apt-get remove --purge -y libcudnn9* + # fi + # if [[ $(apt list --installed | grep libnccl) ]]; then + # apt-get remove --purge -y --allow-change-held-packages libnccl* + # fi + # if [[ $(apt list --installed | grep libcublas) ]]; then + # apt-get remove --purge -y --allow-change-held-packages libcublas* + # fi + # if [[ $(apt list --installed | grep cuda-nvrtc-dev) ]]; then + # apt-get remove --purge -y --allow-change-held-packages cuda-nvrtc-dev* + # fi - # apt-get install -y --no-install-recommends \ - # libcudnn9-cuda-12=${CUDNN_VER} \ - # libcudnn9-dev-cuda-12=${CUDNN_VER} \ - # libcudnn9-headers-cuda-12=${CUDNN_VER} \ - # libnccl2=${NCCL_VER} \ - # libnccl-dev=${NCCL_VER} \ - # libcublas-${CUBLAS_CUDA_VERSION}=${CUBLAS_VER} \ - # libcublas-dev-${CUBLAS_CUDA_VERSION}=${CUBLAS_VER} \ - # cuda-nvrtc-dev-${NVRTC_CUDA_VERSION}=${NVRTC_VER} + # CUBLAS_CUDA_VERSION=$(echo $CUDA_VER | sed 's/\./-/g') + # NVRTC_CUDA_VERSION=$(echo $CUDA_VER | sed 's/\./-/g') + + # apt-get install -y --no-install-recommends \ + # libcudnn9-cuda-13=${CUDNN_VER} \ + # libcudnn9-dev-cuda-13=${CUDNN_VER} \ + # libcudnn9-headers-cuda-13=${CUDNN_VER} \ + # libnccl2=${NCCL_VER} \ + # libnccl-dev=${NCCL_VER} \ + # libcublas-${CUBLAS_CUDA_VERSION}=${CUBLAS_VER} \ + # libcublas-dev-${CUBLAS_CUDA_VERSION}=${CUBLAS_VER} \ + # cuda-nvrtc-dev-${NVRTC_CUDA_VERSION}=${NVRTC_VER} apt-get clean rm -rf /var/lib/apt/lists/* @@ -92,7 +96,7 @@ install_rockylinux_requirements() { "libnccl-devel-${NCCL_VER}.${ARCH1}" \ "cuda-compat-${CUBLAS_CUDA_VERSION}-${CUDA_DRIVER_VERSION}.${ARCH1}" \ 
"cuda-toolkit-${CUBLAS_CUDA_VERSION}-config-common-${CUDA_RUNTIME}.noarch" \ - "cuda-toolkit-12-config-common-${CUDA_RUNTIME}.noarch" \ + "cuda-toolkit-13-config-common-${CUDA_RUNTIME}.noarch" \ "cuda-toolkit-config-common-${CUDA_RUNTIME}.noarch" \ "libcublas-${CUBLAS_CUDA_VERSION}-${CUBLAS_VER}.${ARCH1}" \ "libcublas-devel-${CUBLAS_CUDA_VERSION}-${CUBLAS_VER}.${ARCH1}"; do @@ -108,7 +112,7 @@ install_rockylinux_requirements() { libnccl-devel-${NCCL_VER}.${ARCH1}.rpm \ cuda-compat-${CUBLAS_CUDA_VERSION}-${CUDA_DRIVER_VERSION}.${ARCH1}.rpm \ cuda-toolkit-${CUBLAS_CUDA_VERSION}-config-common-${CUDA_RUNTIME}.noarch.rpm \ - cuda-toolkit-12-config-common-${CUDA_RUNTIME}.noarch.rpm \ + cuda-toolkit-13-config-common-${CUDA_RUNTIME}.noarch.rpm \ cuda-toolkit-config-common-${CUDA_RUNTIME}.noarch.rpm \ libcublas-${CUBLAS_CUDA_VERSION}-${CUBLAS_VER}.${ARCH1}.rpm \ libcublas-devel-${CUBLAS_CUDA_VERSION}-${CUBLAS_VER}.${ARCH1}.rpm @@ -130,15 +134,16 @@ install_tensorrt() { if [ -z "$ARCH" ];then ARCH=$(uname -m);fi if [ "$ARCH" = "arm64" ];then ARCH="aarch64";fi if [ "$ARCH" = "amd64" ];then ARCH="x86_64";fi - - if [ "$ARCH" = "x86_64" ]; then - RELEASE_URL_TRT="http://cuda-repo/release-candidates/Libraries/TensorRT/v10.14/10.14.0.19-6374d0f7/13.0-r580/Linux-x64-manylinux_2_28/tar/TensorRT-10.14.0.19.Linux.x86_64-gnu.cuda-13.0.tar.gz" - else - RELEASE_URL_TRT="http://cuda-repo/release-candidates/Libraries/TensorRT/v10.14/10.14.0.19-6374d0f7/13.0-r580/Linux-aarch64-manylinux_2_35/tar/TensorRT-10.14.0.19.Ubuntu-22.04.aarch64-gnu.cuda-13.0.tar.gz" - fi + RELEASE_URL_TRT="https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/${TRT_VER_SHORT}/tars/TensorRT-${TRT_VER}.Linux.${ARCH}-gnu.cuda-${TRT_CUDA_VERSION}.tar.gz" fi - wget --no-verbose ${RELEASE_URL_TRT} -O /tmp/TensorRT.tar + # Download TensorRT (6GB file, needs longer timeout) + echo "Downloading TensorRT from: ${RELEASE_URL_TRT}" + if [ "$ARCH" = "x86_64" ];then + curl -L --insecure --connect-timeout 600 --max-time 3600 --retry 3 -o /tmp/TensorRT.tar "${RELEASE_URL_TRT}" + else + wget --no-verbose ${RELEASE_URL_TRT} -O /tmp/TensorRT.tar + fi tar -xf /tmp/TensorRT.tar -C /usr/local/ mv /usr/local/TensorRT-* /usr/local/tensorrt pip3 install --no-cache-dir /usr/local/tensorrt/python/tensorrt-*-cp${PARSED_PY_VERSION}-*.whl @@ -162,7 +167,7 @@ case "$ID" in install_tensorrt ;; rocky) - install_rockylinux_requirements + # install_rockylinux_requirements install_tensorrt ;; *) diff --git a/docker/common/install_triton.sh b/docker/common/install_triton.sh index 0886726fcf..89b6aced03 100644 --- a/docker/common/install_triton.sh +++ b/docker/common/install_triton.sh @@ -21,10 +21,17 @@ install_triton_deps() { python3-build \ libb64-dev \ libarchive-dev \ - datacenter-gpu-manager=1:3.3.6 \ && install_boost \ && apt-get clean \ && rm -rf /var/lib/apt/lists/* + # Copy /tmp/libdcgm.so* files back to /usr/lib/-linux-gnu/ + if [ -d /usr/lib/x86_64-linux-gnu ]; then + cp -f /tmp/libdcgm.so* /usr/lib/x86_64-linux-gnu/ || true + elif [ -d /usr/lib/aarch64-linux-gnu ]; then + cp -f /tmp/libdcgm.so* /usr/lib/aarch64-linux-gnu/ || true + else + echo "Target /usr/lib directory for architecture not found, skipping libdcgm.so* copy" + fi } # Install Triton only if base image is Ubuntu diff --git a/jenkins/L0_Test.groovy b/jenkins/L0_Test.groovy index 9bf77571ba..aa5458b3bd 100644 --- a/jenkins/L0_Test.groovy +++ b/jenkins/L0_Test.groovy @@ -2097,7 +2097,7 @@ def launchTestJobs(pipeline, testFilter, dockerNode=null) 
trtllm_utils.llmExecStepWithRetry(pipeline, script: "wget https://developer.download.nvidia.com/compute/cuda/repos/${ubuntu_version}/${platform}/cuda-keyring_1.1-1_all.deb") trtllm_utils.llmExecStepWithRetry(pipeline, script: "dpkg -i cuda-keyring_1.1-1_all.deb") trtllm_utils.llmExecStepWithRetry(pipeline, script: "apt-get update") - trtllm_utils.llmExecStepWithRetry(pipeline, script: "apt-get -y install cuda-toolkit-12-9") + trtllm_utils.llmExecStepWithRetry(pipeline, script: "apt-get -y install cuda-toolkit-13-0") } // Extra PyTorch CUDA 12.8 install for SBSA platform and Blackwell GPUs bare-metal environments diff --git a/jenkins/current_image_tags.properties b/jenkins/current_image_tags.properties index 87a2808834..5774cad038 100644 --- a/jenkins/current_image_tags.properties +++ b/jenkins/current_image_tags.properties @@ -11,7 +11,9 @@ # # NB: Typically, the suffix indicates the PR whose CI pipeline generated the images. In case that # images are adopted from PostMerge pipelines, the abbreviated commit hash is used instead. -LLM_DOCKER_IMAGE=gitlab-master.nvidia.com:5005/xiweny/images:gb110_bringup_x86_64 -LLM_SBSA_DOCKER_IMAGE=gitlab-master.nvidia.com:5005/xiweny/images:gb110_bringup_sbsa +LLM_DOCKER_IMAGE_12_9=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:pytorch-25.06-py3-x86_64-ubuntu24.04-trt10.11.0.33-skip-tritondevel-202508051130-6090 +LLM_SBSA_DOCKER_IMAGE_12_9=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:pytorch-25.06-py3-aarch64-ubuntu24.04-trt10.11.0.33-skip-tritondevel-202508051130-6090 +LLM_DOCKER_IMAGE=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm-staging/tritondevel:x86_64-tritondevel-torch_skip-a9bc5c5-user_zhanruis_update_dlfw_and_cu13-656 +LLM_SBSA_DOCKER_IMAGE=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm-staging/tritondevel:sbsa-tritondevel-torch_skip-a9bc5c5-user_zhanruis_update_dlfw_and_cu13-656 LLM_ROCKYLINUX8_PY310_DOCKER_IMAGE=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:cuda-12.9.1-devel-rocky8-x86_64-rocky8-py310-trt10.11.0.33-skip-tritondevel-202508051130-6090 LLM_ROCKYLINUX8_PY312_DOCKER_IMAGE=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:cuda-12.9.1-devel-rocky8-x86_64-rocky8-py312-trt10.11.0.33-skip-tritondevel-202508051130-6090 diff --git a/requirements.txt b/requirements.txt index 42c7579352..7aa628d652 100644 --- a/requirements.txt +++ b/requirements.txt @@ -3,7 +3,7 @@ accelerate>=0.25.0 build colored -cuda-python # Do not override the custom version of cuda-python installed in the NGC PyTorch image. +cuda-python>=12,<13 diffusers>=0.27.0 lark mpi4py @@ -26,8 +26,8 @@ tensorrt torch>=2.7.1,<=2.8.0a0 torchvision nvidia-modelopt[torch]~=0.33.0 -nvidia-nccl-cu12 -nvidia-cuda-nvrtc-cu12 +nvidia-nccl-cu13 +nvidia-cuda-nvrtc-cu13 transformers==4.53.1 pydantic>=2.9.1 pydantic-settings[yaml]