mirror of
https://github.com/NVIDIA/TensorRT-LLM.git
synced 2026-01-14 06:27:45 +08:00
infra: upgrade to DLFW 25.08-pre and TRT 10.13.2.4
Signed-off-by: Zhanrui Sun <zhanruis@nvidia.com>
This commit is contained in:
parent
97a3788dcf
commit
ebec4ea5ee
@ -1,46 +1,49 @@
|
||||
ARCH=$(uname -m)
|
||||
if [ $ARCH == "x86_64" ]; then
|
||||
# ARCH=$(uname -m)
|
||||
# if [ $ARCH == "x86_64" ]; then
|
||||
|
||||
wget https://urm.nvidia.com/artifactory/sw-gpu-cuda-installer-generic-local/packaging/r13.0/cuda_nvrtc/linux-x86_64/13.0.48/cuda-nvrtc-dev-13-0_13.0.48-1_amd64.deb && \
|
||||
dpkg -i cuda-nvrtc-dev-13-0_13.0.48-1_amd64.deb && \
|
||||
rm cuda-nvrtc-dev-13-0_13.0.48-1_amd64.deb
|
||||
# wget https://urm.nvidia.com/artifactory/sw-gpu-cuda-installer-generic-local/packaging/r13.0/cuda_nvrtc/linux-x86_64/13.0.48/cuda-nvrtc-dev-13-0_13.0.48-1_amd64.deb && \
|
||||
# dpkg -i cuda-nvrtc-dev-13-0_13.0.48-1_amd64.deb && \
|
||||
# rm cuda-nvrtc-dev-13-0_13.0.48-1_amd64.deb
|
||||
|
||||
wget https://github.com/Kitware/CMake/releases/download/v4.0.3/cmake-4.0.3-linux-x86_64.sh && \
|
||||
bash cmake-4.0.3-linux-x86_64.sh --skip-license --prefix=/usr/local/cmake --exclude-subdir
|
||||
# wget https://github.com/Kitware/CMake/releases/download/v4.0.3/cmake-4.0.3-linux-x86_64.sh && \
|
||||
# bash cmake-4.0.3-linux-x86_64.sh --skip-license --prefix=/usr/local/cmake --exclude-subdir
|
||||
|
||||
apt update
|
||||
apt install -y libstdc++-14-dev
|
||||
# apt update
|
||||
# apt install -y libstdc++-14-dev
|
||||
|
||||
elif [ $ARCH == "aarch64" ]; then
|
||||
# elif [ $ARCH == "aarch64" ]; then
|
||||
|
||||
# to be moved to docker/common/ scripts
|
||||
wget https://urm.nvidia.com/artifactory/sw-gpu-cuda-installer-generic-local/packaging/r13.0/cuda_nvrtc/linux-sbsa/13.0.48/cuda-nvrtc-dev-13-0_13.0.48-1_arm64.deb && \
|
||||
dpkg -i cuda-nvrtc-dev-13-0_13.0.48-1_arm64.deb && \
|
||||
rm cuda-nvrtc-dev-13-0_13.0.48-1_arm64.deb
|
||||
# # to be moved to docker/common/ scripts
|
||||
# wget https://urm.nvidia.com/artifactory/sw-gpu-cuda-installer-generic-local/packaging/r13.0/cuda_nvrtc/linux-sbsa/13.0.48/cuda-nvrtc-dev-13-0_13.0.48-1_arm64.deb && \
|
||||
# dpkg -i cuda-nvrtc-dev-13-0_13.0.48-1_arm64.deb && \
|
||||
# rm cuda-nvrtc-dev-13-0_13.0.48-1_arm64.deb
|
||||
|
||||
wget https://github.com/Kitware/CMake/releases/download/v4.0.3/cmake-4.0.3-linux-aarch64.sh && \
|
||||
bash cmake-4.0.3-linux-aarch64.sh --skip-license --prefix=/usr/local/cmake --exclude-subdir
|
||||
# wget https://github.com/Kitware/CMake/releases/download/v4.0.3/cmake-4.0.3-linux-aarch64.sh && \
|
||||
# bash cmake-4.0.3-linux-aarch64.sh --skip-license --prefix=/usr/local/cmake --exclude-subdir
|
||||
|
||||
apt update
|
||||
# fix LLVM build
|
||||
apt install -y libstdc++-14-dev
|
||||
# apt update
|
||||
# # fix LLVM build
|
||||
# apt install -y libstdc++-14-dev
|
||||
|
||||
else
|
||||
echo "Unsupported architecture: $ARCH"
|
||||
exit 1
|
||||
fi
|
||||
# # wait for https://github.com/NVIDIA/TensorRT-LLM/pull/6588
|
||||
# pip install deep_gemm@git+https://github.com/VALLIS-NERIA/DeepGEMM.git@97d97a20c2ecd53a248ab64242219d780cf822b8 --no-build-isolation
|
||||
|
||||
# wait for new triton to be published
|
||||
cd /usr/local/lib/python3.12/dist-packages/ && \
|
||||
ls -la | grep pytorch_triton && \
|
||||
mv pytorch_triton-3.3.1+gitc8757738.dist-info triton-3.3.1+gitc8757738.dist-info && \
|
||||
cd triton-3.3.1+gitc8757738.dist-info && \
|
||||
echo "Current directory: $(pwd)" && \
|
||||
echo "Files in directory:" && \
|
||||
ls -la && \
|
||||
sed -i 's/^Name: pytorch-triton/Name: triton/' METADATA && \
|
||||
sed -i 's|pytorch_triton-3.3.1+gitc8757738.dist-info/|triton-3.3.1+gitc8757738.dist-info/|g' RECORD && \
|
||||
echo "METADATA after update:" && \
|
||||
grep "^Name:" METADATA
|
||||
# else
|
||||
# echo "Unsupported architecture: $ARCH"
|
||||
# exit 1
|
||||
# fi
|
||||
|
||||
# pip install git+https://github.com/triton-lang/triton.git@main
|
||||
# # wait for new triton to be published
|
||||
# cd /usr/local/lib/python3.12/dist-packages/ && \
|
||||
# ls -la | grep pytorch_triton && \
|
||||
# mv pytorch_triton-3.3.1+gitc8757738.dist-info triton-3.3.1+gitc8757738.dist-info && \
|
||||
# cd triton-3.3.1+gitc8757738.dist-info && \
|
||||
# echo "Current directory: $(pwd)" && \
|
||||
# echo "Files in directory:" && \
|
||||
# ls -la && \
|
||||
# sed -i 's/^Name: pytorch-triton/Name: triton/' METADATA && \
|
||||
# sed -i 's|pytorch_triton-3.3.1+gitc8757738.dist-info/|triton-3.3.1+gitc8757738.dist-info/|g' RECORD && \
|
||||
# echo "METADATA after update:" && \
|
||||
# grep "^Name:" METADATA
|
||||
|
||||
# # pip install git+https://github.com/triton-lang/triton.git@main
|
||||
|
||||
@ -1,8 +1,8 @@
|
||||
# Multi-stage Dockerfile
|
||||
ARG BASE_IMAGE=gitlab-master.nvidia.com:5005/dl/dgx/pytorch
|
||||
ARG TRITON_IMAGE=gitlab-master.nvidia.com:5005/dl/dgx/tritonserver
|
||||
ARG BASE_TAG=25.08-py3.32224057-base
|
||||
ARG TRITON_BASE_TAG=25.08-RC-py3.32078257
|
||||
ARG BASE_TAG=25.08-py3.32674667-devel
|
||||
ARG TRITON_BASE_TAG=25.08-py3.32978230
|
||||
ARG DEVEL_IMAGE=devel
|
||||
|
||||
FROM ${BASE_IMAGE}:${BASE_TAG} AS base
|
||||
@ -74,13 +74,29 @@ ENV PYTORCH_CUDA_ALLOC_CONF="garbage_collection_threshold:0.99999"
|
||||
RUN pip3 uninstall -y opencv && rm -rf /usr/local/lib/python3*/dist-packages/cv2/
|
||||
RUN pip3 install opencv-python-headless --force-reinstall --no-deps --no-cache-dir
|
||||
|
||||
COPY bringup_fix.sh bringup_fix.sh
|
||||
RUN bash ./bringup_fix.sh && rm bringup_fix.sh
|
||||
# COPY bringup_fix.sh bringup_fix.sh
|
||||
# RUN bash ./bringup_fix.sh && rm bringup_fix.sh
|
||||
|
||||
# WARs against security issues inherited from pytorch:25.06
|
||||
# * https://github.com/advisories/GHSA-8qvm-5x2c-j2w7
|
||||
RUN pip3 install --upgrade --no-cache-dir \
|
||||
"protobuf>=4.25.8"
|
||||
|
||||
# wait for new triton to be published
|
||||
# Rename pytorch_triton package to triton
|
||||
RUN cd /usr/local/lib/python3.12/dist-packages/ && \
|
||||
ls -la | grep pytorch_triton && \
|
||||
mv pytorch_triton-3.3.1+gitc8757738.dist-info triton-3.3.1+gitc8757738.dist-info && \
|
||||
cd triton-3.3.1+gitc8757738.dist-info && \
|
||||
echo "Current directory: $(pwd)" && \
|
||||
echo "Files in directory:" && \
|
||||
ls -la && \
|
||||
sed -i 's/^Name: pytorch-triton/Name: triton/' METADATA && \
|
||||
sed -i 's|pytorch_triton-3.3.1+gitc8757738.dist-info/|triton-3.3.1+gitc8757738.dist-info/|g' RECORD && \
|
||||
echo "METADATA after update:" && \
|
||||
grep "^Name:" METADATA
|
||||
|
||||
|
||||
FROM ${TRITON_IMAGE}:${TRITON_BASE_TAG} AS triton
|
||||
|
||||
FROM devel AS tritondevel
|
||||
@ -90,6 +106,8 @@ COPY --from=triton /opt/tritonserver/lib /opt/tritonserver/lib
|
||||
COPY --from=triton /opt/tritonserver/include /opt/tritonserver/include
|
||||
COPY --from=triton /opt/tritonserver/bin /opt/tritonserver/bin
|
||||
COPY --from=triton /opt/tritonserver/caches /opt/tritonserver/caches
|
||||
# WAR for the datacenter-gpu-manager 4.4.0 is not available in the apt repository
|
||||
COPY --from=triton /usr/lib/*-linux-gnu/libdcgm.so* /tmp/
|
||||
COPY docker/common/install_triton.sh install_triton.sh
|
||||
RUN bash ./install_triton.sh && rm install_triton.sh
|
||||
|
||||
|
||||
@ -56,6 +56,7 @@ init_ubuntu() {
|
||||
llvm \
|
||||
libclang-rt-dev \
|
||||
libffi-dev \
|
||||
libstdc++-14-dev \
|
||||
libnuma1 \
|
||||
libnuma-dev \
|
||||
python3-dev \
|
||||
|
||||
@ -3,7 +3,7 @@
|
||||
set -ex
|
||||
|
||||
ARCH=$(uname -m)
|
||||
CMAKE_VERSION="3.30.2"
|
||||
CMAKE_VERSION="4.0.3"
|
||||
GITHUB_URL="https://github.com"
|
||||
if [ -n "${GITHUB_MIRROR}" ]; then
|
||||
GITHUB_URL=${GITHUB_MIRROR}
|
||||
|
||||
@ -5,7 +5,7 @@ set -ex
|
||||
# This script is used for reinstalling CUDA on Rocky Linux 8 with the run file.
|
||||
# CUDA version is usually aligned with the latest NGC CUDA image tag.
|
||||
# Only use when public CUDA image is not ready.
|
||||
CUDA_VER="12.9.1_575.57.08"
|
||||
CUDA_VER="13.0.0_580.65.06"
|
||||
CUDA_VER_SHORT="${CUDA_VER%_*}"
|
||||
|
||||
NVCC_VERSION_OUTPUT=$(nvcc --version)
|
||||
|
||||
@ -2,24 +2,23 @@
|
||||
|
||||
set -ex
|
||||
|
||||
TRT_VER="10.11.0.33"
|
||||
TRT_VER="10.13.2.6"
|
||||
# Align with the pre-installed cuDNN / cuBLAS / NCCL versions from
|
||||
# https://docs.nvidia.com/deeplearning/frameworks/pytorch-release-notes/rel-25-06.html#rel-25-06
|
||||
CUDA_VER="12.9" # 12.9.1
|
||||
# https://docs.nvidia.com/deeplearning/frameworks/pytorch-release-notes/rel-25-08.html#rel-25-08
|
||||
CUDA_VER="13.0" # 13.0.0
|
||||
# Keep the installation for cuDNN if users want to install PyTorch with source codes.
|
||||
# PyTorch 2.x can compile with cuDNN v9.
|
||||
CUDNN_VER="9.10.2.21-1"
|
||||
# NGC PyTorch 25.06 image uses NCCL 2.27.3, while NCCL 2.27.5 resolves a perf regression issue.
|
||||
# Use NCCL version 2.27.5 instead.
|
||||
NCCL_VER="2.27.5-1+cuda12.9"
|
||||
# NGC PyTorch 25.06 image uses cuBLAS 12.9.1.4, but which leads to failures with MoE Lora (see https://nvbugs/5376270).
|
||||
# Continue using cuBLAS 12.9.0.13 until this issue is resolved.
|
||||
CUBLAS_VER="12.9.0.13-1"
|
||||
CUDNN_VER="9.12.0.42-1"
|
||||
# NCCL version 2.26.x used in the NGC PyTorch 25.05 image but has a performance regression issue.
|
||||
# Use NCCL version 2.27.5 which has the fixes.
|
||||
NCCL_VER="2.27.6-1+cuda13.0"
|
||||
# Use cuBLAS version 13.0.0.19 instead.
|
||||
CUBLAS_VER="13.0.0.19-1"
|
||||
# Align with the pre-installed CUDA / NVCC / NVRTC versions from
|
||||
# https://docs.nvidia.com/cuda/cuda-toolkit-release-notes/index.html
|
||||
NVRTC_VER="12.9.86-1"
|
||||
CUDA_RUNTIME="12.9.79-1"
|
||||
CUDA_DRIVER_VERSION="575.57.08-1.el8"
|
||||
NVRTC_VER="13.0.48-1"
|
||||
CUDA_RUNTIME="13.0.37-1"
|
||||
CUDA_DRIVER_VERSION="580.65.06-1.el8"
|
||||
|
||||
for i in "$@"; do
|
||||
case $i in
|
||||
@ -41,39 +40,44 @@ fi
|
||||
install_ubuntu_requirements() {
|
||||
apt-get update && apt-get install -y --no-install-recommends gnupg2 curl ca-certificates
|
||||
ARCH=$(uname -m)
|
||||
if [ "$ARCH" = "amd64" ];then ARCH="x86_64";fi
|
||||
if [ "$ARCH" = "aarch64" ];then ARCH="sbsa";fi
|
||||
ARCH2="amd64"
|
||||
if [ "$ARCH" = "amd64" ];then ARCH="x86_64" && ARCH2="amd64";fi
|
||||
if [ "$ARCH" = "aarch64" ];then ARCH="sbsa" && ARCH2="arm64";fi
|
||||
|
||||
curl -fsSLO https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2404/${ARCH}/cuda-keyring_1.1-1_all.deb
|
||||
dpkg -i cuda-keyring_1.1-1_all.deb
|
||||
rm cuda-keyring_1.1-1_all.deb
|
||||
|
||||
apt-get update
|
||||
# if [[ $(apt list --installed | grep libcudnn9) ]]; then
|
||||
# apt-get remove --purge -y libcudnn9*
|
||||
# fi
|
||||
# if [[ $(apt list --installed | grep libnccl) ]]; then
|
||||
# apt-get remove --purge -y --allow-change-held-packages libnccl*
|
||||
# fi
|
||||
# if [[ $(apt list --installed | grep libcublas) ]]; then
|
||||
# apt-get remove --purge -y --allow-change-held-packages libcublas*
|
||||
# fi
|
||||
# if [[ $(apt list --installed | grep cuda-nvrtc-dev) ]]; then
|
||||
# apt-get remove --purge -y --allow-change-held-packages cuda-nvrtc-dev*
|
||||
# fi
|
||||
wget https://urm.nvidia.com/artifactory/sw-gpu-cuda-installer-generic-local/packaging/r13.0/cuda_nvrtc/linux-${ARCH}/13.0.48/cuda-nvrtc-dev-13-0_13.0.48-1_${ARCH2}.deb && \
|
||||
dpkg -i cuda-nvrtc-dev-13-0_13.0.48-1_${ARCH2}.deb && \
|
||||
rm cuda-nvrtc-dev-13-0_13.0.48-1_${ARCH2}.deb
|
||||
|
||||
# CUBLAS_CUDA_VERSION=$(echo $CUDA_VER | sed 's/\./-/g')
|
||||
# NVRTC_CUDA_VERSION=$(echo $CUDA_VER | sed 's/\./-/g')
|
||||
# apt-get update
|
||||
# if [[ $(apt list --installed | grep libcudnn9) ]]; then
|
||||
# apt-get remove --purge -y libcudnn9*
|
||||
# fi
|
||||
# if [[ $(apt list --installed | grep libnccl) ]]; then
|
||||
# apt-get remove --purge -y --allow-change-held-packages libnccl*
|
||||
# fi
|
||||
# if [[ $(apt list --installed | grep libcublas) ]]; then
|
||||
# apt-get remove --purge -y --allow-change-held-packages libcublas*
|
||||
# fi
|
||||
# if [[ $(apt list --installed | grep cuda-nvrtc-dev) ]]; then
|
||||
# apt-get remove --purge -y --allow-change-held-packages cuda-nvrtc-dev*
|
||||
# fi
|
||||
|
||||
# apt-get install -y --no-install-recommends \
|
||||
# libcudnn9-cuda-12=${CUDNN_VER} \
|
||||
# libcudnn9-dev-cuda-12=${CUDNN_VER} \
|
||||
# libcudnn9-headers-cuda-12=${CUDNN_VER} \
|
||||
# libnccl2=${NCCL_VER} \
|
||||
# libnccl-dev=${NCCL_VER} \
|
||||
# libcublas-${CUBLAS_CUDA_VERSION}=${CUBLAS_VER} \
|
||||
# libcublas-dev-${CUBLAS_CUDA_VERSION}=${CUBLAS_VER} \
|
||||
# cuda-nvrtc-dev-${NVRTC_CUDA_VERSION}=${NVRTC_VER}
|
||||
# CUBLAS_CUDA_VERSION=$(echo $CUDA_VER | sed 's/\./-/g')
|
||||
# NVRTC_CUDA_VERSION=$(echo $CUDA_VER | sed 's/\./-/g')
|
||||
|
||||
# apt-get install -y --no-install-recommends \
|
||||
# libcudnn9-cuda-13=${CUDNN_VER} \
|
||||
# libcudnn9-dev-cuda-13=${CUDNN_VER} \
|
||||
# libcudnn9-headers-cuda-13=${CUDNN_VER} \
|
||||
# libnccl2=${NCCL_VER} \
|
||||
# libnccl-dev=${NCCL_VER} \
|
||||
# libcublas-${CUBLAS_CUDA_VERSION}=${CUBLAS_VER} \
|
||||
# libcublas-dev-${CUBLAS_CUDA_VERSION}=${CUBLAS_VER} \
|
||||
# cuda-nvrtc-dev-${NVRTC_CUDA_VERSION}=${NVRTC_VER}
|
||||
|
||||
apt-get clean
|
||||
rm -rf /var/lib/apt/lists/*
|
||||
@ -92,7 +96,7 @@ install_rockylinux_requirements() {
|
||||
"libnccl-devel-${NCCL_VER}.${ARCH1}" \
|
||||
"cuda-compat-${CUBLAS_CUDA_VERSION}-${CUDA_DRIVER_VERSION}.${ARCH1}" \
|
||||
"cuda-toolkit-${CUBLAS_CUDA_VERSION}-config-common-${CUDA_RUNTIME}.noarch" \
|
||||
"cuda-toolkit-12-config-common-${CUDA_RUNTIME}.noarch" \
|
||||
"cuda-toolkit-13-config-common-${CUDA_RUNTIME}.noarch" \
|
||||
"cuda-toolkit-config-common-${CUDA_RUNTIME}.noarch" \
|
||||
"libcublas-${CUBLAS_CUDA_VERSION}-${CUBLAS_VER}.${ARCH1}" \
|
||||
"libcublas-devel-${CUBLAS_CUDA_VERSION}-${CUBLAS_VER}.${ARCH1}"; do
|
||||
@ -108,7 +112,7 @@ install_rockylinux_requirements() {
|
||||
libnccl-devel-${NCCL_VER}.${ARCH1}.rpm \
|
||||
cuda-compat-${CUBLAS_CUDA_VERSION}-${CUDA_DRIVER_VERSION}.${ARCH1}.rpm \
|
||||
cuda-toolkit-${CUBLAS_CUDA_VERSION}-config-common-${CUDA_RUNTIME}.noarch.rpm \
|
||||
cuda-toolkit-12-config-common-${CUDA_RUNTIME}.noarch.rpm \
|
||||
cuda-toolkit-13-config-common-${CUDA_RUNTIME}.noarch.rpm \
|
||||
cuda-toolkit-config-common-${CUDA_RUNTIME}.noarch.rpm \
|
||||
libcublas-${CUBLAS_CUDA_VERSION}-${CUBLAS_VER}.${ARCH1}.rpm \
|
||||
libcublas-devel-${CUBLAS_CUDA_VERSION}-${CUBLAS_VER}.${ARCH1}.rpm
|
||||
@ -130,15 +134,16 @@ install_tensorrt() {
|
||||
if [ -z "$ARCH" ];then ARCH=$(uname -m);fi
|
||||
if [ "$ARCH" = "arm64" ];then ARCH="aarch64";fi
|
||||
if [ "$ARCH" = "amd64" ];then ARCH="x86_64";fi
|
||||
|
||||
if [ "$ARCH" = "x86_64" ]; then
|
||||
RELEASE_URL_TRT="http://cuda-repo/release-candidates/Libraries/TensorRT/v10.14/10.14.0.19-6374d0f7/13.0-r580/Linux-x64-manylinux_2_28/tar/TensorRT-10.14.0.19.Linux.x86_64-gnu.cuda-13.0.tar.gz"
|
||||
else
|
||||
RELEASE_URL_TRT="http://cuda-repo/release-candidates/Libraries/TensorRT/v10.14/10.14.0.19-6374d0f7/13.0-r580/Linux-aarch64-manylinux_2_35/tar/TensorRT-10.14.0.19.Ubuntu-22.04.aarch64-gnu.cuda-13.0.tar.gz"
|
||||
fi
|
||||
RELEASE_URL_TRT="https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/${TRT_VER_SHORT}/tars/TensorRT-${TRT_VER}.Linux.${ARCH}-gnu.cuda-${TRT_CUDA_VERSION}.tar.gz"
|
||||
fi
|
||||
|
||||
wget --no-verbose ${RELEASE_URL_TRT} -O /tmp/TensorRT.tar
|
||||
# Download TensorRT (6GB file, needs longer timeout)
|
||||
echo "Downloading TensorRT from: ${RELEASE_URL_TRT}"
|
||||
if [ "$ARCH" = "x86_64" ];then
|
||||
curl -L --insecure --connect-timeout 600 --max-time 3600 --retry 3 -o /tmp/TensorRT.tar "${RELEASE_URL_TRT}"
|
||||
else
|
||||
wget --no-verbose ${RELEASE_URL_TRT} -O /tmp/TensorRT.tar
|
||||
fi
|
||||
tar -xf /tmp/TensorRT.tar -C /usr/local/
|
||||
mv /usr/local/TensorRT-* /usr/local/tensorrt
|
||||
pip3 install --no-cache-dir /usr/local/tensorrt/python/tensorrt-*-cp${PARSED_PY_VERSION}-*.whl
|
||||
@ -162,7 +167,7 @@ case "$ID" in
|
||||
install_tensorrt
|
||||
;;
|
||||
rocky)
|
||||
install_rockylinux_requirements
|
||||
# install_rockylinux_requirements
|
||||
install_tensorrt
|
||||
;;
|
||||
*)
|
||||
|
||||
@ -21,10 +21,17 @@ install_triton_deps() {
|
||||
python3-build \
|
||||
libb64-dev \
|
||||
libarchive-dev \
|
||||
datacenter-gpu-manager=1:3.3.6 \
|
||||
&& install_boost \
|
||||
&& apt-get clean \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
# Copy /tmp/libdcgm.so* files back to /usr/lib/<arch>-linux-gnu/
|
||||
if [ -d /usr/lib/x86_64-linux-gnu ]; then
|
||||
cp -f /tmp/libdcgm.so* /usr/lib/x86_64-linux-gnu/ || true
|
||||
elif [ -d /usr/lib/aarch64-linux-gnu ]; then
|
||||
cp -f /tmp/libdcgm.so* /usr/lib/aarch64-linux-gnu/ || true
|
||||
else
|
||||
echo "Target /usr/lib directory for architecture not found, skipping libdcgm.so* copy"
|
||||
fi
|
||||
}
|
||||
|
||||
# Install Triton only if base image is Ubuntu
|
||||
|
||||
@ -2097,7 +2097,7 @@ def launchTestJobs(pipeline, testFilter, dockerNode=null)
|
||||
trtllm_utils.llmExecStepWithRetry(pipeline, script: "wget https://developer.download.nvidia.com/compute/cuda/repos/${ubuntu_version}/${platform}/cuda-keyring_1.1-1_all.deb")
|
||||
trtllm_utils.llmExecStepWithRetry(pipeline, script: "dpkg -i cuda-keyring_1.1-1_all.deb")
|
||||
trtllm_utils.llmExecStepWithRetry(pipeline, script: "apt-get update")
|
||||
trtllm_utils.llmExecStepWithRetry(pipeline, script: "apt-get -y install cuda-toolkit-12-9")
|
||||
trtllm_utils.llmExecStepWithRetry(pipeline, script: "apt-get -y install cuda-toolkit-13-0")
|
||||
}
|
||||
|
||||
// Extra PyTorch CUDA 12.8 install for SBSA platform and Blackwell GPUs bare-metal environments
|
||||
|
||||
@ -11,7 +11,9 @@
|
||||
#
|
||||
# NB: Typically, the suffix indicates the PR whose CI pipeline generated the images. In case that
|
||||
# images are adopted from PostMerge pipelines, the abbreviated commit hash is used instead.
|
||||
LLM_DOCKER_IMAGE=gitlab-master.nvidia.com:5005/xiweny/images:gb110_bringup_x86_64
|
||||
LLM_SBSA_DOCKER_IMAGE=gitlab-master.nvidia.com:5005/xiweny/images:gb110_bringup_sbsa
|
||||
LLM_DOCKER_IMAGE_12_9=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:pytorch-25.06-py3-x86_64-ubuntu24.04-trt10.11.0.33-skip-tritondevel-202508051130-6090
|
||||
LLM_SBSA_DOCKER_IMAGE_12_9=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:pytorch-25.06-py3-aarch64-ubuntu24.04-trt10.11.0.33-skip-tritondevel-202508051130-6090
|
||||
LLM_DOCKER_IMAGE=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm-staging/tritondevel:x86_64-tritondevel-torch_skip-a9bc5c5-user_zhanruis_update_dlfw_and_cu13-656
|
||||
LLM_SBSA_DOCKER_IMAGE=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm-staging/tritondevel:sbsa-tritondevel-torch_skip-a9bc5c5-user_zhanruis_update_dlfw_and_cu13-656
|
||||
LLM_ROCKYLINUX8_PY310_DOCKER_IMAGE=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:cuda-12.9.1-devel-rocky8-x86_64-rocky8-py310-trt10.11.0.33-skip-tritondevel-202508051130-6090
|
||||
LLM_ROCKYLINUX8_PY312_DOCKER_IMAGE=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:cuda-12.9.1-devel-rocky8-x86_64-rocky8-py312-trt10.11.0.33-skip-tritondevel-202508051130-6090
|
||||
|
||||
@ -3,7 +3,7 @@
|
||||
accelerate>=0.25.0
|
||||
build
|
||||
colored
|
||||
cuda-python # Do not override the custom version of cuda-python installed in the NGC PyTorch image.
|
||||
cuda-python>=12,<13
|
||||
diffusers>=0.27.0
|
||||
lark
|
||||
mpi4py
|
||||
@ -26,8 +26,8 @@ tensorrt
|
||||
torch>=2.7.1,<=2.8.0a0
|
||||
torchvision
|
||||
nvidia-modelopt[torch]~=0.33.0
|
||||
nvidia-nccl-cu12
|
||||
nvidia-cuda-nvrtc-cu12
|
||||
nvidia-nccl-cu13
|
||||
nvidia-cuda-nvrtc-cu13
|
||||
transformers==4.53.1
|
||||
pydantic>=2.9.1
|
||||
pydantic-settings[yaml]
|
||||
|
||||
Loading…
Reference in New Issue
Block a user