infra: Support build for both CU12 and CU13

Signed-off-by: Zhanrui Sun <zhanruis@nvidia.com>
Zhanrui Sun 2025-08-17 22:16:57 -07:00
parent f12a90b752
commit 8c998533af
8 changed files with 114 additions and 62 deletions

View File

@@ -84,17 +84,21 @@ RUN pip3 install --upgrade --no-cache-dir \
# wait for new triton to be published
# Rename pytorch_triton package to triton
RUN cd /usr/local/lib/python3.12/dist-packages/ && \
ls -la | grep pytorch_triton && \
mv pytorch_triton-3.3.1+gitc8757738.dist-info triton-3.3.1+gitc8757738.dist-info && \
cd triton-3.3.1+gitc8757738.dist-info && \
echo "Current directory: $(pwd)" && \
echo "Files in directory:" && \
ls -la && \
sed -i 's/^Name: pytorch-triton/Name: triton/' METADATA && \
sed -i 's|pytorch_triton-3.3.1+gitc8757738.dist-info/|triton-3.3.1+gitc8757738.dist-info/|g' RECORD && \
echo "METADATA after update:" && \
grep "^Name:" METADATA
RUN if [ -f /etc/redhat-release ]; then \
echo "Rocky8 detected, skipping symlink and ldconfig steps"; \
else \
cd /usr/local/lib/python3.12/dist-packages/ && \
ls -la | grep pytorch_triton && \
mv pytorch_triton-3.3.1+gitc8757738.dist-info triton-3.3.1+gitc8757738.dist-info && \
cd triton-3.3.1+gitc8757738.dist-info && \
echo "Current directory: $(pwd)" && \
echo "Files in directory:" && \
ls -la && \
sed -i 's/^Name: pytorch-triton/Name: triton/' METADATA && \
sed -i 's|pytorch_triton-3.3.1+gitc8757738.dist-info/|triton-3.3.1+gitc8757738.dist-info/|g' RECORD && \
echo "METADATA after update:" && \
grep "^Name:" METADATA; \
fi
FROM ${TRITON_IMAGE}:${TRITON_BASE_TAG} AS triton
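Not part of the commit, but a quick way to sanity-check the rename inside the image: pip reads the Name field from the renamed dist-info, so after this step it should report the package as triton (expected output below is an assumption based on the dist-info name above).

# Verification sketch, run inside the built image:
pip3 show triton | head -n 2
# Name: triton
# Version: 3.3.1+gitc8757738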

View File

@@ -188,16 +188,16 @@ jenkins-rockylinux8_%: PYTHON_VERSION_TAG_ID = $(if $(findstring 3.12,${PYTHON_V
jenkins-rockylinux8_%: IMAGE_WITH_TAG = $(shell . ../jenkins/current_image_tags.properties && echo $$LLM_ROCKYLINUX8_${PYTHON_VERSION_TAG_ID}_DOCKER_IMAGE)
jenkins-rockylinux8_%: STAGE = tritondevel
jenkins-rockylinux8_%: BASE_IMAGE = nvidia/cuda
jenkins-rockylinux8_%: BASE_TAG = 12.9.1-devel-rockylinux8
jenkins-rockylinux8_%: BASE_TAG = 13.0.0-devel-rockylinux8
rockylinux8_%: STAGE = tritondevel
rockylinux8_%: BASE_IMAGE = nvidia/cuda
rockylinux8_%: BASE_TAG = 12.9.1-devel-rockylinux8
rockylinux8_%: BASE_TAG = 13.0.0-devel-rockylinux8
# For x86_64 and aarch64
ubuntu22_%: STAGE = tritondevel
ubuntu22_%: BASE_IMAGE = nvidia/cuda
ubuntu22_%: BASE_TAG = 12.9.1-devel-ubuntu22.04
ubuntu22_%: BASE_TAG = 13.0.0-devel-ubuntu22.04
trtllm_%: STAGE = release
trtllm_%: PUSH_TO_STAGING := 0
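Because BASE_TAG is a target-specific make variable, a CU12 image can still be produced without editing the Makefile: command-line assignments take precedence over target-specific ones. A sketch, where the concrete target name ubuntu22_build is only assumed from the ubuntu22_% pattern:

# Hypothetical invocation: build the ubuntu22 stage against the CUDA 12.9 base instead of 13.0.
make ubuntu22_build BASE_TAG=12.9.1-devel-ubuntu22.04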

View File

@@ -8,16 +8,16 @@ TRT_VER="10.13.2.6"
CUDA_VER="13.0" # 13.0.0
# Keep the cuDNN installation in case users want to build PyTorch from source.
# PyTorch 2.x can compile with cuDNN v9.
CUDNN_VER="9.12.0.42-1"
CUDNN_VER="9.12.0.46-1"
# NCCL 2.26.x, used in the NGC PyTorch 25.05 image, has a performance regression issue.
# Use NCCL 2.27.x, which includes the fix.
NCCL_VER="2.27.6-1+cuda13.0"
NCCL_VER="2.27.7-1+cuda13.0"
# Use cuBLAS version 13.0.0.19 instead.
CUBLAS_VER="13.0.0.19-1"
# Align with the pre-installed CUDA / NVCC / NVRTC versions from
# https://docs.nvidia.com/cuda/cuda-toolkit-release-notes/index.html
NVRTC_VER="13.0.48-1"
CUDA_RUNTIME="13.0.37-1"
CUDA_RUNTIME="13.0.48-1"
CUDA_DRIVER_VERSION="580.65.06-1.el8"
for i in "$@"; do
@@ -48,36 +48,32 @@ install_ubuntu_requirements() {
dpkg -i cuda-keyring_1.1-1_all.deb
rm cuda-keyring_1.1-1_all.deb
wget https://urm.nvidia.com/artifactory/sw-gpu-cuda-installer-generic-local/packaging/r13.0/cuda_nvrtc/linux-${ARCH}/13.0.48/cuda-nvrtc-dev-13-0_13.0.48-1_${ARCH2}.deb && \
dpkg -i cuda-nvrtc-dev-13-0_13.0.48-1_${ARCH2}.deb && \
rm cuda-nvrtc-dev-13-0_13.0.48-1_${ARCH2}.deb
apt-get update
if [[ $(apt list --installed | grep libcudnn9) ]]; then
apt-get remove --purge -y libcudnn9*
fi
if [[ $(apt list --installed | grep libnccl) ]]; then
apt-get remove --purge -y --allow-change-held-packages libnccl*
fi
if [[ $(apt list --installed | grep libcublas) ]]; then
apt-get remove --purge -y --allow-change-held-packages libcublas*
fi
if [[ $(apt list --installed | grep cuda-nvrtc-dev) ]]; then
apt-get remove --purge -y --allow-change-held-packages cuda-nvrtc-dev*
fi
# apt-get update
# if [[ $(apt list --installed | grep libcudnn9) ]]; then
# apt-get remove --purge -y libcudnn9*
# fi
# if [[ $(apt list --installed | grep libnccl) ]]; then
# apt-get remove --purge -y --allow-change-held-packages libnccl*
# fi
# if [[ $(apt list --installed | grep libcublas) ]]; then
# apt-get remove --purge -y --allow-change-held-packages libcublas*
# fi
# if [[ $(apt list --installed | grep cuda-nvrtc-dev) ]]; then
# apt-get remove --purge -y --allow-change-held-packages cuda-nvrtc-dev*
# fi
CUBLAS_CUDA_VERSION=$(echo $CUDA_VER | sed 's/\./-/g')
NVRTC_CUDA_VERSION=$(echo $CUDA_VER | sed 's/\./-/g')
# CUBLAS_CUDA_VERSION=$(echo $CUDA_VER | sed 's/\./-/g')
# NVRTC_CUDA_VERSION=$(echo $CUDA_VER | sed 's/\./-/g')
# apt-get install -y --no-install-recommends \
# libcudnn9-cuda-13=${CUDNN_VER} \
# libcudnn9-dev-cuda-13=${CUDNN_VER} \
# libcudnn9-headers-cuda-13=${CUDNN_VER} \
# libnccl2=${NCCL_VER} \
# libnccl-dev=${NCCL_VER} \
# libcublas-${CUBLAS_CUDA_VERSION}=${CUBLAS_VER} \
# libcublas-dev-${CUBLAS_CUDA_VERSION}=${CUBLAS_VER} \
# cuda-nvrtc-dev-${NVRTC_CUDA_VERSION}=${NVRTC_VER}
apt-get install -y --no-install-recommends \
libcudnn9-cuda-13=${CUDNN_VER} \
libcudnn9-dev-cuda-13=${CUDNN_VER} \
libcudnn9-headers-cuda-13=${CUDNN_VER} \
libnccl2=${NCCL_VER} \
libnccl-dev=${NCCL_VER} \
libcublas-${CUBLAS_CUDA_VERSION}=${CUBLAS_VER} \
libcublas-dev-${CUBLAS_CUDA_VERSION}=${CUBLAS_VER} \
cuda-nvrtc-dev-${NVRTC_CUDA_VERSION}=${NVRTC_VER}
apt-get clean
rm -rf /var/lib/apt/lists/*
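For reference, the echo | sed idiom above turns the dotted CUDA version into the dashed suffix used in the Debian package names; a tiny worked example:

CUDA_VER="13.0"
CUBLAS_CUDA_VERSION=$(echo $CUDA_VER | sed 's/\./-/g')   # "13-0"
echo "libcublas-${CUBLAS_CUDA_VERSION}"                   # prints: libcublas-13-0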
@@ -167,7 +163,7 @@ case "$ID" in
install_tensorrt
;;
rocky)
# install_rockylinux_requirements
install_rockylinux_requirements
install_tensorrt
;;
*)

View File

@@ -2,7 +2,7 @@
set -ex
GITHUB_URL="https://github.com"
UCX_VERSION="v1.18.1"
UCX_VERSION="v1.19.0"
UCX_INSTALL_PATH="/usr/local/ucx/"
CUDA_PATH="/usr/local/cuda"
UCX_REPO="https://github.com/openucx/ucx.git"
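Only the pinned tag changes here; the clone itself is outside this hunk. A sketch of how such a pin is typically fetched (assumed pattern, not necessarily the script's actual command):

git clone --depth 1 --branch ${UCX_VERSION} ${UCX_REPO} ucx   # shallow clone of the v1.19.0 tag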

View File

@@ -16,6 +16,9 @@ AARCH64_TRIPLE = "aarch64-linux-gnu"
LLM_DOCKER_IMAGE = env.dockerImage
LLM_DOCKER_IMAGE_12_9 = "urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:pytorch-25.06-py3-x86_64-ubuntu24.04-trt10.11.0.33-skip-tritondevel-202508051130-6090"
LLM_SBSA_DOCKER_IMAGE_12_9 = "urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:pytorch-25.06-py3-aarch64-ubuntu24.04-trt10.11.0.33-skip-tritondevel-202508051130-6090"
// Always use x86_64 image for agent
AGENT_IMAGE = env.dockerImage.replace("aarch64", "x86_64")
@@ -35,6 +38,9 @@ def WHEEL_ARCHS = "wheelArchs"
@Field
def CONFIG_LINUX_X86_64_VANILLA = "linux_x86_64_Vanilla"
@Field
def CONFIG_LINUX_X86_64_VANILLA_CU12 = "linux_x86_64_Vanilla_CU12"
@Field
def CONFIG_LINUX_X86_64_SINGLE_DEVICE = "linux_x86_64_SingleDevice"
@@ -42,7 +48,10 @@ def CONFIG_LINUX_X86_64_SINGLE_DEVICE = "linux_x86_64_SingleDevice"
def CONFIG_LINUX_X86_64_LLVM = "linux_x86_64_LLVM"
@Field
CONFIG_LINUX_AARCH64 = "linux_aarch64"
def CONFIG_LINUX_AARCH64 = "linux_aarch64"
@Field
def CONFIG_LINUX_AARCH64_CU12 = "linux_aarch64_CU12"
@Field
def CONFIG_LINUX_AARCH64_LLVM = "linux_aarch64_LLVM"
@@ -62,6 +71,11 @@ def BUILD_CONFIGS = [
(TARNAME) : "TensorRT-LLM.tar.gz",
(WHEEL_ARCHS): "80-real;86-real;89-real;90-real;100-real;120-real",
],
(CONFIG_LINUX_X86_64_VANILLA_CU12) : [
(WHEEL_EXTRA_ARGS) : "--extra-cmake-vars ENABLE_MULTI_DEVICE=1 --extra-cmake-vars WARNING_IS_ERROR=ON --extra-cmake-vars NIXL_ROOT=/opt/nvidia/nvda_nixl --micro_benchmarks",
(TARNAME) : "TensorRT-LLM-CU12.tar.gz",
(WHEEL_ARCHS): "80-real;86-real;89-real;90-real;100-real;120-real",
],
(CONFIG_LINUX_X86_64_NANOBIND) : [
(WHEEL_EXTRA_ARGS) : "--binding_type nanobind --extra-cmake-vars ENABLE_MULTI_DEVICE=1 --extra-cmake-vars WARNING_IS_ERROR=ON --extra-cmake-vars NIXL_ROOT=/opt/nvidia/nvda_nixl --micro_benchmarks",
(TARNAME) : "nanobind-TensorRT-LLM.tar.gz",
@@ -82,6 +96,11 @@ def BUILD_CONFIGS = [
(TARNAME) : "TensorRT-LLM-GH200.tar.gz",
(WHEEL_ARCHS): "90-real;100-real;120-real",
],
(CONFIG_LINUX_AARCH64_CU12): [
(WHEEL_EXTRA_ARGS) : "--extra-cmake-vars WARNING_IS_ERROR=ON",
(TARNAME) : "TensorRT-LLM-GH200-CU12.tar.gz",
(WHEEL_ARCHS): "90-real;100-real;120-real",
],
(CONFIG_LINUX_AARCH64_NANOBIND): [
(WHEEL_EXTRA_ARGS) : "--binding_type nanobind --extra-cmake-vars WARNING_IS_ERROR=ON",
(TARNAME) : "nanobind-TensorRT-LLM-GH200.tar.gz",
@@ -448,6 +467,9 @@ def runLLMBuild(pipeline, buildFlags, tarName, is_linux_x86_64)
// TODO: Remove after the cmake version is upgraded to 3.31.8
// Get triton tag from docker/dockerfile.multi
def tritonShortTag = "r25.08"
if (tarName.contains("CU12")) {
tritonShortTag = "r25.06"
}
sh "cd ${LLM_ROOT}/triton_backend/inflight_batcher_llm && mkdir build && cd build && cmake .. -DTRTLLM_DIR=${llmPath} -DTRITON_COMMON_REPO_TAG=${tritonShortTag} -DTRITON_CORE_REPO_TAG=${tritonShortTag} -DTRITON_THIRD_PARTY_REPO_TAG=${tritonShortTag} -DTRITON_BACKEND_REPO_TAG=${tritonShortTag} -DUSE_CXX11_ABI=ON && make -j${BUILD_JOBS} install"
// Step 3: packaging wheels into tarfile
@@ -537,9 +559,13 @@ def launchStages(pipeline, cpu_arch, enableFailFast, globalVars)
wheelDockerImage = env.dockerImage
}
def LLM_DOCKER_IMAGE_CU12 = cpu_arch == AARCH64_TRIPLE ? LLM_SBSA_DOCKER_IMAGE_12_9 : LLM_DOCKER_IMAGE_12_9
buildConfigs = [
"Build TRT-LLM": [LLM_DOCKER_IMAGE] + prepareLLMBuild(
pipeline, cpu_arch == AARCH64_TRIPLE ? CONFIG_LINUX_AARCH64 : CONFIG_LINUX_X86_64_VANILLA),
"Build TRT-LLM CUDA12": [LLM_DOCKER_IMAGE_CU12] + prepareLLMBuild(
pipeline, cpu_arch == AARCH64_TRIPLE ? CONFIG_LINUX_AARCH64_CU12 : CONFIG_LINUX_X86_64_VANILLA_CU12),
"Build TRT-LLM LLVM": [LLM_DOCKER_IMAGE] + prepareLLMBuild(
pipeline, cpu_arch == AARCH64_TRIPLE ? CONFIG_LINUX_AARCH64_LLVM : CONFIG_LINUX_X86_64_LLVM),
"Build TRT-LLM Nanobind": [LLM_DOCKER_IMAGE] + prepareLLMBuild(

View File

@@ -38,6 +38,9 @@ LLM_DOCKER_IMAGE = env.dockerImage
LLM_ROCKYLINUX8_PY310_DOCKER_IMAGE = env.wheelDockerImagePy310
LLM_ROCKYLINUX8_PY312_DOCKER_IMAGE = env.wheelDockerImagePy312
LLM_DOCKER_IMAGE_12_9 = "urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:pytorch-25.06-py3-x86_64-ubuntu24.04-trt10.11.0.33-skip-tritondevel-202508051130-6090"
LLM_SBSA_DOCKER_IMAGE_12_9 = "urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:pytorch-25.06-py3-aarch64-ubuntu24.04-trt10.11.0.33-skip-tritondevel-202508051130-6090"
// DLFW torch image
DLFW_IMAGE = "nvcr.io/nvidia/pytorch:25.06-py3"
@@ -55,6 +58,9 @@ def TARNAME = "tarName"
@Field
def VANILLA_CONFIG = "Vanilla"
@Field
def VANILLA_CONFIG_CU12 = "Vanilla_CU12"
@Field
def SINGLE_DEVICE_CONFIG = "SingleDevice"
@@ -62,7 +68,10 @@ def SINGLE_DEVICE_CONFIG = "SingleDevice"
def LLVM_CONFIG = "LLVM"
@Field
LINUX_AARCH64_CONFIG = "linux_aarch64"
def LINUX_AARCH64_CONFIG = "linux_aarch64"
@Field
def LINUX_AARCH64_CONFIG_CU12 = "linux_aarch64_CU12"
@Field
def NANOBIND_CONFIG = "Nanobind"
@@ -71,9 +80,11 @@ def NANOBIND_CONFIG = "Nanobind"
def BUILD_CONFIGS = [
// Vanilla TARNAME is used for packaging in runLLMPackage
(VANILLA_CONFIG) : [(TARNAME) : "TensorRT-LLM.tar.gz"],
(VANILLA_CONFIG_CU12) : [(TARNAME) : "TensorRT-LLM-CU12.tar.gz"],
(SINGLE_DEVICE_CONFIG) : [(TARNAME) : "single-device-TensorRT-LLM.tar.gz"],
(LLVM_CONFIG) : [(TARNAME) : "llvm-TensorRT-LLM.tar.gz"],
(LINUX_AARCH64_CONFIG) : [(TARNAME) : "TensorRT-LLM-GH200.tar.gz"],
(LINUX_AARCH64_CONFIG_CU12) : [(TARNAME) : "TensorRT-LLM-GH200-CU12.tar.gz"],
(NANOBIND_CONFIG) : [(TARNAME) : "nanobind-TensorRT-LLM.tar.gz"],
]
@@ -1310,6 +1321,9 @@ def runLLMTestlistOnPlatformImpl(pipeline, platform, testList, config=VANILLA_CO
if (env.alternativeTRT) {
sh "cd ${llmSrc} && sed -i 's#tensorrt~=.*\$#tensorrt#g' requirements.txt && cat requirements.txt"
}
if (stageName.contains("-CU12")) {
trtllm_utils.llmExecStepWithRetry(pipeline, script: "cd ${llmSrc} && sed -i 's/-cu13/-cu12/g' requirements.txt && cat requirements.txt")
}
trtllm_utils.llmExecStepWithRetry(pipeline, script: "cd ${llmSrc} && pip3 install --retries 1 -r requirements-dev.txt")
if (!skipInstallWheel) {
trtllm_utils.llmExecStepWithRetry(pipeline, script: "cd ${llmPath} && pip3 install --force-reinstall --no-deps TensorRT-LLM/tensorrt_llm-*.whl")
@@ -1783,7 +1797,7 @@ def launchTestJobs(pipeline, testFilter, dockerNode=null)
"A100X-PyTorch-1": ["a100x", "l0_a100", 1, 1],
"L40S-PyTorch-1": ["l40s", "l0_l40s", 1, 2],
"L40S-PyTorch-2": ["l40s", "l0_l40s", 2, 2],
"H100_PCIe-PyTorch-1": ["h100-cr", "l0_h100", 1, 3],
"H100_PCIe-PyTorch-CU12-1": ["h100-cr", "l0_h100", 1, 3],
"H100_PCIe-PyTorch-2": ["h100-cr", "l0_h100", 2, 3],
"H100_PCIe-PyTorch-3": ["h100-cr", "l0_h100", 3, 3],
"H100_PCIe-CPP-1": ["h100-cr", "l0_h100", 1, 2],
@@ -1842,7 +1856,7 @@ def launchTestJobs(pipeline, testFilter, dockerNode=null)
"DGX_H200-4_GPUs-TensorRT-Post-Merge-3": ["dgx-h200-x4", "l0_dgx_h200", 3, 3, 4],
]
parallelJobs = x86TestConfigs.collectEntries{key, values -> [key, [createKubernetesPodConfig(LLM_DOCKER_IMAGE, values[0], "amd64", values[4] ?: 1, key.contains("Perf")), {
parallelJobs = x86TestConfigs.collectEntries{key, values -> [key, [createKubernetesPodConfig(key.contains("-CU12-") ? LLM_DOCKER_IMAGE_12_9 : LLM_DOCKER_IMAGE, values[0], "amd64", values[4] ?: 1, key.contains("Perf")), {
def config = VANILLA_CONFIG
if (key.contains("single-device")) {
config = SINGLE_DEVICE_CONFIG
@@ -1853,6 +1867,9 @@ def launchTestJobs(pipeline, testFilter, dockerNode=null)
if (key.contains("Nanobind")) {
config = NANOBIND_CONFIG
}
if (key.contains("-CU12-")) {
config = VANILLA_CONFIG_CU12
}
runLLMTestlistOnPlatform(pipeline, values[0], values[1], config, key.contains("Perf"), key, values[2], values[3])
}]]}
fullSet = parallelJobs.keySet()
@@ -1871,6 +1888,9 @@ def launchTestJobs(pipeline, testFilter, dockerNode=null)
if (key.contains("llvm")) {
config = LLVM_CONFIG
}
if (key.contains("-CU12-")) {
config = VANILLA_CONFIG_CU12
}
runLLMTestlistOnSlurm(pipeline, values[0], values[1], config, key.contains("Perf"), key, values[2], values[3], values[4] ?: 1)
}]]}
@@ -1902,7 +1922,7 @@ def launchTestJobs(pipeline, testFilter, dockerNode=null)
fullSet += multiNodesSBSAConfigs.keySet()
if (env.targetArch == AARCH64_TRIPLE) {
parallelJobs = SBSATestConfigs.collectEntries{key, values -> [key, [createKubernetesPodConfig(LLM_DOCKER_IMAGE, values[0], "arm64"), {
parallelJobs = SBSATestConfigs.collectEntries{key, values -> [key, [createKubernetesPodConfig(key.contains("-CU12-") ? LLM_SBSA_DOCKER_IMAGE_12_9 : LLM_DOCKER_IMAGE, values[0], "arm64"), {
runLLMTestlistOnPlatform(pipeline, values[0], values[1], LINUX_AARCH64_CONFIG, false, key, values[2], values[3])
}]]}
@@ -1987,7 +2007,7 @@ def launchTestJobs(pipeline, testFilter, dockerNode=null)
aarch64SanityCheckConfigs = [
"PY312-UB2404": [
LLM_DOCKER_IMAGE,
LLM_SBSA_DOCKER_IMAGE_12_9,
"GH200",
AARCH64_TRIPLE,
false,
@@ -1996,7 +2016,7 @@ def launchTestJobs(pipeline, testFilter, dockerNode=null)
true, // Extra PyTorch CUDA 12.8 install
],
"PY312-DLFW": [
LLM_DOCKER_IMAGE,
LLM_SBSA_DOCKER_IMAGE_12_9,
"GH200",
AARCH64_TRIPLE,
false,
@@ -2097,9 +2117,11 @@ def launchTestJobs(pipeline, testFilter, dockerNode=null)
trtllm_utils.llmExecStepWithRetry(pipeline, script: "wget https://developer.download.nvidia.com/compute/cuda/repos/${ubuntu_version}/${platform}/cuda-keyring_1.1-1_all.deb")
trtllm_utils.llmExecStepWithRetry(pipeline, script: "dpkg -i cuda-keyring_1.1-1_all.deb")
trtllm_utils.llmExecStepWithRetry(pipeline, script: "apt-get update")
trtllm_utils.llmExecStepWithRetry(pipeline, script: "apt-get -y install cuda-toolkit-13-0")
}
trtllm_utils.llmExecStepWithRetry(pipeline, script: "apt-get -y install cuda-toolkit-12-9")
trtllm_utils.llmExecStepWithRetry(pipeline, script: "pip3 install 'cuda-python>=12,<13' 'nvidia-ml-py>=12,<13'")
}
trtllm_utils.llmExecStepWithRetry(pipeline, script: "sed -i 's/-cu13/-cu12/g' ${LLM_ROOT}/requirements.txt")
// Extra PyTorch CUDA 12.8 install for the SBSA platform and Blackwell GPU bare-metal environments
if (values[6]) {
echo "###### Extra PyTorch CUDA 12.8 install Start ######"
@@ -2132,9 +2154,9 @@ def launchTestJobs(pipeline, testFilter, dockerNode=null)
checkPipInstall(pipeline, "${cpu_arch}/${wheelPath}")
}
echo "###### Run LLMAPI tests Start ######"
def config = VANILLA_CONFIG
def config = VANILLA_CONFIG_CU12
if (cpu_arch == AARCH64_TRIPLE) {
config = LINUX_AARCH64_CONFIG
config = LINUX_AARCH64_CONFIG_CU12
}
withEnv(libEnv) {
sh "env | sort"

View File

@@ -13,7 +13,11 @@
# images are adopted from PostMerge pipelines, the abbreviated commit hash is used instead.
LLM_DOCKER_IMAGE_12_9=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:pytorch-25.06-py3-x86_64-ubuntu24.04-trt10.11.0.33-skip-tritondevel-202508051130-6090
LLM_SBSA_DOCKER_IMAGE_12_9=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:pytorch-25.06-py3-aarch64-ubuntu24.04-trt10.11.0.33-skip-tritondevel-202508051130-6090
LLM_DOCKER_IMAGE=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm-staging/tritondevel:x86_64-tritondevel-torch_skip-a9bc5c5-user_zhanruis_update_dlfw_and_cu13-656
LLM_SBSA_DOCKER_IMAGE=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm-staging/tritondevel:sbsa-tritondevel-torch_skip-a9bc5c5-user_zhanruis_update_dlfw_and_cu13-656
LLM_ROCKYLINUX8_PY310_DOCKER_IMAGE_12_9=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:cuda-12.9.1-devel-rocky8-x86_64-rocky8-py310-trt10.11.0.33-skip-tritondevel-202508051130-6090
LLM_ROCKYLINUX8_PY312_DOCKER_IMAGE_12_9=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:cuda-12.9.1-devel-rocky8-x86_64-rocky8-py312-trt10.11.0.33-skip-tritondevel-202508051130-6090
LLM_DOCKER_IMAGE=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:pytorch-25.08-py3-x86_64-ubuntu24.04-trt10.13.2.6-skip-tritondevel-202508151730-pre-test
LLM_SBSA_DOCKER_IMAGE=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:pytorch-25.08-py3-aarch64-ubuntu24.04-trt10.13.2.6-skip-tritondevel-202508151730-pre-test
#LLM_ROCKYLINUX8_PY310_DOCKER_IMAGE=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:cuda-13.0.0-devel-rocky8-x86_64-rocky8-py310-trt10.13.2.6-skip-tritondevel-202508151730-pre-test
#LLM_ROCKYLINUX8_PY312_DOCKER_IMAGE=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:cuda-13.0.0-devel-rocky8-x86_64-rocky8-py312-trt10.13.2.6-skip-tritondevel-202508151730-pre-test
LLM_ROCKYLINUX8_PY310_DOCKER_IMAGE=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:cuda-12.9.1-devel-rocky8-x86_64-rocky8-py310-trt10.11.0.33-skip-tritondevel-202508051130-6090
LLM_ROCKYLINUX8_PY312_DOCKER_IMAGE=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:cuda-12.9.1-devel-rocky8-x86_64-rocky8-py312-trt10.11.0.33-skip-tritondevel-202508051130-6090

View File

@@ -3,7 +3,7 @@
accelerate>=0.25.0
build
colored
cuda-python
cuda-python>=12
diffusers>=0.27.0
lark
mpi4py
@@ -13,7 +13,7 @@ onnx_graphsurgeon>=0.5.2
openai
polygraphy
psutil
nvidia-ml-py>=12,<13
nvidia-ml-py>=12
# Just a wrapper since nvidia-modelopt requires pynvml
pynvml==12.0.0
pulp