From 8c998533affb1d4c87f60356fd9f05db07e59bd6 Mon Sep 17 00:00:00 2001
From: Zhanrui Sun
Date: Sun, 17 Aug 2025 22:16:57 -0700
Subject: [PATCH] infra: Support build for both CU12 and CU13

Signed-off-by: Zhanrui Sun
---
 docker/Dockerfile.multi               | 26 +++++++-----
 docker/Makefile                       |  6 +--
 docker/common/install_tensorrt.sh     | 60 +++++++++++++--------------
 docker/common/install_ucx.sh          |  2 +-
 jenkins/Build.groovy                  | 28 ++++++++++++-
 jenkins/L0_Test.groovy                | 42 ++++++++++++++-----
 jenkins/current_image_tags.properties |  8 +++-
 requirements.txt                      |  4 +-
 8 files changed, 114 insertions(+), 62 deletions(-)

diff --git a/docker/Dockerfile.multi b/docker/Dockerfile.multi
index ed66d49235..aba824f7ba 100644
--- a/docker/Dockerfile.multi
+++ b/docker/Dockerfile.multi
@@ -84,17 +84,21 @@ RUN pip3 install --upgrade --no-cache-dir \
 # wait for new triton to be published
 # Rename pytorch_triton package to triton
-RUN cd /usr/local/lib/python3.12/dist-packages/ && \
-    ls -la | grep pytorch_triton && \
-    mv pytorch_triton-3.3.1+gitc8757738.dist-info triton-3.3.1+gitc8757738.dist-info && \
-    cd triton-3.3.1+gitc8757738.dist-info && \
-    echo "Current directory: $(pwd)" && \
-    echo "Files in directory:" && \
-    ls -la && \
-    sed -i 's/^Name: pytorch-triton/Name: triton/' METADATA && \
-    sed -i 's|pytorch_triton-3.3.1+gitc8757738.dist-info/|triton-3.3.1+gitc8757738.dist-info/|g' RECORD && \
-    echo "METADATA after update:" && \
-    grep "^Name:" METADATA
+RUN if [ -f /etc/redhat-release ]; then \
+        echo "Rocky Linux 8 detected, skipping pytorch_triton rename"; \
+    else \
+        cd /usr/local/lib/python3.12/dist-packages/ && \
+        ls -la | grep pytorch_triton && \
+        mv pytorch_triton-3.3.1+gitc8757738.dist-info triton-3.3.1+gitc8757738.dist-info && \
+        cd triton-3.3.1+gitc8757738.dist-info && \
+        echo "Current directory: $(pwd)" && \
+        echo "Files in directory:" && \
+        ls -la && \
+        sed -i 's/^Name: pytorch-triton/Name: triton/' METADATA && \
+        sed -i 's|pytorch_triton-3.3.1+gitc8757738.dist-info/|triton-3.3.1+gitc8757738.dist-info/|g' RECORD && \
+        echo "METADATA after update:" && \
+        grep "^Name:" METADATA; \
+    fi

 FROM ${TRITON_IMAGE}:${TRITON_BASE_TAG} AS triton

diff --git a/docker/Makefile b/docker/Makefile
index 8432710af4..b70763f09c 100644
--- a/docker/Makefile
+++ b/docker/Makefile
@@ -188,16 +188,16 @@ jenkins-rockylinux8_%: PYTHON_VERSION_TAG_ID = $(if $(findstring 3.12,${PYTHON_V
 jenkins-rockylinux8_%: IMAGE_WITH_TAG = $(shell . ../jenkins/current_image_tags.properties && echo $$LLM_ROCKYLINUX8_${PYTHON_VERSION_TAG_ID}_DOCKER_IMAGE)
 jenkins-rockylinux8_%: STAGE = tritondevel
 jenkins-rockylinux8_%: BASE_IMAGE = nvidia/cuda
-jenkins-rockylinux8_%: BASE_TAG = 12.9.1-devel-rockylinux8
+jenkins-rockylinux8_%: BASE_TAG = 13.0.0-devel-rockylinux8

 rockylinux8_%: STAGE = tritondevel
 rockylinux8_%: BASE_IMAGE = nvidia/cuda
-rockylinux8_%: BASE_TAG = 12.9.1-devel-rockylinux8
+rockylinux8_%: BASE_TAG = 13.0.0-devel-rockylinux8

 # For x86_64 and aarch64
 ubuntu22_%: STAGE = tritondevel
 ubuntu22_%: BASE_IMAGE = nvidia/cuda
-ubuntu22_%: BASE_TAG = 12.9.1-devel-ubuntu22.04
+ubuntu22_%: BASE_TAG = 13.0.0-devel-ubuntu22.04

 trtllm_%: STAGE = release
 trtllm_%: PUSH_TO_STAGING := 0

diff --git a/docker/common/install_tensorrt.sh b/docker/common/install_tensorrt.sh
index bac35c7262..1ee819bb52 100644
--- a/docker/common/install_tensorrt.sh
+++ b/docker/common/install_tensorrt.sh
@@ -8,16 +8,16 @@ TRT_VER="10.13.2.6"
 CUDA_VER="13.0" # 13.0.0
 # Keep the cuDNN installation in case users want to build PyTorch from source.
 # PyTorch 2.x can be compiled with cuDNN v9.
-CUDNN_VER="9.12.0.42-1"
+CUDNN_VER="9.12.0.46-1"
 # NCCL version 2.26.x, used in the NGC PyTorch 25.05 image, has a performance regression.
 # Use NCCL version 2.27.7, which has the fixes.
-NCCL_VER="2.27.6-1+cuda13.0"
+NCCL_VER="2.27.7-1+cuda13.0"
 # Use cuBLAS version 13.0.0.19.
 CUBLAS_VER="13.0.0.19-1"
 # Align with the pre-installed CUDA / NVCC / NVRTC versions from
 # https://docs.nvidia.com/cuda/cuda-toolkit-release-notes/index.html
 NVRTC_VER="13.0.48-1"
-CUDA_RUNTIME="13.0.37-1"
+CUDA_RUNTIME="13.0.48-1"
 CUDA_DRIVER_VERSION="580.65.06-1.el8"

 for i in "$@"; do
@@ -48,36 +48,32 @@ install_ubuntu_requirements() {
     dpkg -i cuda-keyring_1.1-1_all.deb
     rm cuda-keyring_1.1-1_all.deb

-    wget https://urm.nvidia.com/artifactory/sw-gpu-cuda-installer-generic-local/packaging/r13.0/cuda_nvrtc/linux-${ARCH}/13.0.48/cuda-nvrtc-dev-13-0_13.0.48-1_${ARCH2}.deb && \
-    dpkg -i cuda-nvrtc-dev-13-0_13.0.48-1_${ARCH2}.deb && \
-    rm cuda-nvrtc-dev-13-0_13.0.48-1_${ARCH2}.deb
+    apt-get update
+    if [[ $(apt list --installed | grep libcudnn9) ]]; then
+        apt-get remove --purge -y libcudnn9*
+    fi
+    if [[ $(apt list --installed | grep libnccl) ]]; then
+        apt-get remove --purge -y --allow-change-held-packages libnccl*
+    fi
+    if [[ $(apt list --installed | grep libcublas) ]]; then
+        apt-get remove --purge -y --allow-change-held-packages libcublas*
+    fi
+    if [[ $(apt list --installed | grep cuda-nvrtc-dev) ]]; then
+        apt-get remove --purge -y --allow-change-held-packages cuda-nvrtc-dev*
+    fi

-    # apt-get update
-    # if [[ $(apt list --installed | grep libcudnn9) ]]; then
-    #     apt-get remove --purge -y libcudnn9*
-    # fi
-    # if [[ $(apt list --installed | grep libnccl) ]]; then
-    #     apt-get remove --purge -y --allow-change-held-packages libnccl*
-    # fi
-    # if [[ $(apt list --installed | grep libcublas) ]]; then
-    #     apt-get remove --purge -y --allow-change-held-packages libcublas*
-    # fi
-    # if [[ $(apt list --installed | grep cuda-nvrtc-dev) ]]; then
-    #     apt-get remove --purge -y --allow-change-held-packages cuda-nvrtc-dev*
-    # fi
+    CUBLAS_CUDA_VERSION=$(echo $CUDA_VER | sed 's/\./-/g')
+    NVRTC_CUDA_VERSION=$(echo $CUDA_VER | sed 's/\./-/g')

-    # CUBLAS_CUDA_VERSION=$(echo $CUDA_VER | sed 's/\./-/g')
-    # NVRTC_CUDA_VERSION=$(echo $CUDA_VER | sed 's/\./-/g')
-
-    # apt-get install -y --no-install-recommends \
-    #     libcudnn9-cuda-13=${CUDNN_VER} \
-    #     libcudnn9-dev-cuda-13=${CUDNN_VER} \
-    #     libcudnn9-headers-cuda-13=${CUDNN_VER} \
-    #     libnccl2=${NCCL_VER} \
-    #     libnccl-dev=${NCCL_VER} \
-    #     libcublas-${CUBLAS_CUDA_VERSION}=${CUBLAS_VER} \
-    #     libcublas-dev-${CUBLAS_CUDA_VERSION}=${CUBLAS_VER} \
-    #     cuda-nvrtc-dev-${NVRTC_CUDA_VERSION}=${NVRTC_VER}
+    apt-get install -y --no-install-recommends \
+        libcudnn9-cuda-13=${CUDNN_VER} \
+        libcudnn9-dev-cuda-13=${CUDNN_VER} \
+        libcudnn9-headers-cuda-13=${CUDNN_VER} \
+        libnccl2=${NCCL_VER} \
+        libnccl-dev=${NCCL_VER} \
+        libcublas-${CUBLAS_CUDA_VERSION}=${CUBLAS_VER} \
+        libcublas-dev-${CUBLAS_CUDA_VERSION}=${CUBLAS_VER} \
+        cuda-nvrtc-dev-${NVRTC_CUDA_VERSION}=${NVRTC_VER}

     apt-get clean
     rm -rf /var/lib/apt/lists/*
@@ -167,7 +163,7 @@ case "$ID" in
     install_tensorrt
     ;;
   rocky)
-    # install_rockylinux_requirements
+    install_rockylinux_requirements
     install_tensorrt
     ;;
   *)

diff --git a/docker/common/install_ucx.sh b/docker/common/install_ucx.sh
index 22f444d974..613ac1c773 100644
--- a/docker/common/install_ucx.sh
+++ b/docker/common/install_ucx.sh
@@ -2,7 +2,7 @@ set -ex
 GITHUB_URL="https://github.com"
-UCX_VERSION="v1.18.1"
+UCX_VERSION="v1.19.0" UCX_INSTALL_PATH="/usr/local/ucx/" CUDA_PATH="/usr/local/cuda" UCX_REPO="https://github.com/openucx/ucx.git" diff --git a/jenkins/Build.groovy b/jenkins/Build.groovy index dbd3cc3ec7..b7ff896665 100644 --- a/jenkins/Build.groovy +++ b/jenkins/Build.groovy @@ -16,6 +16,9 @@ AARCH64_TRIPLE = "aarch64-linux-gnu" LLM_DOCKER_IMAGE = env.dockerImage +LLM_DOCKER_IMAGE_12_9 = "urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:pytorch-25.06-py3-x86_64-ubuntu24.04-trt10.11.0.33-skip-tritondevel-202508051130-6090" +LLM_SBSA_DOCKER_IMAGE_12_9 = "urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:pytorch-25.06-py3-aarch64-ubuntu24.04-trt10.11.0.33-skip-tritondevel-202508051130-6090" + // Always use x86_64 image for agent AGENT_IMAGE = env.dockerImage.replace("aarch64", "x86_64") @@ -35,6 +38,9 @@ def WHEEL_ARCHS = "wheelArchs" @Field def CONFIG_LINUX_X86_64_VANILLA = "linux_x86_64_Vanilla" +@Field +def CONFIG_LINUX_X86_64_VANILLA_CU12 = "linux_x86_64_Vanilla_CU12" + @Field def CONFIG_LINUX_X86_64_SINGLE_DEVICE = "linux_x86_64_SingleDevice" @@ -42,7 +48,10 @@ def CONFIG_LINUX_X86_64_SINGLE_DEVICE = "linux_x86_64_SingleDevice" def CONFIG_LINUX_X86_64_LLVM = "linux_x86_64_LLVM" @Field -CONFIG_LINUX_AARCH64 = "linux_aarch64" +def CONFIG_LINUX_AARCH64 = "linux_aarch64" + +@Field +def CONFIG_LINUX_AARCH64_CU12 = "linux_aarch64_CU12" @Field def CONFIG_LINUX_AARCH64_LLVM = "linux_aarch64_LLVM" @@ -62,6 +71,11 @@ def BUILD_CONFIGS = [ (TARNAME) : "TensorRT-LLM.tar.gz", (WHEEL_ARCHS): "80-real;86-real;89-real;90-real;100-real;120-real", ], + (CONFIG_LINUX_X86_64_VANILLA_CU12) : [ + (WHEEL_EXTRA_ARGS) : "--extra-cmake-vars ENABLE_MULTI_DEVICE=1 --extra-cmake-vars WARNING_IS_ERROR=ON --extra-cmake-vars NIXL_ROOT=/opt/nvidia/nvda_nixl --micro_benchmarks", + (TARNAME) : "TensorRT-LLM-CU12.tar.gz", + (WHEEL_ARCHS): "80-real;86-real;89-real;90-real;100-real;120-real", + ], (CONFIG_LINUX_X86_64_NANOBIND) : [ (WHEEL_EXTRA_ARGS) : "--binding_type nanobind --extra-cmake-vars ENABLE_MULTI_DEVICE=1 --extra-cmake-vars WARNING_IS_ERROR=ON --extra-cmake-vars NIXL_ROOT=/opt/nvidia/nvda_nixl --micro_benchmarks", (TARNAME) : "nanobind-TensorRT-LLM.tar.gz", @@ -82,6 +96,11 @@ def BUILD_CONFIGS = [ (TARNAME) : "TensorRT-LLM-GH200.tar.gz", (WHEEL_ARCHS): "90-real;100-real;120-real", ], + (CONFIG_LINUX_AARCH64_CU12): [ + (WHEEL_EXTRA_ARGS) : "--extra-cmake-vars WARNING_IS_ERROR=ON", + (TARNAME) : "TensorRT-LLM-GH200-CU12.tar.gz", + (WHEEL_ARCHS): "90-real;100-real;120-real", + ], (CONFIG_LINUX_AARCH64_NANOBIND): [ (WHEEL_EXTRA_ARGS) : "--binding_type nanobind --extra-cmake-vars WARNING_IS_ERROR=ON", (TARNAME) : "nanobind-TensorRT-LLM-GH200.tar.gz", @@ -448,6 +467,9 @@ def runLLMBuild(pipeline, buildFlags, tarName, is_linux_x86_64) // TODO: Remove after the cmake version is upgraded to 3.31.8 // Get triton tag from docker/dockerfile.multi def tritonShortTag = "r25.08" + if (tarName.contains("CU12")) { + tritonShortTag = "r25.06" + } sh "cd ${LLM_ROOT}/triton_backend/inflight_batcher_llm && mkdir build && cd build && cmake .. -DTRTLLM_DIR=${llmPath} -DTRITON_COMMON_REPO_TAG=${tritonShortTag} -DTRITON_CORE_REPO_TAG=${tritonShortTag} -DTRITON_THIRD_PARTY_REPO_TAG=${tritonShortTag} -DTRITON_BACKEND_REPO_TAG=${tritonShortTag} -DUSE_CXX11_ABI=ON && make -j${BUILD_JOBS} install" // Step 3: packaging wheels into tarfile @@ -537,9 +559,13 @@ def launchStages(pipeline, cpu_arch, enableFailFast, globalVars) wheelDockerImage = env.dockerImage } + def LLM_DOCKER_IMAGE_CU12 = cpu_arch == AARCH64_TRIPLE ? 
LLM_SBSA_DOCKER_IMAGE_12_9 : LLM_DOCKER_IMAGE_12_9 + buildConfigs = [ "Build TRT-LLM": [LLM_DOCKER_IMAGE] + prepareLLMBuild( pipeline, cpu_arch == AARCH64_TRIPLE ? CONFIG_LINUX_AARCH64 : CONFIG_LINUX_X86_64_VANILLA), + "Build TRT-LLM CUDA12": [LLM_DOCKER_IMAGE_CU12] + prepareLLMBuild( + pipeline, cpu_arch == AARCH64_TRIPLE ? CONFIG_LINUX_AARCH64_CU12 : CONFIG_LINUX_X86_64_VANILLA_CU12), "Build TRT-LLM LLVM": [LLM_DOCKER_IMAGE] + prepareLLMBuild( pipeline, cpu_arch == AARCH64_TRIPLE ? CONFIG_LINUX_AARCH64_LLVM : CONFIG_LINUX_X86_64_LLVM), "Build TRT-LLM Nanobind": [LLM_DOCKER_IMAGE] + prepareLLMBuild( diff --git a/jenkins/L0_Test.groovy b/jenkins/L0_Test.groovy index ae995274aa..16b8029aa7 100644 --- a/jenkins/L0_Test.groovy +++ b/jenkins/L0_Test.groovy @@ -38,6 +38,9 @@ LLM_DOCKER_IMAGE = env.dockerImage LLM_ROCKYLINUX8_PY310_DOCKER_IMAGE = env.wheelDockerImagePy310 LLM_ROCKYLINUX8_PY312_DOCKER_IMAGE = env.wheelDockerImagePy312 +LLM_DOCKER_IMAGE_12_9 = "urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:pytorch-25.06-py3-x86_64-ubuntu24.04-trt10.11.0.33-skip-tritondevel-202508051130-6090" +LLM_SBSA_DOCKER_IMAGE_12_9 = "urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:pytorch-25.06-py3-aarch64-ubuntu24.04-trt10.11.0.33-skip-tritondevel-202508051130-6090" + // DLFW torch image DLFW_IMAGE = "nvcr.io/nvidia/pytorch:25.06-py3" @@ -55,6 +58,9 @@ def TARNAME = "tarName" @Field def VANILLA_CONFIG = "Vanilla" +@Field +def VANILLA_CONFIG_CU12 = "Vanilla_CU12" + @Field def SINGLE_DEVICE_CONFIG = "SingleDevice" @@ -62,7 +68,10 @@ def SINGLE_DEVICE_CONFIG = "SingleDevice" def LLVM_CONFIG = "LLVM" @Field -LINUX_AARCH64_CONFIG = "linux_aarch64" +def LINUX_AARCH64_CONFIG = "linux_aarch64" + +@Field +def LINUX_AARCH64_CONFIG_CU12 = "linux_aarch64_CU12" @Field def NANOBIND_CONFIG = "Nanobind" @@ -71,9 +80,11 @@ def NANOBIND_CONFIG = "Nanobind" def BUILD_CONFIGS = [ // Vanilla TARNAME is used for packaging in runLLMPackage (VANILLA_CONFIG) : [(TARNAME) : "TensorRT-LLM.tar.gz"], + (VANILLA_CONFIG_CU12) : [(TARNAME) : "TensorRT-LLM-CU12.tar.gz"], (SINGLE_DEVICE_CONFIG) : [(TARNAME) : "single-device-TensorRT-LLM.tar.gz"], (LLVM_CONFIG) : [(TARNAME) : "llvm-TensorRT-LLM.tar.gz"], (LINUX_AARCH64_CONFIG) : [(TARNAME) : "TensorRT-LLM-GH200.tar.gz"], + (LINUX_AARCH64_CONFIG_CU12) : [(TARNAME) : "TensorRT-LLM-GH200-CU12.tar.gz"], (NANOBIND_CONFIG) : [(TARNAME) : "nanobind-TensorRT-LLM.tar.gz"], ] @@ -1310,6 +1321,9 @@ def runLLMTestlistOnPlatformImpl(pipeline, platform, testList, config=VANILLA_CO if (env.alternativeTRT) { sh "cd ${llmSrc} && sed -i 's#tensorrt~=.*\$#tensorrt#g' requirements.txt && cat requirements.txt" } + if (stageName.contains("-CU12")) { + trtllm_utils.llmExecStepWithRetry(pipeline, script: "cd ${llmSrc} && sed -i 's/-cu13/-cu12/g' requirements.txt && cat requirements.txt") + } trtllm_utils.llmExecStepWithRetry(pipeline, script: "cd ${llmSrc} && pip3 install --retries 1 -r requirements-dev.txt") if (!skipInstallWheel) { trtllm_utils.llmExecStepWithRetry(pipeline, script: "cd ${llmPath} && pip3 install --force-reinstall --no-deps TensorRT-LLM/tensorrt_llm-*.whl") @@ -1783,7 +1797,7 @@ def launchTestJobs(pipeline, testFilter, dockerNode=null) "A100X-PyTorch-1": ["a100x", "l0_a100", 1, 1], "L40S-PyTorch-1": ["l40s", "l0_l40s", 1, 2], "L40S-PyTorch-2": ["l40s", "l0_l40s", 2, 2], - "H100_PCIe-PyTorch-1": ["h100-cr", "l0_h100", 1, 3], + "H100_PCIe-PyTorch-CU12-1": ["h100-cr", "l0_h100", 1, 3], "H100_PCIe-PyTorch-2": ["h100-cr", "l0_h100", 2, 3], "H100_PCIe-PyTorch-3": ["h100-cr", "l0_h100", 3, 3], 
"H100_PCIe-CPP-1": ["h100-cr", "l0_h100", 1, 2], @@ -1842,7 +1856,7 @@ def launchTestJobs(pipeline, testFilter, dockerNode=null) "DGX_H200-4_GPUs-TensorRT-Post-Merge-3": ["dgx-h200-x4", "l0_dgx_h200", 3, 3, 4], ] - parallelJobs = x86TestConfigs.collectEntries{key, values -> [key, [createKubernetesPodConfig(LLM_DOCKER_IMAGE, values[0], "amd64", values[4] ?: 1, key.contains("Perf")), { + parallelJobs = x86TestConfigs.collectEntries{key, values -> [key, [createKubernetesPodConfig(key.contains("-CU12-") ? LLM_DOCKER_IMAGE_12_9 : LLM_DOCKER_IMAGE, values[0], "amd64", values[4] ?: 1, key.contains("Perf")), { def config = VANILLA_CONFIG if (key.contains("single-device")) { config = SINGLE_DEVICE_CONFIG @@ -1853,6 +1867,9 @@ def launchTestJobs(pipeline, testFilter, dockerNode=null) if (key.contains("Nanobind")) { config = NANOBIND_CONFIG } + if (key.contains("-CU12-")) { + config = VANILLA_CONFIG_CU12 + } runLLMTestlistOnPlatform(pipeline, values[0], values[1], config, key.contains("Perf"), key, values[2], values[3]) }]]} fullSet = parallelJobs.keySet() @@ -1871,6 +1888,9 @@ def launchTestJobs(pipeline, testFilter, dockerNode=null) if (key.contains("llvm")) { config = LLVM_CONFIG } + if (key.contains("-CU12-")) { + config = VANILLA_CONFIG_CU12 + } runLLMTestlistOnSlurm(pipeline, values[0], values[1], config, key.contains("Perf"), key, values[2], values[3], values[4] ?: 1) }]]} @@ -1902,7 +1922,7 @@ def launchTestJobs(pipeline, testFilter, dockerNode=null) fullSet += multiNodesSBSAConfigs.keySet() if (env.targetArch == AARCH64_TRIPLE) { - parallelJobs = SBSATestConfigs.collectEntries{key, values -> [key, [createKubernetesPodConfig(LLM_DOCKER_IMAGE, values[0], "arm64"), { + parallelJobs = SBSATestConfigs.collectEntries{key, values -> [key, [createKubernetesPodConfig(key.contains("-CU12-") ? 
LLM_SBSA_DOCKER_IMAGE_12_9 : LLM_DOCKER_IMAGE, values[0], "arm64"), { runLLMTestlistOnPlatform(pipeline, values[0], values[1], LINUX_AARCH64_CONFIG, false, key, values[2], values[3]) }]]} @@ -1987,7 +2007,7 @@ def launchTestJobs(pipeline, testFilter, dockerNode=null) aarch64SanityCheckConfigs = [ "PY312-UB2404": [ - LLM_DOCKER_IMAGE, + LLM_SBSA_DOCKER_IMAGE_12_9, "GH200", AARCH64_TRIPLE, false, @@ -1996,7 +2016,7 @@ def launchTestJobs(pipeline, testFilter, dockerNode=null) true, // Extra PyTorch CUDA 12.8 install ], "PY312-DLFW": [ - LLM_DOCKER_IMAGE, + LLM_SBSA_DOCKER_IMAGE_12_9, "GH200", AARCH64_TRIPLE, false, @@ -2097,9 +2117,11 @@ def launchTestJobs(pipeline, testFilter, dockerNode=null) trtllm_utils.llmExecStepWithRetry(pipeline, script: "wget https://developer.download.nvidia.com/compute/cuda/repos/${ubuntu_version}/${platform}/cuda-keyring_1.1-1_all.deb") trtllm_utils.llmExecStepWithRetry(pipeline, script: "dpkg -i cuda-keyring_1.1-1_all.deb") trtllm_utils.llmExecStepWithRetry(pipeline, script: "apt-get update") - trtllm_utils.llmExecStepWithRetry(pipeline, script: "apt-get -y install cuda-toolkit-13-0") - } + trtllm_utils.llmExecStepWithRetry(pipeline, script: "apt-get -y install cuda-toolkit-12-9") + trtllm_utils.llmExecStepWithRetry(pipeline, script: "pip3 install 'cuda-python>=12,<13' 'nvidia-ml-py>=12,<13'") + } + trtllm_utils.llmExecStepWithRetry(pipeline, script: "sed -i 's/-cu13/-cu12/g' ${LLM_ROOT}/requirements.txt") // Extra PyTorch CUDA 12.8 install for SBSA platform and Blackwell GPUs bare-metal environments if (values[6]) { echo "###### Extra PyTorch CUDA 12.8 install Start ######" @@ -2132,9 +2154,9 @@ def launchTestJobs(pipeline, testFilter, dockerNode=null) checkPipInstall(pipeline, "${cpu_arch}/${wheelPath}") } echo "###### Run LLMAPI tests Start ######" - def config = VANILLA_CONFIG + def config = VANILLA_CONFIG_CU12 if (cpu_arch == AARCH64_TRIPLE) { - config = LINUX_AARCH64_CONFIG + config = LINUX_AARCH64_CONFIG_CU12 } withEnv(libEnv) { sh "env | sort" diff --git a/jenkins/current_image_tags.properties b/jenkins/current_image_tags.properties index 5774cad038..ccb6d9f503 100644 --- a/jenkins/current_image_tags.properties +++ b/jenkins/current_image_tags.properties @@ -13,7 +13,11 @@ # images are adopted from PostMerge pipelines, the abbreviated commit hash is used instead. 
LLM_DOCKER_IMAGE_12_9=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:pytorch-25.06-py3-x86_64-ubuntu24.04-trt10.11.0.33-skip-tritondevel-202508051130-6090 LLM_SBSA_DOCKER_IMAGE_12_9=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:pytorch-25.06-py3-aarch64-ubuntu24.04-trt10.11.0.33-skip-tritondevel-202508051130-6090 -LLM_DOCKER_IMAGE=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm-staging/tritondevel:x86_64-tritondevel-torch_skip-a9bc5c5-user_zhanruis_update_dlfw_and_cu13-656 -LLM_SBSA_DOCKER_IMAGE=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm-staging/tritondevel:sbsa-tritondevel-torch_skip-a9bc5c5-user_zhanruis_update_dlfw_and_cu13-656 +LLM_ROCKYLINUX8_PY310_DOCKER_IMAGE_12_9=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:cuda-12.9.1-devel-rocky8-x86_64-rocky8-py310-trt10.11.0.33-skip-tritondevel-202508051130-6090 +LLM_ROCKYLINUX8_PY312_DOCKER_IMAGE_12_9=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:cuda-12.9.1-devel-rocky8-x86_64-rocky8-py312-trt10.11.0.33-skip-tritondevel-202508051130-6090 +LLM_DOCKER_IMAGE=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:pytorch-25.08-py3-x86_64-ubuntu24.04-trt10.13.2.6-skip-tritondevel-202508151730-pre-test +LLM_SBSA_DOCKER_IMAGE=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:pytorch-25.08-py3-aarch64-ubuntu24.04-trt10.13.2.6-skip-tritondevel-202508151730-pre-test +#LLM_ROCKYLINUX8_PY310_DOCKER_IMAGE=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:cuda-13.0.0-devel-rocky8-x86_64-rocky8-py310-trt10.13.2.6-skip-tritondevel-202508151730-pre-test +#LLM_ROCKYLINUX8_PY312_DOCKER_IMAGE=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:cuda-13.0.0-devel-rocky8-x86_64-rocky8-py312-trt10.13.2.6-skip-tritondevel-202508151730-pre-test LLM_ROCKYLINUX8_PY310_DOCKER_IMAGE=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:cuda-12.9.1-devel-rocky8-x86_64-rocky8-py310-trt10.11.0.33-skip-tritondevel-202508051130-6090 LLM_ROCKYLINUX8_PY312_DOCKER_IMAGE=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:cuda-12.9.1-devel-rocky8-x86_64-rocky8-py312-trt10.11.0.33-skip-tritondevel-202508051130-6090 diff --git a/requirements.txt b/requirements.txt index f865188a7d..252a27987d 100644 --- a/requirements.txt +++ b/requirements.txt @@ -3,7 +3,7 @@ accelerate>=0.25.0 build colored -cuda-python +cuda-python>=12 diffusers>=0.27.0 lark mpi4py @@ -13,7 +13,7 @@ onnx_graphsurgeon>=0.5.2 openai polygraphy psutil -nvidia-ml-py>=12,<13 +nvidia-ml-py>=12 # Just a wrapper since nvidia-modelopt requires pynvml pynvml==12.0.0 pulp
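
Note on the docker/Makefile hunk: the devel base images now default to CUDA 13.0.0, while the CU12 pipeline keeps using prebuilt 12.9 images. Because GNU make gives command-line variable definitions precedence over makefile assignments, including the target-specific BASE_TAG defaults above, a CUDA 12.9 base can still be selected without editing the Makefile. A minimal sketch; the target name ubuntu22_build is an assumed instance of the Makefile's ubuntu22_% pattern rules, not taken from this patch:

    # Override the CUDA 13.0.0 default from the command line; command-line
    # variables take precedence over the target-specific BASE_TAG assignment.
    make -C docker ubuntu22_build BASE_TAG=12.9.1-devel-ubuntu22.04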
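
Note on the install_ubuntu_requirements() hunk: it purges any preinstalled cuDNN / NCCL / cuBLAS / NVRTC packages and reinstalls them at the versions pinned at the top of install_tensorrt.sh. A minimal post-install check that the pins took effect, assuming a Debian-based image (the Rocky Linux 8 path installs RPMs, so dpkg-query does not apply there):

    #!/bin/bash
    # Compare installed package versions against the pins in install_tensorrt.sh.
    CUDNN_VER="9.12.0.46-1"
    NCCL_VER="2.27.7-1+cuda13.0"
    CUBLAS_VER="13.0.0.19-1"

    check() {
        local pkg="$1" want="$2" have
        have=$(dpkg-query -W -f '${Version}' "$pkg" 2>/dev/null) \
            || { echo "$pkg: not installed"; return 1; }
        if [ "$have" = "$want" ]; then
            echo "$pkg: OK ($have)"
        else
            echo "$pkg: MISMATCH (have $have, want $want)"
        fi
    }

    check libcudnn9-cuda-13 "$CUDNN_VER"
    check libnccl2 "$NCCL_VER"
    # "13-0" is CUDA_VER with dots replaced by dashes, as in the script.
    check libcublas-13-0 "$CUBLAS_VER"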
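
Note on the -CU12- Jenkins stages: rather than maintaining a second requirements file, they reuse the CUDA 13 one and rewrite the CUDA-suffixed wheel names before install, using the same sed invocation that appears in L0_Test.groovy. A minimal sketch of that rewrite; the package names below are illustrative placeholders, not the actual contents of this repository's requirements.txt:

    #!/bin/bash
    # Illustrative input; the real requirements.txt pins different packages.
    printf '%s\n' 'nvidia-nccl-cu13' 'nvidia-cublas-cu13' 'cuda-python>=12' \
        > requirements.sample.txt

    # The rewrite the CU12 stages apply: swap every -cu13 wheel suffix to -cu12.
    sed -i 's/-cu13/-cu12/g' requirements.sample.txt

    cat requirements.sample.txt
    # nvidia-nccl-cu12
    # nvidia-cublas-cu12
    # cuda-python>=12

Lines without a -cu13 suffix, such as cuda-python>=12, pass through unchanged, which is why the relaxed bounds in requirements.txt work for both toolkit versions.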