diff --git a/docker/Dockerfile.multi b/docker/Dockerfile.multi
index dcd4ca073b..61c19f7392 100644
--- a/docker/Dockerfile.multi
+++ b/docker/Dockerfile.multi
@@ -1,8 +1,8 @@
 # Multi-stage Dockerfile
-ARG BASE_IMAGE=gitlab-master.nvidia.com:5005/dl/dgx/pytorch
-ARG TRITON_IMAGE=gitlab-master.nvidia.com:5005/dl/dgx/tritonserver
-ARG BASE_TAG=25.08-py3.32674667-devel
-ARG TRITON_BASE_TAG=25.08-py3.32978230
+ARG BASE_IMAGE=nvcr.io/nvidia/pytorch
+ARG TRITON_IMAGE=nvcr.io/nvidia/tritonserver
+ARG BASE_TAG=25.08-py3
+ARG TRITON_BASE_TAG=25.08-py3
 ARG DEVEL_IMAGE=devel
 
 FROM ${BASE_IMAGE}:${BASE_TAG} AS base
diff --git a/docker/Makefile b/docker/Makefile
index 81adc8e532..c868876ed8 100644
--- a/docker/Makefile
+++ b/docker/Makefile
@@ -79,6 +79,7 @@ endef
 %_build:
 	@echo "Building docker image: $(IMAGE_WITH_TAG)"
 	docker buildx build $(DOCKER_BUILD_OPTS) $(DOCKER_BUILD_ARGS) \
+		--network=host \
 		--progress $(DOCKER_PROGRESS) \
 		$(if $(BASE_IMAGE), --build-arg BASE_IMAGE=$(BASE_IMAGE)) \
 		$(if $(BASE_TAG), --build-arg BASE_TAG=$(BASE_TAG)) \
@@ -191,16 +192,16 @@ jenkins-aarch64_%: STAGE = tritondevel
 jenkins-rockylinux8_%: PYTHON_VERSION_TAG_ID = $(if $(findstring 3.12,${PYTHON_VERSION}),PY312,$(if $(findstring 3.10,${PYTHON_VERSION}),PY310,$(error Unknown PYTHON_VERSION specified)))
 jenkins-rockylinux8_%: IMAGE_WITH_TAG = $(shell . ../jenkins/current_image_tags.properties && echo $$LLM_ROCKYLINUX8_${PYTHON_VERSION_TAG_ID}_DOCKER_IMAGE)
 jenkins-rockylinux8_%: STAGE = tritondevel
-jenkins-rockylinux8_%: BASE_IMAGE = nvidia/cuda
+jenkins-rockylinux8_%: BASE_IMAGE = nvcr.io/nvidia/cuda
 jenkins-rockylinux8_%: BASE_TAG = 13.0.0-devel-rockylinux8
 
 rockylinux8_%: STAGE = tritondevel
-rockylinux8_%: BASE_IMAGE = nvidia/cuda
+rockylinux8_%: BASE_IMAGE = nvcr.io/nvidia/cuda
 rockylinux8_%: BASE_TAG = 13.0.0-devel-rockylinux8
 
 # For x86_64 and aarch64
 ubuntu22_%: STAGE = tritondevel
-ubuntu22_%: BASE_IMAGE = nvidia/cuda
+ubuntu22_%: BASE_IMAGE = nvcr.io/nvidia/cuda
 ubuntu22_%: BASE_TAG = 13.0.0-devel-ubuntu22.04
 
 trtllm_%: STAGE = release
diff --git a/docker/common/install_pytorch.sh b/docker/common/install_pytorch.sh
index 6dcba33039..ca70784348 100644
--- a/docker/common/install_pytorch.sh
+++ b/docker/common/install_pytorch.sh
@@ -4,8 +4,8 @@ set -ex
 
 # Use latest stable version from https://pypi.org/project/torch/#history
 # and closest to the version specified in
-# https://docs.nvidia.com/deeplearning/frameworks/pytorch-release-notes/rel-25-06.html#rel-25-06
-TORCH_VERSION="2.7.1"
+# https://docs.nvidia.com/deeplearning/frameworks/pytorch-release-notes/rel-25-08.html#rel-25-08
+TORCH_VERSION="2.8.0"
 SYSTEM_ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"')
 
 prepare_environment() {
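Note: the docker/ changes above swap the internal GitLab mirrors for the public NGC registry (nvcr.io) and add `--network=host` to the buildx call. A minimal sketch of how the pattern rules compose, assuming the usual `make -C docker <target>` entry point (the target name here is illustrative):

```bash
# Hypothetical invocation: the ubuntu22_% assignments above pin the base
# image, so the %_build rule expands roughly to
#   docker buildx build ... --network=host \
#     --build-arg BASE_IMAGE=nvcr.io/nvidia/cuda \
#     --build-arg BASE_TAG=13.0.0-devel-ubuntu22.04 ...
make -C docker ubuntu22_build
```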
LLVM") // TODO: Remove after LLVM is supported on AArch64 } rtServer ( diff --git a/jenkins/L0_Test.groovy b/jenkins/L0_Test.groovy index d2ed6964c5..342bed4e28 100644 --- a/jenkins/L0_Test.groovy +++ b/jenkins/L0_Test.groovy @@ -37,11 +37,16 @@ LLM_DOCKER_IMAGE = env.dockerImage LLM_ROCKYLINUX8_PY310_DOCKER_IMAGE = env.wheelDockerImagePy310 LLM_ROCKYLINUX8_PY312_DOCKER_IMAGE = env.wheelDockerImagePy312 +LLM_ROCKYLINUX8_PY310_DOCKER_IMAGE_12_9="urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:cuda-12.9.1-devel-rocky8-x86_64-rocky8-py310-trt10.11.0.33-skip-tritondevel-202508051130-6090" +LLM_ROCKYLINUX8_PY312_DOCKER_IMAGE_12_9="urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:cuda-12.9.1-devel-rocky8-x86_64-rocky8-py312-trt10.11.0.33-skip-tritondevel-202508051130-6090" + LLM_DOCKER_IMAGE_12_9 = "urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:pytorch-25.06-py3-x86_64-ubuntu24.04-trt10.11.0.33-skip-tritondevel-202508051130-6090" LLM_SBSA_DOCKER_IMAGE_12_9 = "urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:pytorch-25.06-py3-aarch64-ubuntu24.04-trt10.11.0.33-skip-tritondevel-202508051130-6090" +DLFW_IMAGE_12_9 = "urm.nvidia.com/docker/nvidia/pytorch:25.06-py3" + // DLFW torch image -DLFW_IMAGE = "urm.nvidia.com/docker/nvidia/pytorch:25.06-py3" +DLFW_IMAGE = "urm.nvidia.com/docker/nvidia/pytorch:25.08-py3" //Ubuntu base image UBUNTU_22_04_IMAGE = "urm.nvidia.com/docker/ubuntu:22.04" @@ -769,6 +774,16 @@ def createKubernetesPodConfig(image, type, arch = "amd64", gpuCount = 1, perfMod path: /vol/scratch1/scratch.svc_tensorrt_blossom """ } + // TODO: remove this after GH200 driver upgrade + def hostnameMatch = "" + if (type == "gh200") { + hostnameMatch = """ + - key: "kubernetes.io/hostname" + operator: In + values: + - "lego-cg1-qct-066.ipp3a2.colossus\"""" + } + def podConfig = [ cloud: targetCould, namespace: "sw-tensorrt", @@ -788,7 +803,7 @@ def createKubernetesPodConfig(image, type, arch = "amd64", gpuCount = 1, perfMod - key: "tensorrt/affinity" operator: NotIn values: - - "core" + - "core"${hostnameMatch} nodeSelector: ${selectors} containers: ${containerConfig} @@ -1354,7 +1369,7 @@ def runLLMTestlistOnPlatformImpl(pipeline, platform, testList, config=VANILLA_CO sh "cd ${llmSrc} && sed -i 's#tensorrt~=.*\$#tensorrt#g' requirements.txt && cat requirements.txt" } if (stageName.contains("-CU12")) { - trtllm_utils.llmExecStepWithRetry(pipeline, script: "cd ${llmSrc} && sed -i 's/-cu13/-cu12/g' requirements.txt && cat requirements.txt") + trtllm_utils.llmExecStepWithRetry(pipeline, script: "cd ${llmSrc} && sed -i '/^# .*\$/ {s/^# //; n; s/^/# /}' requirements.txt && cat requirements.txt") } trtllm_utils.llmExecStepWithRetry(pipeline, script: "cd ${llmSrc} && pip3 install --retries 1 -r requirements-dev.txt") if (!skipInstallWheel) { @@ -1616,7 +1631,7 @@ def checkPipInstall(pipeline, wheel_path) } -def runLLMBuild(pipeline, cpu_arch, reinstall_dependencies=false, wheel_path="", cpver="cp312") +def runLLMBuild(pipeline, cpu_arch, reinstall_dependencies=false, wheel_path="", cpver="cp312", is_cu12=false) { sh "pwd && ls -alh" sh "env | sort" @@ -1624,7 +1639,10 @@ def runLLMBuild(pipeline, cpu_arch, reinstall_dependencies=false, wheel_path="", trtllm_utils.checkoutSource(LLM_REPO, env.gitlabCommit, "tensorrt_llm", true, true) if (env.alternativeTRT) { - sh "cd ${LLM_ROOT} && sed -i 's#tensorrt~=.*\$#tensorrt#g' requirements.txt && cat requirements.txt" + sh "cd tensorrt_llm/ && sed -i 's#tensorrt~=.*\$#tensorrt#g' requirements.txt && cat requirements.txt" + } + if (is_cu12) { + 
trtllm_utils.llmExecStepWithRetry(pipeline, script: "cd tensorrt_llm/ && sed -i '/^# .*\$/ {s/^# //; n; s/^/# /}' requirements.txt && cat requirements.txt") } // Random sleep to avoid resource contention @@ -2013,17 +2031,17 @@ def launchTestJobs(pipeline, testFilter, dockerNode=null) // Python version and OS for sanity check x86SanityCheckConfigs = [ - "PY312-DLFW": [ - LLM_ROCKYLINUX8_PY312_DOCKER_IMAGE, + "PY312-DLFW-CU12": [ + LLM_ROCKYLINUX8_PY312_DOCKER_IMAGE_12_9, "B200_PCIe", X86_64_TRIPLE, true, "dlfw/", - DLFW_IMAGE, + DLFW_IMAGE_12_9, false, ], - "PY310-UB2204": [ - LLM_ROCKYLINUX8_PY310_DOCKER_IMAGE, + "PY310-UB2204-CU12": [ + LLM_ROCKYLINUX8_PY310_DOCKER_IMAGE_12_9, "A10", X86_64_TRIPLE, true, @@ -2031,8 +2049,8 @@ def launchTestJobs(pipeline, testFilter, dockerNode=null) UBUNTU_22_04_IMAGE, false, ], - "PY312-UB2404": [ - LLM_ROCKYLINUX8_PY312_DOCKER_IMAGE, + "PY312-UB2404-CU12": [ + LLM_ROCKYLINUX8_PY312_DOCKER_IMAGE_12_9, "RTX5090", X86_64_TRIPLE, true, @@ -2043,7 +2061,7 @@ def launchTestJobs(pipeline, testFilter, dockerNode=null) ] aarch64SanityCheckConfigs = [ - "PY312-UB2404": [ + "PY312-UB2404-CU12": [ LLM_SBSA_DOCKER_IMAGE_12_9, "GH200", AARCH64_TRIPLE, @@ -2052,13 +2070,13 @@ def launchTestJobs(pipeline, testFilter, dockerNode=null) UBUNTU_24_04_IMAGE, true, // Extra PyTorch CUDA 12.8 install ], - "PY312-DLFW": [ + "PY312-DLFW-CU12": [ LLM_SBSA_DOCKER_IMAGE_12_9, "GH200", AARCH64_TRIPLE, false, "dlfw/", - DLFW_IMAGE, + DLFW_IMAGE_12_9, false, ], ] @@ -2114,7 +2132,7 @@ def launchTestJobs(pipeline, testFilter, dockerNode=null) env = ["LD_LIBRARY_PATH+=:/usr/local/cuda/compat"] } withEnv(env) { - wheelName = runLLMBuild(pipeline, cpu_arch, values[3], wheelPath, cpver) + wheelName = runLLMBuild(pipeline, cpu_arch, values[3], wheelPath, cpver, key.contains("CU12")) } } @@ -2139,7 +2157,7 @@ def launchTestJobs(pipeline, testFilter, dockerNode=null) echo "###### Prerequisites Start ######" echoNodeAndGpuInfo(pipeline, toStageName(values[1], key)) // Clean up the pip constraint file from the base NGC PyTorch image. - if (values[5] == DLFW_IMAGE) { + if (values[5] == DLFW_IMAGE || values[5] == DLFW_IMAGE_12_9) { trtllm_utils.llmExecStepWithRetry(pipeline, script: "[ -f /etc/pip/constraint.txt ] && : > /etc/pip/constraint.txt || true") } trtllm_utils.llmExecStepWithRetry(pipeline, script: "apt-get update") @@ -2148,21 +2166,30 @@ def launchTestJobs(pipeline, testFilter, dockerNode=null) trtllm_utils.llmExecStepWithRetry(pipeline, script: "pip3 config set global.break-system-packages true") trtllm_utils.llmExecStepWithRetry(pipeline, script: "pip3 install requests") trtllm_utils.llmExecStepWithRetry(pipeline, script: "pip3 uninstall -y tensorrt") - if (values[5] != DLFW_IMAGE) { + if (values[5] != DLFW_IMAGE && values[5] != DLFW_IMAGE_12_9) { def ubuntu_version = key.contains("UB2404") ? "ubuntu2404" : "ubuntu2204" def platform = cpu_arch == X86_64_TRIPLE ? 
"x86_64" : "sbsa" trtllm_utils.llmExecStepWithRetry(pipeline, script: "wget https://developer.download.nvidia.com/compute/cuda/repos/${ubuntu_version}/${platform}/cuda-keyring_1.1-1_all.deb") trtllm_utils.llmExecStepWithRetry(pipeline, script: "dpkg -i cuda-keyring_1.1-1_all.deb") trtllm_utils.llmExecStepWithRetry(pipeline, script: "apt-get update") - trtllm_utils.llmExecStepWithRetry(pipeline, script: "apt-get -y install cuda-toolkit-12-9") - - trtllm_utils.llmExecStepWithRetry(pipeline, script: "pip3 install 'cuda-python>=12,<13' 'nvidia-ml-py>=12,<13'") + if (key.contains("CU12")) { + trtllm_utils.llmExecStepWithRetry(pipeline, script: "apt-get -y install cuda-toolkit-12-9") + } else { + trtllm_utils.llmExecStepWithRetry(pipeline, script: "apt-get -y install cuda-toolkit-13-0") + } + } + if (key.contains("CU12")) { + trtllm_utils.llmExecStepWithRetry(pipeline, script: "sed -i '/^# .*\$/ {s/^# //; n; s/^/# /}' ${LLM_ROOT}/requirements.txt") + sh "cat ${LLM_ROOT}/requirements.txt" } - trtllm_utils.llmExecStepWithRetry(pipeline, script: "sed -i 's/-cu13/-cu12/g' ${LLM_ROOT}/requirements.txt") // Extra PyTorch CUDA 12.8 install for SBSA platform and Blackwell GPUs bare-metal environments if (values[6]) { echo "###### Extra PyTorch CUDA 12.8 install Start ######" - trtllm_utils.llmExecStepWithRetry(pipeline, script: "pip3 install torch==2.7.1 torchvision torchaudio --index-url https://download.pytorch.org/whl/cu128") + if (key.contains("CU12")) { + trtllm_utils.llmExecStepWithRetry(pipeline, script: "pip3 install torch==2.7.1 torchvision torchaudio --index-url https://download.pytorch.org/whl/cu128") + } else { + trtllm_utils.llmExecStepWithRetry(pipeline, script: "pip3 install torch==2.8.0 torchvision torchaudio --index-url https://download.pytorch.org/whl/cu128") + } } def libEnv = [] @@ -2181,9 +2208,10 @@ def launchTestJobs(pipeline, testFilter, dockerNode=null) } } echo "###### Run LLMAPI tests Start ######" - def config = VANILLA_CONFIG_CU12 + + def config = key.contains("CU12") ? VANILLA_CONFIG_CU12 : VANILLA_CONFIG if (cpu_arch == AARCH64_TRIPLE) { - config = LINUX_AARCH64_CONFIG_CU12 + config = key.contains("CU12") ? LINUX_AARCH64_CONFIG_CU12 : LINUX_AARCH64_CONFIG } withEnv(libEnv) { sh "env | sort" diff --git a/jenkins/current_image_tags.properties b/jenkins/current_image_tags.properties index ccb6d9f503..93ead428af 100644 --- a/jenkins/current_image_tags.properties +++ b/jenkins/current_image_tags.properties @@ -15,9 +15,7 @@ LLM_DOCKER_IMAGE_12_9=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:pytorch-25. 
diff --git a/requirements.txt b/requirements.txt
index 049e0cf8b9..0fabd78a7a 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -3,6 +3,7 @@
 accelerate>=1.7.0
 build
 colored
+# cuda-python>=12,<13 # <For CUDA 12.9>
 cuda-python>=12
 diffusers>=0.27.0
 lark
@@ -13,6 +14,7 @@ onnx_graphsurgeon>=0.5.2
 openai
 polygraphy
 psutil
+# nvidia-ml-py>=12,<13 # <For CUDA 12.9>
 nvidia-ml-py>=12
 # Just a wrapper since nvidia-modelopt requires pynvml
 pynvml==12.0.0
@@ -21,13 +23,17 @@ pandas
 h5py==3.12.1
 StrEnum
 sentencepiece>=0.1.99
-tensorrt
-# https://docs.nvidia.com/deeplearning/frameworks/pytorch-release-notes/rel-25-06.html#rel-25-06 uses 2.8.0a0.
-torch>=2.7.1,<=2.8.0a0
+# tensorrt>=10.11.0,<=10.13.0 # <For CUDA 12.9>
+tensorrt~=10.13.0
+# https://docs.nvidia.com/deeplearning/frameworks/pytorch-release-notes/rel-25-08.html#rel-25-08 uses 2.8.0a0.
+# torch>=2.7.1,<=2.8.0a0 # <For CUDA 12.9>
+torch>=2.8.0a0,<=2.8.0
 torchvision
 nvidia-modelopt[torch]~=0.33.0
-nvidia-nccl-cu13
-nvidia-cuda-nvrtc-cu13
+# https://docs.nvidia.com/deeplearning/frameworks/pytorch-release-notes/rel-25-08.html#rel-25-08 uses 2.27.7
+nvidia-nccl-cu12
+# nvidia-cuda-nvrtc-cu12 # <For CUDA 12.9>
+nvidia-cuda-nvrtc
 transformers==4.55.0
 prometheus_client
 prometheus_fastapi_instrumentator
@@ -64,6 +70,6 @@ ninja
 etcd3
 blake3
 soundfile
-triton==3.3.1; platform_machine == "x86_64"
+triton>=3.3.1,<=3.4.0; platform_machine == "x86_64"
 tiktoken
 blobfile
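Note: with these pins, a plain install resolves the CUDA 13 stack by default; each CUDA 12.9 alternative sits in the tagged comment directly above the line it replaces. A hedged sanity check of the defaults — run in a clean environment; the versions shown are the expected resolutions under the pins above, not guarantees:

```bash
pip3 install -r requirements.txt
# Expect tensorrt 10.13.x and torch 2.8.0 per the pins above.
python3 -c "import tensorrt, torch; print(tensorrt.__version__, torch.__version__)"
```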
diff --git a/scripts/build_wheel.py b/scripts/build_wheel.py
index a1275bf106..257d1301a0 100755
--- a/scripts/build_wheel.py
+++ b/scripts/build_wheel.py
@@ -865,6 +865,42 @@ def main(*,
     # and validating python changes in the whl.
     clear_folder(dist_dir)
 
+    # Modify requirements.txt for wheel build based on CUDA version
+    def modify_requirements_for_cuda():
+        requirements_file = project_dir / ("requirements-windows.txt"
+                                           if on_windows else
+                                           "requirements.txt")
+        if os.environ.get("CUDA_VERSION", "").startswith("12."):
+            print(
+                "Detected CUDA 12 environment, modifying requirements.txt for wheel build..."
+            )
+            with open(requirements_file, 'r', encoding='utf-8') as f:
+                lines = f.readlines()
+            modified_lines = []
+            i = 0
+            while i < len(lines):
+                line = lines[i]
+                if "<For CUDA 12.9>" in line and line.strip().startswith(
+                        "#"):
+                    new_line = line.replace("# ", "", 1)
+                    print(
+                        f"Enable CUDA 12.9 dependency: {new_line.strip()}")
+                    modified_lines.append(new_line)
+                    print(
+                        f"Disable CUDA 13 dependency: # {lines[i + 1].strip()}"
+                    )
+                    modified_lines.append("# " + lines[i + 1])
+                    i += 1
+                else:
+                    modified_lines.append(line)
+                i += 1
+            with open(requirements_file, 'w', encoding='utf-8') as f:
+                f.writelines(modified_lines)
+            return True
+        return False
+
+    modify_requirements_for_cuda()
+
     build_run(
         f'\"{venv_python}\" -m build {project_dir} --skip-dependency-check --no-isolation --wheel --outdir "{dist_dir}"'
     )
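Note: `modify_requirements_for_cuda()` keys off the `CUDA_VERSION` environment variable (`startswith("12.")`), so a CUDA 12.9 wheel build only needs the variable set before the script runs — a hypothetical invocation; any other flags follow the script's existing interface:

```bash
# Flips the tagged requirement pairs, then proceeds to `python -m build`.
CUDA_VERSION=12.9.1 python3 scripts/build_wheel.py
# Without CUDA_VERSION (or with 13.x), requirements.txt is left untouched.
```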