infra: Support build for both CU12 and CU13

Signed-off-by: Zhanrui Sun <zhanruis@nvidia.com>

parent f12a90b752
commit 8c998533af
docker/Dockerfile.multi
@@ -84,17 +84,21 @@ RUN pip3 install --upgrade --no-cache-dir \
 # wait for new triton to be published
 # Rename pytorch_triton package to triton
-RUN cd /usr/local/lib/python3.12/dist-packages/ && \
-    ls -la | grep pytorch_triton && \
-    mv pytorch_triton-3.3.1+gitc8757738.dist-info triton-3.3.1+gitc8757738.dist-info && \
-    cd triton-3.3.1+gitc8757738.dist-info && \
-    echo "Current directory: $(pwd)" && \
-    echo "Files in directory:" && \
-    ls -la && \
-    sed -i 's/^Name: pytorch-triton/Name: triton/' METADATA && \
-    sed -i 's|pytorch_triton-3.3.1+gitc8757738.dist-info/|triton-3.3.1+gitc8757738.dist-info/|g' RECORD && \
-    echo "METADATA after update:" && \
-    grep "^Name:" METADATA
+RUN if [ -f /etc/redhat-release ]; then \
+        echo "Rocky8 detected, skipping symlink and ldconfig steps"; \
+    else \
+        cd /usr/local/lib/python3.12/dist-packages/ && \
+        ls -la | grep pytorch_triton && \
+        mv pytorch_triton-3.3.1+gitc8757738.dist-info triton-3.3.1+gitc8757738.dist-info && \
+        cd triton-3.3.1+gitc8757738.dist-info && \
+        echo "Current directory: $(pwd)" && \
+        echo "Files in directory:" && \
+        ls -la && \
+        sed -i 's/^Name: pytorch-triton/Name: triton/' METADATA && \
+        sed -i 's|pytorch_triton-3.3.1+gitc8757738.dist-info/|triton-3.3.1+gitc8757738.dist-info/|g' RECORD && \
+        echo "METADATA after update:" && \
+        grep "^Name:" METADATA; \
+    fi

 FROM ${TRITON_IMAGE}:${TRITON_BASE_TAG} AS triton
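The RUN step above only rewrites the wheel's packaging metadata (METADATA and RECORD); nothing is reinstalled. A minimal post-build sanity check, not part of the commit (note that pip3 check validates declared dependencies only, it does not re-verify RECORD):

    pip3 show triton | grep '^Name:'    # expect "Name: triton" once the dist-info is renamed
    python3 -c "import importlib.metadata as md; print(md.version('triton'))"
    pip3 check                          # flags packages with broken or missing requirements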
docker/Makefile
@@ -188,16 +188,16 @@ jenkins-rockylinux8_%: PYTHON_VERSION_TAG_ID = $(if $(findstring 3.12,${PYTHON_V
 jenkins-rockylinux8_%: IMAGE_WITH_TAG = $(shell . ../jenkins/current_image_tags.properties && echo $$LLM_ROCKYLINUX8_${PYTHON_VERSION_TAG_ID}_DOCKER_IMAGE)
 jenkins-rockylinux8_%: STAGE = tritondevel
 jenkins-rockylinux8_%: BASE_IMAGE = nvidia/cuda
-jenkins-rockylinux8_%: BASE_TAG = 12.9.1-devel-rockylinux8
+jenkins-rockylinux8_%: BASE_TAG = 13.0.0-devel-rockylinux8

 rockylinux8_%: STAGE = tritondevel
 rockylinux8_%: BASE_IMAGE = nvidia/cuda
-rockylinux8_%: BASE_TAG = 12.9.1-devel-rockylinux8
+rockylinux8_%: BASE_TAG = 13.0.0-devel-rockylinux8

 # For x86_64 and aarch64
 ubuntu22_%: STAGE = tritondevel
 ubuntu22_%: BASE_IMAGE = nvidia/cuda
-ubuntu22_%: BASE_TAG = 12.9.1-devel-ubuntu22.04
+ubuntu22_%: BASE_TAG = 13.0.0-devel-ubuntu22.04

 trtllm_%: STAGE = release
 trtllm_%: PUSH_TO_STAGING := 0
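With this hunk the Makefile's default base image moves to CUDA 13.0 across the rockylinux8 and ubuntu22 targets. Because variables given on the make command line override makefile assignments, a CUDA 12.9 variant can still be built without editing the file. A hypothetical invocation (the ubuntu22_build goal name is assumed from the ubuntu22_% pattern above):

    make ubuntu22_build BASE_IMAGE=nvidia/cuda BASE_TAG=12.9.1-devel-ubuntu22.04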
docker/common/install_tensorrt.sh
@@ -8,16 +8,16 @@ TRT_VER="10.13.2.6"
 CUDA_VER="13.0" # 13.0.0
 # Keep the installation for cuDNN if users want to install PyTorch with source codes.
 # PyTorch 2.x can compile with cuDNN v9.
-CUDNN_VER="9.12.0.42-1"
+CUDNN_VER="9.12.0.46-1"
 # NCCL version 2.26.x used in the NGC PyTorch 25.05 image but has a performance regression issue.
 # Use NCCL version 2.27.5 which has the fixes.
-NCCL_VER="2.27.6-1+cuda13.0"
+NCCL_VER="2.27.7-1+cuda13.0"
 # Use cuBLAS version 13.0.0.19 instead.
 CUBLAS_VER="13.0.0.19-1"
 # Align with the pre-installed CUDA / NVCC / NVRTC versions from
 # https://docs.nvidia.com/cuda/cuda-toolkit-release-notes/index.html
 NVRTC_VER="13.0.48-1"
-CUDA_RUNTIME="13.0.37-1"
+CUDA_RUNTIME="13.0.48-1"
 CUDA_DRIVER_VERSION="580.65.06-1.el8"

 for i in "$@"; do
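When bumping pins such as CUDNN_VER or NCCL_VER, the candidate build has to exist in the CUDA apt repository configured by cuda-keyring. A quick availability check before editing the script (illustrative, not part of the commit; package names taken from the install block below):

    apt-get update
    apt-cache madison libnccl2 | grep 2.27.7          # confirm 2.27.7-1+cuda13.0 is published
    apt-cache madison libcudnn9-cuda-13 | head -n 5   # newest cuDNN 9 builds for CUDA 13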
@@ -48,36 +48,32 @@ install_ubuntu_requirements() {
     dpkg -i cuda-keyring_1.1-1_all.deb
     rm cuda-keyring_1.1-1_all.deb

+    wget https://urm.nvidia.com/artifactory/sw-gpu-cuda-installer-generic-local/packaging/r13.0/cuda_nvrtc/linux-${ARCH}/13.0.48/cuda-nvrtc-dev-13-0_13.0.48-1_${ARCH2}.deb && \
+        dpkg -i cuda-nvrtc-dev-13-0_13.0.48-1_${ARCH2}.deb && \
+        rm cuda-nvrtc-dev-13-0_13.0.48-1_${ARCH2}.deb
+    apt-get update
+    if [[ $(apt list --installed | grep libcudnn9) ]]; then
+        apt-get remove --purge -y libcudnn9*
+    fi
+    if [[ $(apt list --installed | grep libnccl) ]]; then
+        apt-get remove --purge -y --allow-change-held-packages libnccl*
+    fi
+    if [[ $(apt list --installed | grep libcublas) ]]; then
+        apt-get remove --purge -y --allow-change-held-packages libcublas*
+    fi
+    if [[ $(apt list --installed | grep cuda-nvrtc-dev) ]]; then
+        apt-get remove --purge -y --allow-change-held-packages cuda-nvrtc-dev*
+    fi
-    # apt-get update
-    # if [[ $(apt list --installed | grep libcudnn9) ]]; then
-    #     apt-get remove --purge -y libcudnn9*
-    # fi
-    # if [[ $(apt list --installed | grep libnccl) ]]; then
-    #     apt-get remove --purge -y --allow-change-held-packages libnccl*
-    # fi
-    # if [[ $(apt list --installed | grep libcublas) ]]; then
-    #     apt-get remove --purge -y --allow-change-held-packages libcublas*
-    # fi
-    # if [[ $(apt list --installed | grep cuda-nvrtc-dev) ]]; then
-    #     apt-get remove --purge -y --allow-change-held-packages cuda-nvrtc-dev*
-    # fi
+    CUBLAS_CUDA_VERSION=$(echo $CUDA_VER | sed 's/\./-/g')
+    NVRTC_CUDA_VERSION=$(echo $CUDA_VER | sed 's/\./-/g')
-    # CUBLAS_CUDA_VERSION=$(echo $CUDA_VER | sed 's/\./-/g')
-    # NVRTC_CUDA_VERSION=$(echo $CUDA_VER | sed 's/\./-/g')

-    # apt-get install -y --no-install-recommends \
-    #     libcudnn9-cuda-13=${CUDNN_VER} \
-    #     libcudnn9-dev-cuda-13=${CUDNN_VER} \
-    #     libcudnn9-headers-cuda-13=${CUDNN_VER} \
-    #     libnccl2=${NCCL_VER} \
-    #     libnccl-dev=${NCCL_VER} \
-    #     libcublas-${CUBLAS_CUDA_VERSION}=${CUBLAS_VER} \
-    #     libcublas-dev-${CUBLAS_CUDA_VERSION}=${CUBLAS_VER} \
-    #     cuda-nvrtc-dev-${NVRTC_CUDA_VERSION}=${NVRTC_VER}
+    apt-get install -y --no-install-recommends \
+        libcudnn9-cuda-13=${CUDNN_VER} \
+        libcudnn9-dev-cuda-13=${CUDNN_VER} \
+        libcudnn9-headers-cuda-13=${CUDNN_VER} \
+        libnccl2=${NCCL_VER} \
+        libnccl-dev=${NCCL_VER} \
+        libcublas-${CUBLAS_CUDA_VERSION}=${CUBLAS_VER} \
+        libcublas-dev-${CUBLAS_CUDA_VERSION}=${CUBLAS_VER} \
+        cuda-nvrtc-dev-${NVRTC_CUDA_VERSION}=${NVRTC_VER}

     apt-get clean
     rm -rf /var/lib/apt/lists/*
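One caveat with the restored guards: apt list prints a "does not have a stable CLI interface" warning and its output format may change across releases. A dpkg-based equivalent of the same guard, shown only as an alternative sketch, not as what the commit uses:

    if dpkg -l 'libcudnn9*' 2>/dev/null | grep -q '^ii'; then
        apt-get remove --purge -y 'libcudnn9*'
    fi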
@@ -167,7 +163,7 @@ case "$ID" in
     install_tensorrt
     ;;
   rocky)
-    # install_rockylinux_requirements
+    install_rockylinux_requirements
     install_tensorrt
     ;;
   *)
docker/common/install_ucx.sh
@@ -2,7 +2,7 @@
 set -ex

 GITHUB_URL="https://github.com"
-UCX_VERSION="v1.18.1"
+UCX_VERSION="v1.19.0"
 UCX_INSTALL_PATH="/usr/local/ucx/"
 CUDA_PATH="/usr/local/cuda"
 UCX_REPO="https://github.com/openucx/ucx.git"
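After the bump to v1.19.0, the installed library can be checked with the ucx_info tool that ships with UCX, under the UCX_INSTALL_PATH prefix defined above (exact output format may vary by build):

    /usr/local/ucx/bin/ucx_info -v    # reports the UCX library version; expect 1.19.0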
jenkins/Build.groovy
@@ -16,6 +16,9 @@ AARCH64_TRIPLE = "aarch64-linux-gnu"

 LLM_DOCKER_IMAGE = env.dockerImage

+LLM_DOCKER_IMAGE_12_9 = "urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:pytorch-25.06-py3-x86_64-ubuntu24.04-trt10.11.0.33-skip-tritondevel-202508051130-6090"
+LLM_SBSA_DOCKER_IMAGE_12_9 = "urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:pytorch-25.06-py3-aarch64-ubuntu24.04-trt10.11.0.33-skip-tritondevel-202508051130-6090"
+
 // Always use x86_64 image for agent
 AGENT_IMAGE = env.dockerImage.replace("aarch64", "x86_64")
@@ -35,6 +38,9 @@ def WHEEL_ARCHS = "wheelArchs"
 @Field
 def CONFIG_LINUX_X86_64_VANILLA = "linux_x86_64_Vanilla"

+@Field
+def CONFIG_LINUX_X86_64_VANILLA_CU12 = "linux_x86_64_Vanilla_CU12"
+
 @Field
 def CONFIG_LINUX_X86_64_SINGLE_DEVICE = "linux_x86_64_SingleDevice"
@@ -42,7 +48,10 @@ def CONFIG_LINUX_X86_64_SINGLE_DEVICE = "linux_x86_64_SingleDevice"
 def CONFIG_LINUX_X86_64_LLVM = "linux_x86_64_LLVM"

 @Field
-CONFIG_LINUX_AARCH64 = "linux_aarch64"
+def CONFIG_LINUX_AARCH64 = "linux_aarch64"
+
+@Field
+def CONFIG_LINUX_AARCH64_CU12 = "linux_aarch64_CU12"

 @Field
 def CONFIG_LINUX_AARCH64_LLVM = "linux_aarch64_LLVM"
@@ -62,6 +71,11 @@ def BUILD_CONFIGS = [
         (TARNAME) : "TensorRT-LLM.tar.gz",
         (WHEEL_ARCHS): "80-real;86-real;89-real;90-real;100-real;120-real",
     ],
+    (CONFIG_LINUX_X86_64_VANILLA_CU12) : [
+        (WHEEL_EXTRA_ARGS) : "--extra-cmake-vars ENABLE_MULTI_DEVICE=1 --extra-cmake-vars WARNING_IS_ERROR=ON --extra-cmake-vars NIXL_ROOT=/opt/nvidia/nvda_nixl --micro_benchmarks",
+        (TARNAME) : "TensorRT-LLM-CU12.tar.gz",
+        (WHEEL_ARCHS): "80-real;86-real;89-real;90-real;100-real;120-real",
+    ],
     (CONFIG_LINUX_X86_64_NANOBIND) : [
         (WHEEL_EXTRA_ARGS) : "--binding_type nanobind --extra-cmake-vars ENABLE_MULTI_DEVICE=1 --extra-cmake-vars WARNING_IS_ERROR=ON --extra-cmake-vars NIXL_ROOT=/opt/nvidia/nvda_nixl --micro_benchmarks",
         (TARNAME) : "nanobind-TensorRT-LLM.tar.gz",
@@ -82,6 +96,11 @@ def BUILD_CONFIGS = [
         (TARNAME) : "TensorRT-LLM-GH200.tar.gz",
         (WHEEL_ARCHS): "90-real;100-real;120-real",
     ],
+    (CONFIG_LINUX_AARCH64_CU12): [
+        (WHEEL_EXTRA_ARGS) : "--extra-cmake-vars WARNING_IS_ERROR=ON",
+        (TARNAME) : "TensorRT-LLM-GH200-CU12.tar.gz",
+        (WHEEL_ARCHS): "90-real;100-real;120-real",
+    ],
     (CONFIG_LINUX_AARCH64_NANOBIND): [
         (WHEEL_EXTRA_ARGS) : "--binding_type nanobind --extra-cmake-vars WARNING_IS_ERROR=ON",
         (TARNAME) : "nanobind-TensorRT-LLM-GH200.tar.gz",
@@ -448,6 +467,9 @@ def runLLMBuild(pipeline, buildFlags, tarName, is_linux_x86_64)
     // TODO: Remove after the cmake version is upgraded to 3.31.8
     // Get triton tag from docker/dockerfile.multi
     def tritonShortTag = "r25.08"
+    if (tarName.contains("CU12")) {
+        tritonShortTag = "r25.06"
+    }
     sh "cd ${LLM_ROOT}/triton_backend/inflight_batcher_llm && mkdir build && cd build && cmake .. -DTRTLLM_DIR=${llmPath} -DTRITON_COMMON_REPO_TAG=${tritonShortTag} -DTRITON_CORE_REPO_TAG=${tritonShortTag} -DTRITON_THIRD_PARTY_REPO_TAG=${tritonShortTag} -DTRITON_BACKEND_REPO_TAG=${tritonShortTag} -DUSE_CXX11_ABI=ON && make -j${BUILD_JOBS} install"

     // Step 3: packaging wheels into tarfile
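tritonShortTag is hard-coded per CUDA flavor here (r25.08 for the default CUDA 13 images, r25.06 for the CU12 ones). As the comment above suggests, it could instead be derived from docker/Dockerfile.multi, which already defines TRITON_BASE_TAG; a rough sketch, assuming that tag embeds a YY.MM version:

    # extract e.g. "25.08" from the Dockerfile's TRITON_BASE_TAG default and prefix it with "r"
    tag=$(grep -m1 -oE 'TRITON_BASE_TAG=[0-9]{2}\.[0-9]{2}' docker/Dockerfile.multi | cut -d= -f2)
    tritonShortTag="r${tag}"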
@@ -537,9 +559,13 @@ def launchStages(pipeline, cpu_arch, enableFailFast, globalVars)
         wheelDockerImage = env.dockerImage
     }

+    def LLM_DOCKER_IMAGE_CU12 = cpu_arch == AARCH64_TRIPLE ? LLM_SBSA_DOCKER_IMAGE_12_9 : LLM_DOCKER_IMAGE_12_9
+
     buildConfigs = [
         "Build TRT-LLM": [LLM_DOCKER_IMAGE] + prepareLLMBuild(
             pipeline, cpu_arch == AARCH64_TRIPLE ? CONFIG_LINUX_AARCH64 : CONFIG_LINUX_X86_64_VANILLA),
+        "Build TRT-LLM CUDA12": [LLM_DOCKER_IMAGE_CU12] + prepareLLMBuild(
+            pipeline, cpu_arch == AARCH64_TRIPLE ? CONFIG_LINUX_AARCH64_CU12 : CONFIG_LINUX_X86_64_VANILLA_CU12),
         "Build TRT-LLM LLVM": [LLM_DOCKER_IMAGE] + prepareLLMBuild(
             pipeline, cpu_arch == AARCH64_TRIPLE ? CONFIG_LINUX_AARCH64_LLVM : CONFIG_LINUX_X86_64_LLVM),
         "Build TRT-LLM Nanobind": [LLM_DOCKER_IMAGE] + prepareLLMBuild(
jenkins/L0_Test.groovy
@@ -38,6 +38,9 @@ LLM_DOCKER_IMAGE = env.dockerImage
 LLM_ROCKYLINUX8_PY310_DOCKER_IMAGE = env.wheelDockerImagePy310
 LLM_ROCKYLINUX8_PY312_DOCKER_IMAGE = env.wheelDockerImagePy312

+LLM_DOCKER_IMAGE_12_9 = "urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:pytorch-25.06-py3-x86_64-ubuntu24.04-trt10.11.0.33-skip-tritondevel-202508051130-6090"
+LLM_SBSA_DOCKER_IMAGE_12_9 = "urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:pytorch-25.06-py3-aarch64-ubuntu24.04-trt10.11.0.33-skip-tritondevel-202508051130-6090"
+
 // DLFW torch image
 DLFW_IMAGE = "nvcr.io/nvidia/pytorch:25.06-py3"
@@ -55,6 +58,9 @@ def TARNAME = "tarName"
 @Field
 def VANILLA_CONFIG = "Vanilla"

+@Field
+def VANILLA_CONFIG_CU12 = "Vanilla_CU12"
+
 @Field
 def SINGLE_DEVICE_CONFIG = "SingleDevice"
@@ -62,7 +68,10 @@ def SINGLE_DEVICE_CONFIG = "SingleDevice"
 def LLVM_CONFIG = "LLVM"

 @Field
-LINUX_AARCH64_CONFIG = "linux_aarch64"
+def LINUX_AARCH64_CONFIG = "linux_aarch64"
+
+@Field
+def LINUX_AARCH64_CONFIG_CU12 = "linux_aarch64_CU12"

 @Field
 def NANOBIND_CONFIG = "Nanobind"
@@ -71,9 +80,11 @@ def NANOBIND_CONFIG = "Nanobind"
 def BUILD_CONFIGS = [
     // Vanilla TARNAME is used for packaging in runLLMPackage
     (VANILLA_CONFIG) : [(TARNAME) : "TensorRT-LLM.tar.gz"],
+    (VANILLA_CONFIG_CU12) : [(TARNAME) : "TensorRT-LLM-CU12.tar.gz"],
     (SINGLE_DEVICE_CONFIG) : [(TARNAME) : "single-device-TensorRT-LLM.tar.gz"],
     (LLVM_CONFIG) : [(TARNAME) : "llvm-TensorRT-LLM.tar.gz"],
     (LINUX_AARCH64_CONFIG) : [(TARNAME) : "TensorRT-LLM-GH200.tar.gz"],
+    (LINUX_AARCH64_CONFIG_CU12) : [(TARNAME) : "TensorRT-LLM-GH200-CU12.tar.gz"],
     (NANOBIND_CONFIG) : [(TARNAME) : "nanobind-TensorRT-LLM.tar.gz"],
 ]
@@ -1310,6 +1321,9 @@ def runLLMTestlistOnPlatformImpl(pipeline, platform, testList, config=VANILLA_CO
     if (env.alternativeTRT) {
         sh "cd ${llmSrc} && sed -i 's#tensorrt~=.*\$#tensorrt#g' requirements.txt && cat requirements.txt"
     }
+    if (stageName.contains("-CU12")) {
+        trtllm_utils.llmExecStepWithRetry(pipeline, script: "cd ${llmSrc} && sed -i 's/-cu13/-cu12/g' requirements.txt && cat requirements.txt")
+    }
     trtllm_utils.llmExecStepWithRetry(pipeline, script: "cd ${llmSrc} && pip3 install --retries 1 -r requirements-dev.txt")
     if (!skipInstallWheel) {
         trtllm_utils.llmExecStepWithRetry(pipeline, script: "cd ${llmPath} && pip3 install --force-reinstall --no-deps TensorRT-LLM/tensorrt_llm-*.whl")
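The -CU12 test stages reuse the CUDA 13 requirements file by rewriting the -cu13 suffix in package names before installing. The effect of the sed in isolation (the nvidia-nccl-cu13 entry below is a made-up example line, not quoted from requirements.txt):

    printf 'nvidia-nccl-cu13\ntensorrt~=10.13.0\n' > /tmp/requirements.txt
    sed -i 's/-cu13/-cu12/g' /tmp/requirements.txt
    cat /tmp/requirements.txt    # first line becomes nvidia-nccl-cu12; the tensorrt line is untouched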
@@ -1783,7 +1797,7 @@ def launchTestJobs(pipeline, testFilter, dockerNode=null)
     "A100X-PyTorch-1": ["a100x", "l0_a100", 1, 1],
     "L40S-PyTorch-1": ["l40s", "l0_l40s", 1, 2],
     "L40S-PyTorch-2": ["l40s", "l0_l40s", 2, 2],
     "H100_PCIe-PyTorch-1": ["h100-cr", "l0_h100", 1, 3],
+    "H100_PCIe-PyTorch-CU12-1": ["h100-cr", "l0_h100", 1, 3],
     "H100_PCIe-PyTorch-2": ["h100-cr", "l0_h100", 2, 3],
     "H100_PCIe-PyTorch-3": ["h100-cr", "l0_h100", 3, 3],
     "H100_PCIe-CPP-1": ["h100-cr", "l0_h100", 1, 2],
@@ -1842,7 +1856,7 @@ def launchTestJobs(pipeline, testFilter, dockerNode=null)
     "DGX_H200-4_GPUs-TensorRT-Post-Merge-3": ["dgx-h200-x4", "l0_dgx_h200", 3, 3, 4],
 ]

-parallelJobs = x86TestConfigs.collectEntries{key, values -> [key, [createKubernetesPodConfig(LLM_DOCKER_IMAGE, values[0], "amd64", values[4] ?: 1, key.contains("Perf")), {
+parallelJobs = x86TestConfigs.collectEntries{key, values -> [key, [createKubernetesPodConfig(key.contains("-CU12-") ? LLM_DOCKER_IMAGE_12_9 : LLM_DOCKER_IMAGE, values[0], "amd64", values[4] ?: 1, key.contains("Perf")), {
     def config = VANILLA_CONFIG
     if (key.contains("single-device")) {
         config = SINGLE_DEVICE_CONFIG
@@ -1853,6 +1867,9 @@ def launchTestJobs(pipeline, testFilter, dockerNode=null)
     if (key.contains("Nanobind")) {
         config = NANOBIND_CONFIG
     }
+    if (key.contains("-CU12-")) {
+        config = VANILLA_CONFIG_CU12
+    }
     runLLMTestlistOnPlatform(pipeline, values[0], values[1], config, key.contains("Perf"), key, values[2], values[3])
 }]]}
 fullSet = parallelJobs.keySet()
@@ -1871,6 +1888,9 @@ def launchTestJobs(pipeline, testFilter, dockerNode=null)
     if (key.contains("llvm")) {
         config = LLVM_CONFIG
     }
+    if (key.contains("-CU12-")) {
+        config = VANILLA_CONFIG_CU12
+    }
     runLLMTestlistOnSlurm(pipeline, values[0], values[1], config, key.contains("Perf"), key, values[2], values[3], values[4] ?: 1)
 }]]}
@@ -1902,7 +1922,7 @@ def launchTestJobs(pipeline, testFilter, dockerNode=null)
 fullSet += multiNodesSBSAConfigs.keySet()

 if (env.targetArch == AARCH64_TRIPLE) {
-    parallelJobs = SBSATestConfigs.collectEntries{key, values -> [key, [createKubernetesPodConfig(LLM_DOCKER_IMAGE, values[0], "arm64"), {
+    parallelJobs = SBSATestConfigs.collectEntries{key, values -> [key, [createKubernetesPodConfig(key.contains("-CU12-") ? LLM_SBSA_DOCKER_IMAGE_12_9 : LLM_DOCKER_IMAGE, values[0], "arm64"), {
         runLLMTestlistOnPlatform(pipeline, values[0], values[1], LINUX_AARCH64_CONFIG, false, key, values[2], values[3])
     }]]}
@@ -1987,7 +2007,7 @@ def launchTestJobs(pipeline, testFilter, dockerNode=null)

 aarch64SanityCheckConfigs = [
     "PY312-UB2404": [
-        LLM_DOCKER_IMAGE,
+        LLM_SBSA_DOCKER_IMAGE_12_9,
         "GH200",
         AARCH64_TRIPLE,
         false,
@@ -1996,7 +2016,7 @@ def launchTestJobs(pipeline, testFilter, dockerNode=null)
         true, // Extra PyTorch CUDA 12.8 install
     ],
     "PY312-DLFW": [
-        LLM_DOCKER_IMAGE,
+        LLM_SBSA_DOCKER_IMAGE_12_9,
         "GH200",
         AARCH64_TRIPLE,
         false,
@@ -2097,9 +2117,11 @@ def launchTestJobs(pipeline, testFilter, dockerNode=null)
     trtllm_utils.llmExecStepWithRetry(pipeline, script: "wget https://developer.download.nvidia.com/compute/cuda/repos/${ubuntu_version}/${platform}/cuda-keyring_1.1-1_all.deb")
     trtllm_utils.llmExecStepWithRetry(pipeline, script: "dpkg -i cuda-keyring_1.1-1_all.deb")
     trtllm_utils.llmExecStepWithRetry(pipeline, script: "apt-get update")
-    trtllm_utils.llmExecStepWithRetry(pipeline, script: "apt-get -y install cuda-toolkit-13-0")
 }
+trtllm_utils.llmExecStepWithRetry(pipeline, script: "apt-get -y install cuda-toolkit-12-9")
+
+trtllm_utils.llmExecStepWithRetry(pipeline, script: "pip3 install 'cuda-python>=12,<13' 'nvidia-ml-py>=12,<13'")
 }
+trtllm_utils.llmExecStepWithRetry(pipeline, script: "sed -i 's/-cu13/-cu12/g' ${LLM_ROOT}/requirements.txt")
 // Extra PyTorch CUDA 12.8 install for SBSA platform and Blackwell GPUs bare-metal environments
 if (values[6]) {
     echo "###### Extra PyTorch CUDA 12.8 install Start ######"
@@ -2132,9 +2154,9 @@ def launchTestJobs(pipeline, testFilter, dockerNode=null)
     checkPipInstall(pipeline, "${cpu_arch}/${wheelPath}")
 }
 echo "###### Run LLMAPI tests Start ######"
-def config = VANILLA_CONFIG
+def config = VANILLA_CONFIG_CU12
 if (cpu_arch == AARCH64_TRIPLE) {
-    config = LINUX_AARCH64_CONFIG
+    config = LINUX_AARCH64_CONFIG_CU12
 }
 withEnv(libEnv) {
     sh "env | sort"
jenkins/current_image_tags.properties
@@ -13,7 +13,11 @@
 # images are adopted from PostMerge pipelines, the abbreviated commit hash is used instead.
+LLM_DOCKER_IMAGE_12_9=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:pytorch-25.06-py3-x86_64-ubuntu24.04-trt10.11.0.33-skip-tritondevel-202508051130-6090
+LLM_SBSA_DOCKER_IMAGE_12_9=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:pytorch-25.06-py3-aarch64-ubuntu24.04-trt10.11.0.33-skip-tritondevel-202508051130-6090
-LLM_DOCKER_IMAGE=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm-staging/tritondevel:x86_64-tritondevel-torch_skip-a9bc5c5-user_zhanruis_update_dlfw_and_cu13-656
-LLM_SBSA_DOCKER_IMAGE=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm-staging/tritondevel:sbsa-tritondevel-torch_skip-a9bc5c5-user_zhanruis_update_dlfw_and_cu13-656
+LLM_ROCKYLINUX8_PY310_DOCKER_IMAGE_12_9=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:cuda-12.9.1-devel-rocky8-x86_64-rocky8-py310-trt10.11.0.33-skip-tritondevel-202508051130-6090
+LLM_ROCKYLINUX8_PY312_DOCKER_IMAGE_12_9=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:cuda-12.9.1-devel-rocky8-x86_64-rocky8-py312-trt10.11.0.33-skip-tritondevel-202508051130-6090
+LLM_DOCKER_IMAGE=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:pytorch-25.08-py3-x86_64-ubuntu24.04-trt10.13.2.6-skip-tritondevel-202508151730-pre-test
+LLM_SBSA_DOCKER_IMAGE=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:pytorch-25.08-py3-aarch64-ubuntu24.04-trt10.13.2.6-skip-tritondevel-202508151730-pre-test
+#LLM_ROCKYLINUX8_PY310_DOCKER_IMAGE=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:cuda-13.0.0-devel-rocky8-x86_64-rocky8-py310-trt10.13.2.6-skip-tritondevel-202508151730-pre-test
+#LLM_ROCKYLINUX8_PY312_DOCKER_IMAGE=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:cuda-13.0.0-devel-rocky8-x86_64-rocky8-py312-trt10.13.2.6-skip-tritondevel-202508151730-pre-test
 LLM_ROCKYLINUX8_PY310_DOCKER_IMAGE=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:cuda-12.9.1-devel-rocky8-x86_64-rocky8-py310-trt10.11.0.33-skip-tritondevel-202508051130-6090
 LLM_ROCKYLINUX8_PY312_DOCKER_IMAGE=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:cuda-12.9.1-devel-rocky8-x86_64-rocky8-py312-trt10.11.0.33-skip-tritondevel-202508051130-6090
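The properties file is plain KEY=VALUE shell syntax, so both the Makefile (see the jenkins-rockylinux8_% recipe above) and ad-hoc shell sessions can consume it by sourcing:

    . jenkins/current_image_tags.properties && echo "${LLM_DOCKER_IMAGE_12_9}"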
requirements.txt
@@ -3,7 +3,7 @@
 accelerate>=0.25.0
 build
 colored
-cuda-python
+cuda-python>=12
 diffusers>=0.27.0
 lark
 mpi4py
@@ -13,7 +13,7 @@ onnx_graphsurgeon>=0.5.2
 openai
 polygraphy
 psutil
-nvidia-ml-py>=12,<13
+nvidia-ml-py>=12
 # Just a wrapper since nvidia-modelopt requires pynvml
 pynvml==12.0.0
 pulp
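Loosening cuda-python to >=12 and dropping the <13 cap on nvidia-ml-py lets one requirements file serve both CUDA majors; the CU12 pipeline re-tightens the caps at install time (see the pip3 install pin in the Jenkins hunk above). nvidia-ml-py provides the pynvml module; a probe that exercises it on a machine with an NVIDIA driver (illustrative):

    python3 -c "import pynvml; pynvml.nvmlInit(); print(pynvml.nvmlSystemGetDriverVersion())"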