Mirror of https://github.com/NVIDIA/TensorRT-LLM.git (synced 2026-01-13 22:18:36 +08:00)

[Infra] - Update the upstream PyTorch dependency to 2.7.0 (#4235)

[Infra][TRTLLM-4941] - Update the upstream PyTorch dependency to 2.7.0

Signed-off-by: Yanchao Lu <yanchaol@nvidia.com>

parent c67da1fbaa
commit 504f4bf779
@@ -5,7 +5,7 @@ set -ex
# Use latest stable version from https://pypi.org/project/torch/#history
# and closest to the version specified in
# https://docs.nvidia.com/deeplearning/frameworks/pytorch-release-notes/rel-25-03.html#rel-25-03
TORCH_VERSION="2.6.0"
TORCH_VERSION="2.7.0"
SYSTEM_ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"')

prepare_environment() {

@@ -32,17 +32,14 @@ install_from_source() {
if [ "$ARCH" = "amd64" ];then ARCH="x86_64";fi
if [ "$ARCH" = "aarch64" ];then ARCH="sbsa";fi

if [ "$ARCH" = "sbsa" ] && [ "$TORCH_VERSION" = "2.6.0" ]; then
echo "Due to a known issue https://github.com/pytorch/pytorch/issues/141083, PyTorch v2.6.0 installation from source codes cannot be supported..."
exit 1
fi
prepare_environment $1

export _GLIBCXX_USE_CXX11_ABI=$1

export TORCH_CUDA_ARCH_LIST="8.0;9.0"
export TORCH_CUDA_ARCH_LIST="8.0;8.6;9.0;10.0;12.0"
export PYTORCH_BUILD_VERSION=${TORCH_VERSION}
export PYTORCH_BUILD_NUMBER=0
export MAX_JOBS=12
pip3 uninstall -y torch
cd /tmp
git clone --depth 1 --branch v${TORCH_VERSION} https://github.com/pytorch/pytorch

@@ -58,6 +55,7 @@ install_from_source() {
export PYTORCH_VERSION=${PYTORCH_BUILD_VERSION}
export FORCE_CUDA=1
export BUILD_VERSION=${TORCHVISION_VERSION}
export MAX_JOBS=12
pip3 uninstall -y torchvision
cd /tmp
git clone --depth 1 --branch v${TORCHVISION_VERSION} https://github.com/pytorch/vision

@@ -71,12 +69,8 @@ install_from_pypi() {
if [ "$ARCH" = "amd64" ];then ARCH="x86_64";fi
if [ "$ARCH" = "aarch64" ];then ARCH="sbsa";fi

pip3 uninstall -y torch torchvision
if [ "$ARCH" = "sbsa" ]; then
pip3 install torch==${TORCH_VERSION} torchvision torchaudio --index-url https://download.pytorch.org/whl/cu126
else
pip3 install torch==${TORCH_VERSION} torchvision
fi
pip3 uninstall -y torch torchvision torchaudio
pip3 install torch==${TORCH_VERSION} torchvision torchaudio --index-url https://download.pytorch.org/whl/cu128
}

case "$1" in
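The expanded `TORCH_CUDA_ARCH_LIST` above adds SM 8.6, 10.0, and 12.0 targets to the source build. As a quick sanity check (a minimal sketch; it assumes the rebuilt wheel is already installed in the current environment), you can list the architectures the installed PyTorch was compiled for:

```bash
# Print the CUDA architectures baked into the installed torch wheel,
# e.g. ['sm_80', 'sm_86', 'sm_90', ...]
python3 -c "import torch; print(torch.cuda.get_arch_list())"
```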
@@ -2,7 +2,8 @@

# Building from Source Code on Linux

This document provides instructions for building TensorRT-LLM from source code on Linux. Building from source code is necessary if you want the best performance or debugging capabilities, or if the [GNU C++11 ABI](https://gcc.gnu.org/onlinedocs/libstdc++/manual/using_dual_abi.html) is required.
This document provides instructions for building TensorRT-LLM from source code on Linux. Building from source is recommended for achieving optimal performance, enabling debugging capabilities, or when you need a different [GNU CXX11 ABI](https://gcc.gnu.org/onlinedocs/libstdc++/manual/using_dual_abi.html) configuration than what is available in the pre-built TensorRT-LLM wheel on PyPI. Note that the current pre-built TensorRT-LLM wheel on PyPI is linked against PyTorch 2.7.0, which uses the new CXX11 ABI.

## Prerequisites
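To see which ABI the PyTorch in your environment was built with (and therefore which ABI configuration a from-source TensorRT-LLM build should match), a quick check is shown below; this is a minimal sketch and assumes `torch` is already installed:

```bash
# True  -> torch was built with the new CXX11 ABI (_GLIBCXX_USE_CXX11_ABI=1)
# False -> torch uses the old pre-CXX11 ABI
python3 -c "import torch; print(torch.compiled_with_cxx11_abi())"
```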
@@ -169,8 +170,7 @@ The `build_wheel.py` script will also compile the library containing the C++ run
python3 ./scripts/build_wheel.py --cuda_architectures "80-real;86-real" --cpp_only --clean
```

This is particularly useful to avoid linking problems which may be introduced by particular versions of `torch` related to the [dual ABI support of GCC](https://gcc.gnu.org/onlinedocs/libstdc++/manual/using_dual_abi.html). The option `--clean` will remove the build directory before building. The default build directory is `cpp/build`, which may be overridden using the option
`--build_dir`. Run `build_wheel.py --help` for an overview of all supported options.
This is particularly useful for avoiding linking issues that may arise with older versions of `torch` (prior to 2.7.0) due to the [Dual ABI support in GCC](https://gcc.gnu.org/onlinedocs/libstdc++/manual/using_dual_abi.html). The `--clean` option removes the build directory before starting a new build. By default, TensorRT-LLM uses `cpp/build` as the build directory, but you can specify a different location with the `--build_dir` option. For a complete list of available build options, run `python3 ./scripts/build_wheel.py --help`.
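For example, to reuse the C++-only clean build above while writing artifacts somewhere other than `cpp/build`, the `--build_dir` option can be combined with the same flags (the directory path below is only an illustrative placeholder):

```bash
# C++-only clean build into a custom build directory (path is hypothetical)
python3 ./scripts/build_wheel.py --cuda_architectures "80-real;86-real" --cpp_only --clean --build_dir /tmp/trtllm-cpp-build
```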
The shared library can be found in the following location:
@@ -217,6 +217,4 @@ TRTLLM_PRECOMPILED_LOCATION=https://pypi.nvidia.com/tensorrt-llm/tensorrt_llm-0.

#### Known Limitations

Currently, our released TensorRT-LLM wheel packages are linked against public PyTorch hosted on PyPI, which disables C++11 ABI support. However, the Docker image built previously is based on an NGC container where PyTorch has C++11 ABI enabled; see [NGC PyTorch container page](https://catalog.ngc.nvidia.com/orgs/nvidia/containers/pytorch). Therefore, we recommend performing a full build inside this container.

When using `TRTLLM_PRECOMPILED_LOCATION`, ensure that your wheel is compiled based on the same version of C++ code as your current directory; any discrepancies may lead to compatibility issues.
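For context, `TRTLLM_PRECOMPILED_LOCATION` is an environment variable pointing at a pre-built wheel, as the truncated line in the hunk header above suggests. A sketch of such an invocation is shown below; the placeholder wheel URL and the `pip3 install -e .` form are assumptions for illustration, not the exact command from the documentation:

```bash
# Hypothetical example: point the build at a pre-compiled wheel, then install
# the Python sources in editable mode from the repository root.
TRTLLM_PRECOMPILED_LOCATION=https://example.com/tensorrt_llm-precompiled.whl \
  pip3 install -e .
```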
@@ -5,12 +5,12 @@
1. Install TensorRT-LLM (tested on Ubuntu 24.04).

```bash
pip3 install torch==2.6.0 torchvision torchaudio --index-url https://download.pytorch.org/whl/cu126
pip3 install torch==2.7.0 torchvision torchaudio --index-url https://download.pytorch.org/whl/cu128

sudo apt-get -y install libopenmpi-dev && pip3 install --upgrade pip setuptools && pip3 install tensorrt_llm
```

If using the [PyTorch NGC Container](https://catalog.ngc.nvidia.com/orgs/nvidia/containers/pytorch) image, the prerequisite step for installing CUDA-enabled PyTorch package is not required.
If using the [PyTorch NGC Container](https://catalog.ngc.nvidia.com/orgs/nvidia/containers/pytorch) image, the prerequisite steps for installing CUDA-enabled PyTorch package and `libopenmpi-dev` are not required.

2. Sanity check the installation by running the following in Python (tested on Python 3.12):
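Before running the full Python sanity check, a one-line import check (a minimal sketch; it assumes the `tensorrt_llm` wheel installed without errors) can quickly confirm the package is importable and report its version:

```bash
# Import the package and print its version as a lightweight install check
python3 -c "import tensorrt_llm; print(tensorrt_llm.__version__)"
```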
@@ -5,9 +5,15 @@
1. Install TensorRT-LLM (tested on Ubuntu 24.04).

```bash
(Optional) pip3 install torch==2.7.0 torchvision torchaudio --index-url https://download.pytorch.org/whl/cu128

sudo apt-get -y install libopenmpi-dev && pip3 install --upgrade pip setuptools && pip3 install tensorrt_llm
```

PyTorch CUDA 12.8 package is required for supporting NVIDIA Blackwell GPUs. On prior GPUs, this extra installation is not required.
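To tell whether the extra CUDA 12.8 PyTorch install applies to your machine, you can query the GPU's compute capability; Blackwell-class GPUs report a major version of 10 or 12. This is a quick sketch and assumes a CUDA-enabled `torch` is already installed:

```bash
# Prints a (major, minor) tuple, e.g. (9, 0) for Hopper or (10, 0) for Blackwell
python3 -c "import torch; print(torch.cuda.get_device_capability())"
```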
If using the [PyTorch NGC Container](https://catalog.ngc.nvidia.com/orgs/nvidia/containers/pytorch) image, the prerequisite steps for installing NVIDIA Blackwell-enabled PyTorch package and `libopenmpi-dev` are not required.

2. Sanity check the installation by running the following in Python (tested on Python 3.12):

```{literalinclude} ../../../examples/llm-api/quickstart_example.py
@@ -19,15 +25,7 @@

There are some known limitations when you pip install pre-built TensorRT-LLM wheel package.

1. C++11 ABI

The pre-built TensorRT-LLM wheel has linked against the public pytorch hosted on pypi, which turned off C++11 ABI.
While the NVIDIA optimized pytorch inside NGC container nvcr.io/nvidia/pytorch:xx.xx-py3 turned on the C++11 ABI,
see [NGC pytorch container page](https://catalog.ngc.nvidia.com/orgs/nvidia/containers/pytorch).
Thus we recommend users to build from source inside when using the NGC pytorch container. Build from source guideline can be found in
[Build from Source Code on Linux](https://nvidia.github.io/TensorRT-LLM/installation/build-from-source-linux.html)

2. MPI in the Slurm environment
1. MPI in the Slurm environment

If you encounter an error while running TensorRT-LLM in a Slurm-managed cluster, you need to reconfigure the MPI installation to work with Slurm.
The setup methods depends on your slurm configuration, pls check with your admin. This is not a TensorRT-LLM specific, rather a general mpi+slurm issue.
@@ -38,7 +36,7 @@ There are some known limitations when you pip install pre-built TensorRT-LLM whe
to discover a SLURM installation in the usual places.
```

3. CUDA Toolkit
2. CUDA Toolkit

`pip install tensorrt-llm` won't install CUDA toolkit in your system, and the CUDA Toolkit is not required if want to just deploy a TensorRT-LLM engine.
TensorRT-LLM uses the [ModelOpt](https://nvidia.github.io/TensorRT-Model-Optimizer/) to quantize a model, while the ModelOpt requires CUDA toolkit to jit compile certain kernels which is not included in the pytorch to do quantization effectively.
@@ -49,4 +47,12 @@ There are some known limitations when you pip install pre-built TensorRT-LLM whe
UserWarning: CUDA_HOME environment variable is not set. Please set it to your CUDA install root.
Unable to load extension modelopt_cuda_ext and falling back to CPU version.
```
The installation of CUDA toolkit can be found in [CUDA Toolkit Documentation](https://docs.nvidia.com/cuda/)
The installation of CUDA toolkit can be found in [CUDA Toolkit Documentation](https://docs.nvidia.com/cuda/).
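If the CUDA Toolkit is installed but the warning above still appears, pointing `CUDA_HOME` at the toolkit's install root usually lets ModelOpt JIT-compile its extension. The path below is the common default location and is an assumption; adjust it to wherever your toolkit actually lives:

```bash
# Assumed default toolkit location; change if CUDA is installed elsewhere
export CUDA_HOME=/usr/local/cuda
export PATH=${CUDA_HOME}/bin:${PATH}
```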
3. Install inside the PyTorch NGC Container

The PyTorch NGC Container may lock Python package versions via the `/etc/pip/constraint.txt` file. When installing the pre-built TensorRT-LLM wheel inside the [PyTorch NGC Container](https://catalog.ngc.nvidia.com/orgs/nvidia/containers/pytorch), you need to clear this file first.

```bash
[ -f /etc/pip/constraint.txt ] && : > /etc/pip/constraint.txt
```
@@ -16,7 +16,7 @@ AARCH64_TRIPLE = "aarch64-linux-gnu"

LLM_DOCKER_IMAGE = env.dockerImage

AGENT_IMAGE = "urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:pytorch-25.04-py3-x86_64-ubuntu24.04-trt10.10.0.31-skip-tritondevel-202505121727-4049"
AGENT_IMAGE = env.dockerImage

POD_TIMEOUT_SECONDS = env.podTimeoutSeconds ? env.podTimeoutSeconds : "21600"

@@ -192,7 +192,7 @@ def createKubernetesPodConfig(image, type, arch = "amd64")
claimName: sw-tensorrt-pvc
"""
if (arch == "arm64") {
// WAR: PVC mount is not setup on GH200 machines, use a small local cache as a WAR
// PVC mount isn't supported on aarch64 platform. Use NFS as a WAR.
pvcVolume = """
- name: sw-tensorrt-pvc
nfs:

@@ -612,7 +612,7 @@ def launchStages(pipeline, cpu_arch, enableFailFast, globalVars)
globalVars[ACTION_INFO] = trtllm_utils.setupPipelineDescription(pipeline, globalVars[ACTION_INFO])
}

def wheelDockerImage = env.wheelDockerImage
def wheelDockerImage = env.wheelDockerImagePy310
if (!wheelDockerImage && cpu_arch == AARCH64_TRIPLE) {
wheelDockerImage = env.dockerImage
}

@@ -137,7 +137,6 @@ def buildImage(target, action="build", torchInstallType="skip", args="", custom_
TORCH_INSTALL_TYPE=${torchInstallType} \
IMAGE_NAME=${IMAGE_NAME} IMAGE_TAG=${tag} \
BUILD_WHEEL_OPTS='-j ${BUILD_JOBS}' ${args} \
BUILD_TRITON=1 \
GITHUB_MIRROR=https://urm.nvidia.com/artifactory/github-go-remote
"""
}

@@ -150,7 +149,6 @@ def buildImage(target, action="build", torchInstallType="skip", args="", custom_
TORCH_INSTALL_TYPE=${torchInstallType} \
IMAGE_NAME=${IMAGE_NAME} IMAGE_TAG=${custom_tag} \
BUILD_WHEEL_OPTS='-j ${BUILD_JOBS}' ${args} \
BUILD_TRITON=1 \
GITHUB_MIRROR=https://urm.nvidia.com/artifactory/github-go-remote
"""
}
@@ -299,25 +297,24 @@ pipeline {
triggerSBSARemoteJob(params.action, "skip")
}
}
// Waived due to a pytorch issue: https://github.com/pytorch/pytorch/issues/141083
// stage("Build SBSA-pre_cxx11_abi") {
// agent {
// kubernetes createKubernetesPodConfig("agent")
// }
// steps
// {
// triggerSBSARemoteJob(params.action, "src_non_cxx11_abi")
// }
// }
// stage("Build SBSA-cxx11_abi") {
// agent {
// kubernetes createKubernetesPodConfig("agent")
// }
// steps
// {
// triggerSBSARemoteJob(params.action, "src_cxx11_abi")
// }
// }
stage("Build SBSA-pre_cxx11_abi") {
agent {
kubernetes createKubernetesPodConfig("agent")
}
steps
{
triggerSBSARemoteJob(params.action, "src_non_cxx11_abi")
}
}
stage("Build SBSA-cxx11_abi") {
agent {
kubernetes createKubernetesPodConfig("agent")
}
steps
{
triggerSBSARemoteJob(params.action, "src_cxx11_abi")
}
}
}
}
} // stages
@@ -73,8 +73,11 @@ def buildImage(action, type)
stage ("Perform '${action}' action on image") {
retry(3)
{
sh "cd ${LLM_ROOT} && make -C docker ${stage_docker}_${action} IMAGE_NAME=${IMAGE_NAME} IMAGE_TAG=${tag} TORCH_INSTALL_TYPE=${type} BUILD_TRITON=1" +
" GITHUB_MIRROR=https://urm.nvidia.com/artifactory/github-go-remote"
sh """cd ${LLM_ROOT} && make -C docker ${stage_docker}_${action} \
IMAGE_NAME=${IMAGE_NAME} \
IMAGE_TAG=${tag} \
TORCH_INSTALL_TYPE=${type} \
GITHUB_MIRROR=https://urm.nvidia.com/artifactory/github-go-remote"""
}
}
} catch (Exception ex) {
@@ -26,8 +26,6 @@ LLM_SBSA_DOCKER_IMAGE = "urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:pytorch-
LLM_ROCKYLINUX8_PY310_DOCKER_IMAGE = "urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:cuda-12.9.0-devel-rocky8-x86_64-rocky8-py310-trt10.10.0.31-skip-tritondevel-202505121727-4049"
LLM_ROCKYLINUX8_PY312_DOCKER_IMAGE = "urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:cuda-12.9.0-devel-rocky8-x86_64-rocky8-py312-trt10.10.0.31-skip-tritondevel-202505121727-4049"

LLM_ROCKYLINUX8_DOCKER_IMAGE = LLM_ROCKYLINUX8_PY310_DOCKER_IMAGE

// TODO: Move common variables to an unified location
BUILD_CORES_REQUEST = "8"
BUILD_CORES_LIMIT = "8"

@@ -787,7 +785,8 @@ def launchStages(pipeline, reuseBuild, testFilter, enableFailFast, globalVars)
parameters += [
'enableFailFast': enableFailFast,
'dockerImage': LLM_DOCKER_IMAGE,
'wheelDockerImage': LLM_ROCKYLINUX8_DOCKER_IMAGE,
'wheelDockerImagePy310': LLM_ROCKYLINUX8_PY310_DOCKER_IMAGE,
'wheelDockerImagePy312': LLM_ROCKYLINUX8_PY312_DOCKER_IMAGE,
'globalVars': globalVarsJson,
]

@@ -827,6 +826,8 @@ def launchStages(pipeline, reuseBuild, testFilter, enableFailFast, globalVars)
'enableFailFast': enableFailFast,
'testFilter': testFilterJson,
'dockerImage': LLM_DOCKER_IMAGE,
'wheelDockerImagePy310': LLM_ROCKYLINUX8_PY310_DOCKER_IMAGE,
'wheelDockerImagePy312': LLM_ROCKYLINUX8_PY312_DOCKER_IMAGE,
'globalVars': globalVarsJson,
]
@@ -35,8 +35,8 @@ linuxPkgName = ( env.targetArch == AARCH64_TRIPLE ? "tensorrt-llm-sbsa-release-s
// available tags can be found in: https://urm.nvidia.com/artifactory/sw-tensorrt-docker/tensorrt-llm/
// [base_image_name]-[arch]-[os](-[python_version])-[trt_version]-[torch_install_type]-[stage]-[date]-[mr_id]
LLM_DOCKER_IMAGE = env.dockerImage
LLM_ROCKYLINUX8_PY310_DOCKER_IMAGE = "urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:cuda-12.9.0-devel-rocky8-x86_64-rocky8-py310-trt10.10.0.31-skip-tritondevel-202505121727-4049"
LLM_ROCKYLINUX8_PY312_DOCKER_IMAGE = "urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:cuda-12.9.0-devel-rocky8-x86_64-rocky8-py312-trt10.10.0.31-skip-tritondevel-202505121727-4049"
LLM_ROCKYLINUX8_PY310_DOCKER_IMAGE = env.wheelDockerImagePy310
LLM_ROCKYLINUX8_PY312_DOCKER_IMAGE = env.wheelDockerImagePy312

// DLFW torch image
DLFW_IMAGE = "nvcr.io/nvidia/pytorch:25.04-py3"

@@ -481,7 +481,7 @@ def createKubernetesPodConfig(image, type, arch = "amd64", gpuCount = 1, perfMod
claimName: sw-tensorrt-pvc
"""
if (arch == "arm64") {
// WAR: PVC mount is not setup on aarch64 platform, use nfs as a WAR
// PVC mount isn't supported on aarch64 platform. Use NFS as a WAR.
pvcVolume = """
- name: sw-tensorrt-pvc
nfs:
@@ -1147,8 +1147,6 @@ def runLLMBuildFromPackage(pipeline, cpu_arch, reinstall_dependencies=false, whe
sh "ccache -sv"
sh "cat ${CCACHE_DIR}/ccache.conf"
sh "bash -c 'pip3 show tensorrt || true'"

// If the image is pre-installed with cxx11-abi pytorch, using non-cxx11-abi requires reinstallation.
if (reinstall_dependencies == true) {
sh "#!/bin/bash \n" + "pip3 uninstall -y torch"
sh "#!/bin/bash \n" + "yum remove -y libcudnn*"

@@ -1207,10 +1205,9 @@ def runLLMBuildFromPackage(pipeline, cpu_arch, reinstall_dependencies=false, whe
}
buildArgs = "--clean"
if (cpu_arch == AARCH64_TRIPLE) {
buildArgs = "-a '90-real;100-real;120-real'"
} else if (reinstall_dependencies == true) {
buildArgs = "-a '80-real;86-real;89-real;90-real'"
buildArgs += " -a '90-real;100-real;120-real'"
}

withCredentials([usernamePassword(credentialsId: "urm-artifactory-creds", usernameVariable: 'CONAN_LOGIN_USERNAME', passwordVariable: 'CONAN_PASSWORD')]) {
trtllm_utils.llmExecStepWithRetry(pipeline, script: "#!/bin/bash \n" + "cd tensorrt_llm/ && python3 scripts/build_wheel.py --use_ccache -j ${BUILD_JOBS} -D 'WARNING_IS_ERROR=ON' ${buildArgs}")
}
@@ -1470,7 +1467,7 @@ def launchTestJobs(pipeline, testFilter, dockerNode=null)

docBuildSpec = createKubernetesPodConfig(LLM_DOCKER_IMAGE, "a10")
docBuildConfigs = [
"A10-Build_TRT-LLM_Doc": [docBuildSpec, {
"A10-Build_Docs": [docBuildSpec, {
sh "rm -rf **/*.xml *.tar.gz"
runLLMDocBuild(pipeline, config=VANILLA_CONFIG)
}],

@@ -1488,52 +1485,62 @@ def launchTestJobs(pipeline, testFilter, dockerNode=null)
}
}]]}

// Python version and OS for sanity check
sanityCheckConfigs = [
"DLFW": [
LLM_DOCKER_IMAGE,
"PY312-DLFW": [
LLM_ROCKYLINUX8_PY312_DOCKER_IMAGE,
"B200_PCIe",
X86_64_TRIPLE,
false,
"cxx11/",
true,
"dlfw/",
DLFW_IMAGE,
false,
],
"manylinux-py310": [
"PY310-UB2204": [
LLM_ROCKYLINUX8_PY310_DOCKER_IMAGE,
"A10",
X86_64_TRIPLE,
true,
"",
UBUNTU_22_04_IMAGE,
false,
],
"manylinux-py312": [
"PY312-UB2404": [
LLM_ROCKYLINUX8_PY312_DOCKER_IMAGE,
"A10",
"RTX5090",
X86_64_TRIPLE,
true,
"",
UBUNTU_24_04_IMAGE,
true, // Extra PyTorch CUDA 12.8 install
],
]

def toStageName = { gpuType, key -> "${gpuType}-PackageSanityCheck-${key}".toString() }

fullSet += sanityCheckConfigs.collectEntries{ key, values -> [toStageName(values[1], key), null] }.keySet()

if (env.targetArch == AARCH64_TRIPLE) {
sanityCheckConfigs = [
"DLFW": [
"PY312-UB2404": [
LLM_DOCKER_IMAGE,
"GH200",
AARCH64_TRIPLE,
false,
"",
// TODO: Change to UBUNTU_24_04_IMAGE after https://nvbugs/5161461 is fixed
UBUNTU_24_04_IMAGE,
true, // Extra PyTorch CUDA 12.8 install
],
"PY312-DLFW": [
LLM_DOCKER_IMAGE,
"GH200",
AARCH64_TRIPLE,
false,
"dlfw/",
DLFW_IMAGE,
false,
],
]
}

fullSet += [toStageName("GH200", "DLFW")]
def toStageName = { gpuType, key -> "${gpuType}-PackageSanityCheck-${key}".toString() }
fullSet += sanityCheckConfigs.collectEntries{ key, values -> [toStageName(values[1], key), null] }.keySet()

sanityCheckJobs = sanityCheckConfigs.collectEntries {key, values -> [toStageName(values[1], key), {
cacheErrorAndUploadResult(toStageName(values[1], key), {
@@ -1542,6 +1549,9 @@ def launchTestJobs(pipeline, testFilter, dockerNode=null)
if (values[1] == "B200_PCIe") {
gpu_type = "b100-ts2"
}
if (values[1] == "RTX5090") {
gpu_type = "rtx-5090"
}

def k8s_arch = "amd64"
if (cpu_arch == AARCH64_TRIPLE) {

@@ -1563,7 +1573,7 @@ def launchTestJobs(pipeline, testFilter, dockerNode=null)
def wheelName = ""
def cpver = "cp312"
def pyver = "3.12"
if (key.contains("py310")) {
if (key.contains("PY310")) {
cpver = "cp310"
pyver = "3.10"
}

@@ -1607,10 +1617,13 @@ def launchTestJobs(pipeline, testFilter, dockerNode=null)
trtllm_utils.llmExecStepWithRetry(pipeline, script: "pip3 config set global.break-system-packages true")
trtllm_utils.llmExecStepWithRetry(pipeline, script: "pip3 install requests")
trtllm_utils.llmExecStepWithRetry(pipeline, script: "pip3 uninstall -y tensorrt")
if ((values[5] != DLFW_IMAGE) && (cpu_arch == AARCH64_TRIPLE)) {
echo "###### Extra prerequisites on aarch64 Start ######"
trtllm_utils.llmExecStepWithRetry(pipeline, script: "pip3 install torch==2.6.0 torchvision torchaudio --index-url https://download.pytorch.org/whl/cu126")

// Extra PyTorch CUDA 12.8 install
if (values[6]) {
echo "###### Extra PyTorch CUDA 12.8 install Start ######"
trtllm_utils.llmExecStepWithRetry(pipeline, script: "pip3 install torch==2.7.0 torchvision torchaudio --index-url https://download.pytorch.org/whl/cu128")
}

def libEnv = []
if (env.alternativeTRT) {
stage("Replace TensorRT") {
@@ -1,3 +1,4 @@
--extra-index-url https://download.pytorch.org/whl/cu128
accelerate>=0.25.0
build
colored

@@ -21,7 +22,7 @@ StrEnum
sentencepiece>=0.1.99
tensorrt~=10.10.0
# https://docs.nvidia.com/deeplearning/frameworks/pytorch-release-notes/rel-25-04.html#rel-25-04 uses 2.7.0a0.
torch>=2.6.0,<=2.7.0a0
torch>=2.7.0a0,<=2.7.0
torchvision
nvidia-modelopt[torch]~=0.29.0
nvidia-nccl-cu12
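The `--extra-index-url` line added at the top of the requirements file is honored by pip when installing from that file, so the cu128 PyTorch wheels are resolved automatically. A minimal sketch, assuming you run it from the directory containing this requirements file:

```bash
# pip reads the embedded --extra-index-url option from the requirements file
pip3 install -r requirements.txt
```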
@@ -544,7 +544,7 @@ def main(*,
else:
env_ld = os.environ.copy()

new_library_path = "/usr/local/cuda/compat/lib.real"
new_library_path = "/usr/local/cuda/compat:/usr/local/cuda/compat/lib:/usr/local/cuda/compat/lib.real"
if 'LD_LIBRARY_PATH' in env_ld:
new_library_path += f":{env_ld['LD_LIBRARY_PATH']}"
env_ld["LD_LIBRARY_PATH"] = new_library_path
@@ -33,6 +33,16 @@ def run_cmd(cmd):
return result


def handle_check_failure(error_msg):
"""Helper function to handle check failures with consistent messaging"""

print(f"\nError: {error_msg}")
print(
"Please refer to our coding style guidelines at: https://github.com/NVIDIA/TensorRT-LLM/blob/main/CONTRIBUTING.md#coding-style to fix this issue"
)
sys.exit(1)


def main():
# Install pre-commit and bandit from requirements-dev.txt
with open("requirements-dev.txt") as f:

@@ -47,7 +57,10 @@ def main():
run_cmd("pre-commit install")

# Run pre-commit on all files
run_cmd("pre-commit run -a --show-diff-on-failure")
try:
run_cmd("pre-commit run -a --show-diff-on-failure")
except SystemExit:
handle_check_failure("pre-commit checks failed")

# Run bandit security checks
bandit_output = run_cmd(

@@ -56,12 +69,10 @@ def main():

# Check bandit results
if "Total lines skipped (#nosec): 0" not in bandit_output:
print("Error: Found #nosec annotations in code")
sys.exit(1)
handle_check_failure("Found #nosec annotations in code")

if "Issue:" in bandit_output:
print("Error: Bandit found security issues")
sys.exit(1)
handle_check_failure("Bandit found security issues")

print("pre-commit and bandit checks passed")
@@ -12,6 +12,8 @@ l0_sanity_check:
- '*h200*'
- '*l40s*'
- '*a10*'
- '*gb202*'
- '*gb203*'
linux_distribution_name: ubuntu*
tests:
- llmapi/test_llm_examples.py::test_llmapi_quickstart