doc: [TRTLLM-325] Integrate the NGC image in Makefile automation and documentation (#4400)

* doc: [TRTLLM-325] Integrate the NGC image in Makefile automation and documentation

Signed-off-by: Martin Marciniszyn Mehringer <11665257+MartinMarciniszyn@users.noreply.github.com>

* WAR against https://github.com/advisories/GHSA-vqfr-h8mv-ghfj

Signed-off-by: Martin Marciniszyn Mehringer <11665257+MartinMarciniszyn@users.noreply.github.com>

* Fix default assignment for CUDA architectures in SBSA build

Signed-off-by: Martin Marciniszyn Mehringer <11665257+MartinMarciniszyn@users.noreply.github.com>

* Push new docker images

Signed-off-by: Martin Marciniszyn Mehringer <11665257+MartinMarciniszyn@users.noreply.github.com>

* Handle constraints.txt in setup.py

Signed-off-by: Martin Marciniszyn Mehringer <11665257+MartinMarciniszyn@users.noreply.github.com>

---------

Signed-off-by: Martin Marciniszyn Mehringer <11665257+MartinMarciniszyn@users.noreply.github.com>
Authored by Martin Marciniszyn Mehringer on 2025-05-20 08:45:01 +02:00; committed by GitHub
parent f2c0565577
commit 3485347584
9 changed files with 53 additions and 26 deletions


@@ -1,7 +1,7 @@
 version: "3.9"
 services:
   tensorrt_llm-dev:
-    image: urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:pytorch-25.04-py3-x86_64-ubuntu24.04-trt10.10.0.31-skip-tritondevel-202505160532-3934
+    image: urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:pytorch-25.04-py3-x86_64-ubuntu24.04-trt10.10.0.31-skip-tritondevel-202505191345-4400
     network_mode: host
     ipc: host
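To sanity-check the bumped development image, the service above can be started directly with Docker Compose. A minimal sketch, assuming the file shown here lives at `docker/docker-compose.yml` (the path is an assumption):

```bash
# Start an interactive shell in the tensorrt_llm-dev service with the updated image.
docker compose -f docker/docker-compose.yml run --rm tensorrt_llm-dev bash
```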

constraints.txt (new file)

@@ -0,0 +1,2 @@
+# WAR against https://github.com/advisories/GHSA-vqfr-h8mv-ghfj
+h11>=0.16.0
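A pip constraints file caps or floors versions during dependency resolution but never installs anything by itself. A minimal sketch of how this pin is consumed (the `-c` line added to `requirements.txt` later in this diff makes it automatic):

```bash
# Resolve requirements while forcing any h11 that gets pulled in to be >= 0.16.0.
pip install -r requirements.txt -c constraints.txt
```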


@@ -72,6 +72,9 @@ RUN bash ./install_pytorch.sh $TORCH_INSTALL_TYPE && rm install_pytorch.sh
 RUN pip3 uninstall -y opencv && rm -rf /usr/local/lib/python3*/dist-packages/cv2/
 RUN pip3 install opencv-python-headless --force-reinstall --no-deps --no-cache-dir
+
+# WAR against https://github.com/advisories/GHSA-vqfr-h8mv-ghfj
+RUN pip3 install --upgrade "h11>=0.16" --no-cache-dir
 
 FROM ${TRITON_IMAGE}:${TRITON_BASE_TAG} AS triton
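A quick check that the workaround actually lands in a built image; a hedged sketch in which the image tag is illustrative:

```bash
# h11 exposes __version__; 0.16.0 or newer carries the fix for GHSA-vqfr-h8mv-ghfj.
docker run --rm tensorrt-llm-devel:latest python3 -c "import h11; print(h11.__version__)"
```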
@@ -102,7 +105,7 @@ COPY benchmarks benchmarks
 COPY scripts scripts
 COPY tensorrt_llm tensorrt_llm
 COPY 3rdparty 3rdparty
-COPY .gitmodules setup.py requirements.txt requirements-dev.txt ./
+COPY .gitmodules setup.py requirements.txt requirements-dev.txt constraints.txt ./
 
 # Create cache directories for pip and ccache
 RUN mkdir -p /root/.cache/pip /root/.cache/ccache


@@ -28,12 +28,8 @@ PUSH_TO_STAGING ?= 1
 DOCKER_BUILD_OPTS ?= --pull --load
 DOCKER_BUILD_ARGS ?=
 DOCKER_PROGRESS ?= auto
-CUDA_ARCHS ?=
 PLATFORM ?= $(shell uname -m | grep -q 'aarch64' && echo "arm64" || echo "amd64")
-ifeq ($(PLATFORM), arm64)
-CUDA_ARCHS = '90-real;100-real;120-real'
-endif
+CUDA_ARCHS ?= $(if $(filter arm64,$(PLATFORM)),'90-real;100-real;120-real',)
 BUILD_WHEEL_OPTS ?=
 BUILD_WHEEL_ARGS ?= $(shell grep 'ARG BUILD_WHEEL_ARGS=' Dockerfile.multi | grep -o '=.*' | tr -d '="')$(if $(CUDA_ARCHS), --cuda_architectures $(CUDA_ARCHS))$(if $(BUILD_WHEEL_OPTS), $(BUILD_WHEEL_OPTS))
 TORCH_INSTALL_TYPE ?= skip
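The switch from a forced assignment to a conditional default is the actual SBSA fix: previously the `ifeq` block overwrote a `CUDA_ARCHS` value inherited from the environment on arm64, whereas `?=` only fills the variable when it is unset. A hedged sketch of the resulting behavior:

```bash
# On an aarch64 (SBSA) host the default applies only when CUDA_ARCHS is unset:
make -C docker release_build                          # defaults to '90-real;100-real;120-real'
# A value inherited from the environment is no longer clobbered:
CUDA_ARCHS="90-real" make -C docker release_build     # builds for Hopper only
```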
@@ -47,6 +43,8 @@ TRT_LLM_VERSION ?= $(shell grep '^__version__' ../tensorrt_llm/version.py | g
 GITHUB_MIRROR ?=
 PYTHON_VERSION ?=
 NGC_STAGING_REPO ?= nvcr.io/nvstaging/tensorrt-llm
+NGC_REPO ?= nvcr.io/nvidia/tensorrt-llm
+NGC_USE_STAGING ?= 0
 
 define add_local_user
 docker build \
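`NGC_USE_STAGING` steers the `ngc-*_run` targets between the two registries defined above; a hedged sketch of both invocations:

```bash
# Default: pull the published image from nvcr.io/nvidia/tensorrt-llm.
make -C docker ngc-release_run LOCAL_USER=1 DOCKER_PULL=1
# Opt in to the pre-release image from nvcr.io/nvstaging/tensorrt-llm.
make -C docker ngc-release_run LOCAL_USER=1 DOCKER_PULL=1 NGC_USE_STAGING=1
```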
@@ -201,22 +199,29 @@ ngc-devel_%: IMAGE_TAG = $(TRT_LLM_VERSION)
 ngc-devel_push: DOCKER_BUILD_ARGS = --push
 ngc-devel_push: ngc-devel_build ;
+ngc-devel_run: IMAGE_NAME = $(if $(filter 1,$(NGC_USE_STAGING)),$(NGC_STAGING_REPO),$(NGC_REPO))
 
 ngc-release_%: STAGE = release
+ngc-release_%: DOCKER_BUILD_OPTS = --pull --load --platform linux/$(PLATFORM)
 ngc-release_%: DEVEL_IMAGE = $(NGC_STAGING_REPO)/devel:$(TRT_LLM_VERSION)
-ngc-release_%: IMAGE_NAME = nvcr.io/nvstaging/tensorrt-llm
+ngc-release_%: IMAGE_NAME = $(NGC_STAGING_REPO)
+ngc-release_%: IMAGE_TAG = $(TRT_LLM_VERSION)-$(PLATFORM)
+ngc-release_run: IMAGE_NAME = $(if $(filter 1,$(NGC_USE_STAGING)),$(NGC_STAGING_REPO),$(NGC_REPO))
+ngc-release_run: WORK_DIR = /app/tensorrt_llm
 
 ngc-manifest_%: STAGE = release
 ngc-manifest_%: IMAGE_NAME = $(NGC_STAGING_REPO)
 ngc-manifest_%: IMAGE_TAG = $(TRT_LLM_VERSION)
 
 ngc-manifest_create:
+	docker pull $(IMAGE_WITH_TAG)-amd64
+	docker pull $(IMAGE_WITH_TAG)-arm64
 	docker manifest create $(IMAGE_WITH_TAG) \
 		--amend $(IMAGE_WITH_TAG)-amd64 \
 		--amend $(IMAGE_WITH_TAG)-arm64
 
-ngc-manifest_push:
+ngc-manifest_push: ngc-manifest_create
 	docker manifest push $(IMAGE_WITH_TAG)
 
 build: devel_build ;
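For reference, `ngc-manifest_push` now depends on `ngc-manifest_create`, so a push always pulls both per-arch images and stitches them into one multi-arch tag first. Expanded by hand, and assuming `IMAGE_WITH_TAG` resolves to the staging release image with an illustrative `1.0.0` version:

```bash
# Hand-expanded equivalent of `make -C docker ngc-manifest_push`; image path and tag are assumptions.
docker pull nvcr.io/nvstaging/tensorrt-llm/release:1.0.0-amd64
docker pull nvcr.io/nvstaging/tensorrt-llm/release:1.0.0-arm64
docker manifest create nvcr.io/nvstaging/tensorrt-llm/release:1.0.0 \
    --amend nvcr.io/nvstaging/tensorrt-llm/release:1.0.0-amd64 \
    --amend nvcr.io/nvstaging/tensorrt-llm/release:1.0.0-arm64
docker manifest push nvcr.io/nvstaging/tensorrt-llm/release:1.0.0
```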


@@ -52,6 +52,28 @@ make -C docker release_build CUDA_ARCHS="80-real;90-real"
 
 For more build options, see the variables defined in [`Makefile`](Makefile).
 
+### NGC Integration
+
+When building from source, one can conveniently download a Docker image for development from
+the [NVIDIA NGC Catalog](https://catalog.ngc.nvidia.com/) and start it like so:
+
+```bash
+make -C docker ngc-devel_run LOCAL_USER=1 DOCKER_PULL=1
+```
+
+As before, specifying `LOCAL_USER=1` runs the container under the local user's identity. Specifying `DOCKER_PULL=1`
+is optional; it pulls the latest image from the NGC Catalog before starting. The command maps the source code into the
+container at `/code/tensorrt_llm`.
+
+We also provide an image with pre-installed release binaries. It can be used like so:
+
+```bash
+make -C docker ngc-release_run LOCAL_USER=1 DOCKER_PULL=1
+```
+
+To deploy a specific version of TensorRT-LLM, select it with
+`TRT_LLM_VERSION=<version_tag>`. The application examples and benchmarks are installed in `/app/tensorrt_llm`.
+
 ### Jenkins Integration
 
 [`Makefile`](Makefile) has special targets for building, pushing and running the Docker build image used on Jenkins.
### Jenkins Integration
[`Makefile`](Makefile) has special targets for building, pushing and running the Docker build image used on Jenkins.
@@ -91,14 +113,3 @@ make -C docker trtllm_run LOCAL_USER=1 DOCKER_PULL=1
 
 The argument `DOCKER_PULL=1` instructs `make` to pull the latest version of the image before deploying it in the container.
 By default, images are tagged by their `git` branch name and may be frequently updated.
-
-### Binary Compatible Environment
-
-Currently, `BatchManager` is released as a closed source binary library. In order to make it deployable in a wider
-scope, the compilation environment needs to be constructed in the following way.
-
-The compilation environment for x86_64 architecture
-
-```bash
-make -C docker centos7_push
-```
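Putting the new README instructions together: a hedged sketch of deploying a pinned release, where the version tag is illustrative:

```bash
# Run a specific released version instead of the latest one (1.0.0 is a placeholder tag).
make -C docker ngc-release_run LOCAL_USER=1 DOCKER_PULL=1 TRT_LLM_VERSION=1.0.0
# Inside the container, the preinstalled examples and benchmarks live under /app/tensorrt_llm.
```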


@@ -21,10 +21,10 @@ UPLOAD_PATH = env.uploadPath ? env.uploadPath : "sw-tensorrt-generic/llm-artifac
 
 // Container configuration
 // available tags can be found in: https://urm.nvidia.com/artifactory/sw-tensorrt-docker/tensorrt-llm/
 // [base_image_name]-[arch]-[os](-[python_version])-[trt_version]-[torch_install_type]-[stage]-[date]-[mr_id]
-LLM_DOCKER_IMAGE = "urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:pytorch-25.04-py3-x86_64-ubuntu24.04-trt10.10.0.31-skip-tritondevel-202505160532-3934"
-LLM_SBSA_DOCKER_IMAGE = "urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:pytorch-25.04-py3-aarch64-ubuntu24.04-trt10.10.0.31-skip-tritondevel-202505160532-3934"
-LLM_ROCKYLINUX8_PY310_DOCKER_IMAGE = "urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:cuda-12.9.0-devel-rocky8-x86_64-rocky8-py310-trt10.10.0.31-skip-tritondevel-202505160532-3934"
-LLM_ROCKYLINUX8_PY312_DOCKER_IMAGE = "urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:cuda-12.9.0-devel-rocky8-x86_64-rocky8-py312-trt10.10.0.31-skip-tritondevel-202505160532-3934"
+LLM_DOCKER_IMAGE = "urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:pytorch-25.04-py3-x86_64-ubuntu24.04-trt10.10.0.31-skip-tritondevel-202505191345-4400"
+LLM_SBSA_DOCKER_IMAGE = "urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:pytorch-25.04-py3-aarch64-ubuntu24.04-trt10.10.0.31-skip-tritondevel-202505191345-4400"
+LLM_ROCKYLINUX8_PY310_DOCKER_IMAGE = "urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:cuda-12.9.0-devel-rocky8-x86_64-rocky8-py310-trt10.10.0.31-skip-tritondevel-202505191345-4400"
+LLM_ROCKYLINUX8_PY312_DOCKER_IMAGE = "urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:cuda-12.9.0-devel-rocky8-x86_64-rocky8-py312-trt10.10.0.31-skip-tritondevel-202505191345-4400"
 
 // TODO: Move common variables to an unified location
 BUILD_CORES_REQUEST = "8"


@@ -1,7 +1,7 @@
 import java.lang.InterruptedException
 
-DOCKER_IMAGE = "urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:pytorch-25.04-py3-x86_64-ubuntu24.04-trt10.10.0.31-skip-tritondevel-202505121727-4049"
+DOCKER_IMAGE = "urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:pytorch-25.04-py3-x86_64-ubuntu24.04-trt10.10.0.31-skip-tritondevel-202505191345-4400"
 
 def createKubernetesPodConfig(image)
 {


@@ -1,4 +1,5 @@
 --extra-index-url https://download.pytorch.org/whl/cu128
+-c constraints.txt
 accelerate>=0.25.0
 build
 colored
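pip honors a `-c` line embedded in a requirements file, so the constraint is applied without any extra flag; a minimal sketch:

```bash
# The embedded "-c constraints.txt" is picked up automatically during resolution.
pip install -r requirements.txt
```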


@@ -31,7 +31,8 @@ def parse_requirements(filename: os.PathLike):
     extra_URLs = []
     deps = []
     for line in requirements:
-        if line.startswith("#") or line.startswith("-r"):
+        if line.startswith("#") or line.startswith("-r") or line.startswith(
+                "-c"):
             continue
 
         # handle -i and --extra-index-url options
@@ -87,6 +88,10 @@ required_deps, extra_URLs = parse_requirements(
 devel_deps, _ = parse_requirements(
     Path("requirements-dev-windows.txt"
          if on_windows else "requirements-dev.txt"))
+constraints_file = Path("constraints.txt")
+if constraints_file.exists():
+    constraints, _ = parse_requirements(constraints_file)
+    required_deps.extend(constraints)
 
 if on_windows:
     package_data = [
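With the constraints merged into `required_deps`, the h11 floor also lands in the wheel metadata. A hedged way to confirm, assuming a wheel built into `dist/`:

```bash
# Build the wheel without resolving dependencies, then grep its metadata for the pin.
pip wheel --no-deps -w dist .
unzip -p dist/tensorrt_llm-*.whl '*.dist-info/METADATA' | grep -i h11
# Expected (illustrative): Requires-Dist: h11>=0.16.0
```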