mirror of
https://github.com/NVIDIA/TensorRT-LLM.git
synced 2026-01-13 22:18:36 +08:00
doc: [TRTLLM-325]Integrate the NGC image in Makefile automation and document (#4400)
* doc: [TRTLLM-325]Integrate the NGC image in Makefile automation and documentation Signed-off-by: Martin Marciniszyn Mehringer <11665257+MartinMarciniszyn@users.noreply.github.com> * WAR against https://github.com/advisories/GHSA-vqfr-h8mv-ghfj Signed-off-by: Martin Marciniszyn Mehringer <11665257+MartinMarciniszyn@users.noreply.github.com> * Fix default assignment for CUDA architectures in SBSA build Signed-off-by: Martin Marciniszyn Mehringer <11665257+MartinMarciniszyn@users.noreply.github.com> * Push new docker images Signed-off-by: Martin Marciniszyn Mehringer <11665257+MartinMarciniszyn@users.noreply.github.com> * Handle constraints.txt in setup.py Signed-off-by: Martin Marciniszyn Mehringer <11665257+MartinMarciniszyn@users.noreply.github.com> --------- Signed-off-by: Martin Marciniszyn Mehringer <11665257+MartinMarciniszyn@users.noreply.github.com>
This commit is contained in:
parent
f2c0565577
commit
3485347584
@ -1,7 +1,7 @@
|
||||
version: "3.9"
|
||||
services:
|
||||
tensorrt_llm-dev:
|
||||
image: urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:pytorch-25.04-py3-x86_64-ubuntu24.04-trt10.10.0.31-skip-tritondevel-202505160532-3934
|
||||
image: urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:pytorch-25.04-py3-x86_64-ubuntu24.04-trt10.10.0.31-skip-tritondevel-202505191345-4400
|
||||
network_mode: host
|
||||
ipc: host
|
||||
|
||||
|
||||
2
constraints.txt
Normal file
2
constraints.txt
Normal file
@ -0,0 +1,2 @@
|
||||
# WAR against https://github.com/advisories/GHSA-vqfr-h8mv-ghfj
|
||||
h11>=0.16.0
|
||||
@ -72,6 +72,9 @@ RUN bash ./install_pytorch.sh $TORCH_INSTALL_TYPE && rm install_pytorch.sh
|
||||
RUN pip3 uninstall -y opencv && rm -rf /usr/local/lib/python3*/dist-packages/cv2/
|
||||
RUN pip3 install opencv-python-headless --force-reinstall --no-deps --no-cache-dir
|
||||
|
||||
# WAR against https://github.com/advisories/GHSA-vqfr-h8mv-ghfj
|
||||
# The requirement must be quoted: in a shell-form RUN an unquoted ">=0.16" is
# parsed by /bin/sh as a redirection of stdout to a file named "=0.16", so the
# version specifier would never reach pip.
RUN pip3 install --upgrade "h11>=0.16" --no-cache-dir
|
||||
|
||||
|
||||
FROM ${TRITON_IMAGE}:${TRITON_BASE_TAG} AS triton
|
||||
|
||||
@ -102,7 +105,7 @@ COPY benchmarks benchmarks
|
||||
COPY scripts scripts
|
||||
COPY tensorrt_llm tensorrt_llm
|
||||
COPY 3rdparty 3rdparty
|
||||
COPY .gitmodules setup.py requirements.txt requirements-dev.txt ./
|
||||
COPY .gitmodules setup.py requirements.txt requirements-dev.txt constraints.txt ./
|
||||
|
||||
# Create cache directories for pip and ccache
|
||||
RUN mkdir -p /root/.cache/pip /root/.cache/ccache
|
||||
|
||||
@ -28,12 +28,8 @@ PUSH_TO_STAGING ?= 1
|
||||
DOCKER_BUILD_OPTS ?= --pull --load
|
||||
DOCKER_BUILD_ARGS ?=
|
||||
DOCKER_PROGRESS ?= auto
|
||||
CUDA_ARCHS ?=
|
||||
PLATFORM ?= $(shell uname -m | grep -q 'aarch64' && echo "arm64" || echo "amd64")
|
||||
ifeq ($(PLATFORM), arm64)
|
||||
CUDA_ARCHS = '90-real;100-real;120-real'
|
||||
endif
|
||||
|
||||
# Default CUDA architecture list. `?=` only takes effect when CUDA_ARCHS has not
# already been defined (e.g. on the command line or in the environment). On arm64
# the default is the fixed list below; on any other PLATFORM it is empty.
CUDA_ARCHS ?= $(if $(filter arm64,$(PLATFORM)),'90-real;100-real;120-real',)
|
||||
BUILD_WHEEL_OPTS ?=
|
||||
BUILD_WHEEL_ARGS ?= $(shell grep 'ARG BUILD_WHEEL_ARGS=' Dockerfile.multi | grep -o '=.*' | tr -d '="')$(if $(CUDA_ARCHS), --cuda_architectures $(CUDA_ARCHS))$(if $(BUILD_WHEEL_OPTS), $(BUILD_WHEEL_OPTS))
|
||||
TORCH_INSTALL_TYPE ?= skip
|
||||
@ -47,6 +43,8 @@ TRT_LLM_VERSION ?= $(shell grep '^__version__' ../tensorrt_llm/version.py | g
|
||||
GITHUB_MIRROR ?=
|
||||
PYTHON_VERSION ?=
|
||||
NGC_STAGING_REPO ?= nvcr.io/nvstaging/tensorrt-llm
|
||||
NGC_REPO ?= nvcr.io/nvidia/tensorrt-llm
|
||||
NGC_USE_STAGING ?= 0
|
||||
|
||||
define add_local_user
|
||||
docker build \
|
||||
@ -201,22 +199,29 @@ ngc-devel_%: IMAGE_TAG = $(TRT_LLM_VERSION)
|
||||
ngc-devel_push: DOCKER_BUILD_ARGS = --push
|
||||
ngc-devel_push: ngc-devel_build ;
|
||||
|
||||
ngc-devel_run: IMAGE_NAME = $(if $(filter 1,$(NGC_USE_STAGING)),$(NGC_STAGING_REPO),$(NGC_REPO))
|
||||
|
||||
ngc-release_%: STAGE = release
|
||||
ngc-release_%: DOCKER_BUILD_OPTS = --pull --load --platform linux/$(PLATFORM)
|
||||
ngc-release_%: DEVEL_IMAGE = $(NGC_STAGING_REPO)/devel:$(TRT_LLM_VERSION)
|
||||
ngc-release_%: IMAGE_NAME = nvcr.io/nvstaging/tensorrt-llm
|
||||
ngc-release_%: IMAGE_NAME = $(NGC_STAGING_REPO)
|
||||
ngc-release_%: IMAGE_TAG = $(TRT_LLM_VERSION)-$(PLATFORM)
|
||||
|
||||
ngc-release_run: IMAGE_NAME = $(if $(filter 1,$(NGC_USE_STAGING)),$(NGC_STAGING_REPO),$(NGC_REPO))
|
||||
ngc-release_run: WORK_DIR = /app/tensorrt_llm
|
||||
|
||||
ngc-manifest_%: STAGE = release
|
||||
ngc-manifest_%: IMAGE_NAME = $(NGC_STAGING_REPO)
|
||||
ngc-manifest_%: IMAGE_TAG = $(TRT_LLM_VERSION)
|
||||
|
||||
ngc-manifest_create:
|
||||
docker pull $(IMAGE_WITH_TAG)-amd64
|
||||
docker pull $(IMAGE_WITH_TAG)-arm64
|
||||
docker manifest create $(IMAGE_WITH_TAG) \
|
||||
--amend $(IMAGE_WITH_TAG)-amd64 \
|
||||
--amend $(IMAGE_WITH_TAG)-arm64
|
||||
|
||||
ngc-manifest_push:
|
||||
ngc-manifest_push: ngc-manifest_create
|
||||
docker manifest push $(IMAGE_WITH_TAG)
|
||||
|
||||
build: devel_build ;
|
||||
|
||||
@ -52,6 +52,28 @@ make -C docker release_build CUDA_ARCHS="80-real;90-real"
|
||||
|
||||
For more build options, see the variables defined in [`Makefile`](Makefile).
|
||||
|
||||
### NGC Integration
|
||||
|
||||
When building from source, one can conveniently download a docker image for development from
|
||||
the [NVIDIA NGC Catalog](https://catalog.ngc.nvidia.com/) and start it like so:
|
||||
|
||||
```bash
|
||||
make -C docker ngc-devel_run LOCAL_USER=1 DOCKER_PULL=1
|
||||
```
|
||||
|
||||
As before, specifying `LOCAL_USER=1` will run the container with the local user's identity. Specifying `DOCKER_PULL=1`
|
||||
is optional, but it will pull the latest image from the NGC Catalog. The `ngc-devel_run` target maps the source code into the container
|
||||
in the directory `/code/tensorrt_llm`.
|
||||
|
||||
We also provide an image with pre-installed binaries for release. This can be used like so:
|
||||
|
||||
```bash
|
||||
make -C docker ngc-release_run LOCAL_USER=1 DOCKER_PULL=1
|
||||
```
|
||||
|
||||
If you want to deploy a specific version of TensorRT-LLM, you can specify the version with
|
||||
`TRT_LLM_VERSION=<version_tag>`. The application examples and benchmarks are installed in `/app/tensorrt_llm`.
|
||||
|
||||
### Jenkins Integration
|
||||
|
||||
[`Makefile`](Makefile) has special targets for building, pushing and running the Docker build image used on Jenkins.
|
||||
@ -91,14 +113,3 @@ make -C docker trtllm_run LOCAL_USER=1 DOCKER_PULL=1
|
||||
|
||||
The argument `DOCKER_PULL=1` instructs `make` to pull the latest version of the image before deploying it in the container.
|
||||
By default, images are tagged by their `git` branch name and may be frequently updated.
|
||||
|
||||
### Binary Compatible Environment
|
||||
|
||||
Currently, `BatchManager` is released as a closed source binary library. In order to make it deployable in a wider
|
||||
scope, the compilation environment needs to be constructed in the following way.
|
||||
|
||||
The compilation environment for x86_64 architecture
|
||||
|
||||
```bash
|
||||
make -C docker centos7_push
|
||||
```
|
||||
|
||||
@ -21,10 +21,10 @@ UPLOAD_PATH = env.uploadPath ? env.uploadPath : "sw-tensorrt-generic/llm-artifac
|
||||
// Container configuration
|
||||
// available tags can be found in: https://urm.nvidia.com/artifactory/sw-tensorrt-docker/tensorrt-llm/
|
||||
// [base_image_name]-[arch]-[os](-[python_version])-[trt_version]-[torch_install_type]-[stage]-[date]-[mr_id]
|
||||
LLM_DOCKER_IMAGE = "urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:pytorch-25.04-py3-x86_64-ubuntu24.04-trt10.10.0.31-skip-tritondevel-202505160532-3934"
|
||||
LLM_SBSA_DOCKER_IMAGE = "urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:pytorch-25.04-py3-aarch64-ubuntu24.04-trt10.10.0.31-skip-tritondevel-202505160532-3934"
|
||||
LLM_ROCKYLINUX8_PY310_DOCKER_IMAGE = "urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:cuda-12.9.0-devel-rocky8-x86_64-rocky8-py310-trt10.10.0.31-skip-tritondevel-202505160532-3934"
|
||||
LLM_ROCKYLINUX8_PY312_DOCKER_IMAGE = "urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:cuda-12.9.0-devel-rocky8-x86_64-rocky8-py312-trt10.10.0.31-skip-tritondevel-202505160532-3934"
|
||||
LLM_DOCKER_IMAGE = "urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:pytorch-25.04-py3-x86_64-ubuntu24.04-trt10.10.0.31-skip-tritondevel-202505191345-4400"
|
||||
LLM_SBSA_DOCKER_IMAGE = "urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:pytorch-25.04-py3-aarch64-ubuntu24.04-trt10.10.0.31-skip-tritondevel-202505191345-4400"
|
||||
LLM_ROCKYLINUX8_PY310_DOCKER_IMAGE = "urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:cuda-12.9.0-devel-rocky8-x86_64-rocky8-py310-trt10.10.0.31-skip-tritondevel-202505191345-4400"
|
||||
LLM_ROCKYLINUX8_PY312_DOCKER_IMAGE = "urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:cuda-12.9.0-devel-rocky8-x86_64-rocky8-py312-trt10.10.0.31-skip-tritondevel-202505191345-4400"
|
||||
|
||||
// TODO: Move common variables to a unified location
|
||||
BUILD_CORES_REQUEST = "8"
|
||||
|
||||
@ -1,7 +1,7 @@
|
||||
|
||||
import java.lang.InterruptedException
|
||||
|
||||
DOCKER_IMAGE = "urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:pytorch-25.04-py3-x86_64-ubuntu24.04-trt10.10.0.31-skip-tritondevel-202505121727-4049"
|
||||
DOCKER_IMAGE = "urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:pytorch-25.04-py3-x86_64-ubuntu24.04-trt10.10.0.31-skip-tritondevel-202505191345-4400"
|
||||
|
||||
def createKubernetesPodConfig(image)
|
||||
{
|
||||
|
||||
@ -1,4 +1,5 @@
|
||||
--extra-index-url https://download.pytorch.org/whl/cu128
|
||||
-c constraints.txt
|
||||
accelerate>=0.25.0
|
||||
build
|
||||
colored
|
||||
|
||||
7
setup.py
7
setup.py
@ -31,7 +31,8 @@ def parse_requirements(filename: os.PathLike):
|
||||
extra_URLs = []
|
||||
deps = []
|
||||
for line in requirements:
|
||||
if line.startswith("#") or line.startswith("-r"):
|
||||
if line.startswith("#") or line.startswith("-r") or line.startswith(
|
||||
"-c"):
|
||||
continue
|
||||
|
||||
# handle -i and --extra-index-url options
|
||||
@ -87,6 +88,10 @@ required_deps, extra_URLs = parse_requirements(
|
||||
devel_deps, _ = parse_requirements(
|
||||
Path("requirements-dev-windows.txt"
|
||||
if on_windows else "requirements-dev.txt"))
|
||||
constraints_file = Path("constraints.txt")
|
||||
if constraints_file.exists():
|
||||
constraints, _ = parse_requirements(constraints_file)
|
||||
required_deps.extend(constraints)
|
||||
|
||||
if on_windows:
|
||||
package_data = [
|
||||
|
||||
Loading…
Reference in New Issue
Block a user