infra: install Triton in the base image (#3759)

* infra: install Triton in the base image

Signed-off-by: Iman Tabrizian <10105175+tabrizian@users.noreply.github.com>

* install Triton from the base image

Signed-off-by: Iman Tabrizian <10105175+tabrizian@users.noreply.github.com>

* update base image

Signed-off-by: Iman Tabrizian <10105175+tabrizian@users.noreply.github.com>

* Address review comments

Signed-off-by: Iman Tabrizian <10105175+tabrizian@users.noreply.github.com>

* update base image

Signed-off-by: Iman Tabrizian <10105175+tabrizian@users.noreply.github.com>

* waive test

Signed-off-by: Iman Tabrizian <10105175+tabrizian@users.noreply.github.com>

---------

Signed-off-by: Iman Tabrizian <10105175+tabrizian@users.noreply.github.com>
Iman Tabrizian 2025-04-27 16:36:30 -07:00 committed by GitHub
parent ad4226d946
commit 74cc9e26ff
12 changed files with 68 additions and 12 deletions

View File

@@ -1,8 +1,7 @@
 version: "3.9"
 services:
   tensorrt_llm-dev:
-    image: urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:pytorch-25.03-py3-x86_64-ubuntu24.04-trt10.9.0.34-skip-devel-202504101610-3421
+    image: urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:pytorch-25.03-py3-x86_64-ubuntu24.04-trt10.9.0.34-skip-devel-202504250100-3759
     network_mode: host
     ipc: host
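For reference, a minimal way to pick up the new tag locally through this service, assuming access to urm.nvidia.com and a shell in the directory holding the compose file:

    docker compose pull tensorrt_llm-dev   # fetch the 202504250100-3759 image
    docker compose up -d tensorrt_llm-dev  # host networking/IPC come from the service definition above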

View File

@@ -65,7 +65,7 @@ repos:
         additional_dependencies:
           - tomli
         # add ignore words list
-        args: ["-L", "Mor,ans"]
+        args: ["-L", "Mor,ans,thirdparty"]
   - repo: https://github.com/astral-sh/ruff-pre-commit
     rev: v0.9.4
     hooks:
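To sanity-check the widened ignore list, the hook can be run on its own; a sketch assuming the hook id is codespell (the standard id for this hook, not shown in the diff):

    pre-commit run codespell --all-files   # "thirdparty" should no longer be flagged as a typo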

View File

@@ -1,8 +1,12 @@
 # Multi-stage Dockerfile
 ARG BASE_IMAGE=nvcr.io/nvidia/pytorch
+ARG TRITON_IMAGE=nvcr.io/nvidia/tritonserver
 ARG BASE_TAG=25.03-py3
+ARG TRITON_BASE_TAG=25.03-py3
 ARG DEVEL_IMAGE=devel
 
+FROM ${TRITON_IMAGE}:${TRITON_BASE_TAG} AS triton
+
 FROM ${BASE_IMAGE}:${BASE_TAG} AS base
 
 # https://www.gnu.org/software/bash/manual/html_node/Bash-Startup-Files.html
@@ -49,6 +53,15 @@ RUN bash ./install_tensorrt.sh \
     --CUBLAS_VER=${CUBLAS_VER} && \
     rm install_tensorrt.sh
 
+ARG INSTALL_TRITON=0
+COPY --from=triton /opt/tritonserver/backends/python /opt/tritonserver/backends/python
+COPY --from=triton /opt/tritonserver/lib /opt/tritonserver/lib
+COPY --from=triton /opt/tritonserver/include /opt/tritonserver/include
+COPY --from=triton /opt/tritonserver/bin /opt/tritonserver/bin
+COPY --from=triton /opt/tritonserver/caches /opt/tritonserver/caches
+COPY docker/common/install_triton.sh install_triton.sh
+RUN bash ./install_triton.sh && rm install_triton.sh
+
 # Install latest Polygraphy
 COPY docker/common/install_polygraphy.sh install_polygraphy.sh
 RUN bash ./install_polygraphy.sh && rm install_polygraphy.sh
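A hedged build sketch for the new stage wiring, assuming the repository root as the build context (the tensorrt-llm-devel tag is illustrative, not from the diff):

    # Default path: Triton artifacts are copied in and install_triton.sh installs the deps
    docker build -f docker/Dockerfile.multi \
        --build-arg TRITON_BASE_TAG=25.03-py3 \
        --build-arg INSTALL_TRITON=1 \
        -t tensorrt-llm-devel .
    # Opting out: the copied /opt/tritonserver tree is removed by install_triton.sh
    docker build -f docker/Dockerfile.multi --build-arg INSTALL_TRITON=0 -t tensorrt-llm-devel .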

View File

@@ -41,6 +41,7 @@ GIT_COMMIT ?= $(shell git rev-parse HEAD)
 TRT_LLM_VERSION ?= $(shell grep '^__version__' ../tensorrt_llm/version.py | grep -o '=.*' | tr -d '= "')
 GITHUB_MIRROR ?=
 PYTHON_VERSION ?=
+INSTALL_TRITON ?= 1
 
 define add_local_user
 	docker build \
@@ -79,6 +80,7 @@ endef
 		$(if $(GIT_COMMIT), --build-arg GIT_COMMIT="$(GIT_COMMIT)") \
 		$(if $(GITHUB_MIRROR), --build-arg GITHUB_MIRROR="$(GITHUB_MIRROR)") \
 		$(if $(PYTHON_VERSION), --build-arg PYTHON_VERSION="$(PYTHON_VERSION)") \
+		$(if $(INSTALL_TRITON), --build-arg INSTALL_TRITON="$(INSTALL_TRITON)") \
 		$(if $(STAGE), --target $(STAGE)) \
 		--file Dockerfile.multi \
 		--tag $(IMAGE_WITH_TAG) \
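Since $(if ...) only tests for a non-empty value, the pass-through behaves as sketched below, assuming the devel_build target name seen in the Jenkins call sites:

    make -C docker devel_build INSTALL_TRITON=1   # default; forwards --build-arg INSTALL_TRITON=1
    make -C docker devel_build INSTALL_TRITON=0   # "0" is non-empty, so the build arg is still forwarded
    make -C docker devel_build INSTALL_TRITON=    # empty: no build arg, the Dockerfile default (0) applies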

View File

@@ -0,0 +1,39 @@
+#!/bin/bash
+
+set -ex
+
+install_boost() {
+    # Install boost version >= 1.78 for boost::span
+    # Current libboost-dev apt packages are < 1.78, so install from tar.gz
+    wget -O /tmp/boost.tar.gz https://archives.boost.io/release/1.80.0/source/boost_1_80_0.tar.gz && (cd /tmp && tar xzf boost.tar.gz) && mv /tmp/boost_1_80_0/boost /usr/include/boost
+    rm -rf /tmp/boost_1_80_0
+    rm -rf /tmp/boost.tar.gz
+}
+
+install_triton_deps() {
+    apt-get update && apt-get install -y \
+        pigz \
+        libxml2-dev \
+        libre2-dev \
+        libnuma-dev \
+        python3-build \
+        libb64-dev \
+        libarchive-dev \
+        datacenter-gpu-manager=1:3.3.6
+    install_boost
+}
+
+# Install Triton dependencies only if the base image is Ubuntu
+ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"')
+
+if [ "$INSTALL_TRITON" == "1" ]; then
+    if [ "$ID" == "ubuntu" ]; then
+        install_triton_deps
+    else
+        rm -rf /opt/tritonserver
+        echo "Skip Triton installation for non-Ubuntu base image"
+    fi
+else
+    echo "Skip Triton installation when INSTALL_TRITON is set to 0"
+    rm -rf /opt/tritonserver
+fi
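For a quick check of the branch logic outside an image build, a sketch assuming root on Ubuntu and that the /opt/tritonserver tree was already copied in by the Dockerfile stage above:

    INSTALL_TRITON=1 bash docker/common/install_triton.sh   # installs the apt deps plus the boost headers
    ls /opt/tritonserver/bin                                 # the copied server binaries should remain
    INSTALL_TRITON=0 bash docker/common/install_triton.sh   # prints the skip message and removes /opt/tritonserver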

View File

@@ -16,7 +16,7 @@ AARCH64_TRIPLE = "aarch64-linux-gnu"
 LLM_DOCKER_IMAGE = env.dockerImage
 
-AGENT_IMAGE = "urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:pytorch-25.01-py3-x86_64-ubuntu24.04-trt10.8.0.43-skip-devel-202503131720-8877"
+AGENT_IMAGE = "urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:pytorch-25.03-py3-x86_64-ubuntu24.04-trt10.9.0.34-skip-devel-202504250100-3759"
 
 POD_TIMEOUT_SECONDS = env.podTimeoutSeconds ? env.podTimeoutSeconds : "21600"

View File

@@ -137,6 +137,7 @@ def buildImage(target, action="build", torchInstallType="skip", args="", custom_
         TORCH_INSTALL_TYPE=${torchInstallType} \
         IMAGE_NAME=${IMAGE_NAME} IMAGE_TAG=${tag} \
         BUILD_WHEEL_OPTS='-j ${BUILD_JOBS}' ${args} \
+        BUILD_TRITON=1 \
         GITHUB_MIRROR=https://urm.nvidia.com/artifactory/github-go-remote
     """
 }
@@ -149,6 +150,7 @@ def buildImage(target, action="build", torchInstallType="skip", args="", custom_
         TORCH_INSTALL_TYPE=${torchInstallType} \
         IMAGE_NAME=${IMAGE_NAME} IMAGE_TAG=${custom_tag} \
         BUILD_WHEEL_OPTS='-j ${BUILD_JOBS}' ${args} \
+        BUILD_TRITON=1 \
         GITHUB_MIRROR=https://urm.nvidia.com/artifactory/github-go-remote
     """
 }

View File

@@ -71,7 +71,7 @@ def buildImage(action, type)
     stage ("Perform '${action}' action on image") {
         retry(3)
         {
-            sh "cd ${LLM_ROOT} && make -C docker devel_${action} IMAGE_NAME=${IMAGE_NAME} IMAGE_TAG=${tag} TORCH_INSTALL_TYPE=${type}" +
+            sh "cd ${LLM_ROOT} && make -C docker devel_${action} IMAGE_NAME=${IMAGE_NAME} IMAGE_TAG=${tag} TORCH_INSTALL_TYPE=${type} BUILD_TRITON=1" +
             " GITHUB_MIRROR=https://urm.nvidia.com/artifactory/github-go-remote"
         }
     }

View File

@@ -21,10 +21,10 @@ UPLOAD_PATH = env.uploadPath ? env.uploadPath : "sw-tensorrt-generic/llm-artifac
 // Container configuration
 // available tags can be found in: https://urm.nvidia.com/artifactory/sw-tensorrt-docker/tensorrt-llm/
 // [base_image_name]-[arch]-[os](-[python_version])-[trt_version]-[torch_install_type]-[stage]-[date]-[mr_id]
-LLM_DOCKER_IMAGE = "urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:pytorch-25.03-py3-x86_64-ubuntu24.04-trt10.9.0.34-skip-devel-202504101610-3421"
-LLM_SBSA_DOCKER_IMAGE = "urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:pytorch-25.03-py3-aarch64-ubuntu24.04-trt10.9.0.34-skip-devel-202504101610-3421"
-LLM_ROCKYLINUX8_PY310_DOCKER_IMAGE = "urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:cuda-12.8.1-devel-rocky8-x86_64-rocky8-py310-trt10.9.0.34-skip-devel-202504101610-3421"
-LLM_ROCKYLINUX8_PY312_DOCKER_IMAGE = "urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:cuda-12.8.1-devel-rocky8-x86_64-rocky8-py312-trt10.9.0.34-skip-devel-202504101610-3421"
+LLM_DOCKER_IMAGE = "urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:pytorch-25.03-py3-x86_64-ubuntu24.04-trt10.9.0.34-skip-devel-202504250100-3759"
+LLM_SBSA_DOCKER_IMAGE = "urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:pytorch-25.03-py3-aarch64-ubuntu24.04-trt10.9.0.34-skip-devel-202504250100-3759"
+LLM_ROCKYLINUX8_PY310_DOCKER_IMAGE = "urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:cuda-12.8.1-devel-rocky8-x86_64-rocky8-py310-trt10.9.0.34-skip-devel-202504250100-3759"
+LLM_ROCKYLINUX8_PY312_DOCKER_IMAGE = "urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:cuda-12.8.1-devel-rocky8-x86_64-rocky8-py312-trt10.9.0.34-skip-devel-202504250100-3759"
 LLM_ROCKYLINUX8_DOCKER_IMAGE = LLM_ROCKYLINUX8_PY310_DOCKER_IMAGE

View File

@@ -29,8 +29,8 @@ linuxPkgName = ( env.targetArch == AARCH64_TRIPLE ? "tensorrt-llm-sbsa-release-s
 // available tags can be found in: https://urm.nvidia.com/artifactory/sw-tensorrt-docker/tensorrt-llm/
 // [base_image_name]-[arch]-[os](-[python_version])-[trt_version]-[torch_install_type]-[stage]-[date]-[mr_id]
 LLM_DOCKER_IMAGE = env.dockerImage
-LLM_ROCKYLINUX8_PY310_DOCKER_IMAGE = "urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:cuda-12.8.1-devel-rocky8-x86_64-rocky8-py310-trt10.9.0.34-skip-devel-202504101610-3421"
-LLM_ROCKYLINUX8_PY312_DOCKER_IMAGE = "urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:cuda-12.8.1-devel-rocky8-x86_64-rocky8-py312-trt10.9.0.34-skip-devel-202504101610-3421"
+LLM_ROCKYLINUX8_PY310_DOCKER_IMAGE = "urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:cuda-12.8.1-devel-rocky8-x86_64-rocky8-py310-trt10.9.0.34-skip-devel-202504250100-3759"
+LLM_ROCKYLINUX8_PY312_DOCKER_IMAGE = "urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:cuda-12.8.1-devel-rocky8-x86_64-rocky8-py312-trt10.9.0.34-skip-devel-202504250100-3759"
 // DLFW torch image
 DLFW_IMAGE = "nvcr.io/nvidia/pytorch:25.03-py3"

View File

@@ -1,7 +1,7 @@
 import java.lang.InterruptedException
 
-DOCKER_IMAGE = "urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:pytorch-25.03-py3-x86_64-ubuntu24.04-trt10.9.0.34-skip-devel-202504101610-3421"
+DOCKER_IMAGE = "urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:pytorch-25.03-py3-x86_64-ubuntu24.04-trt10.9.0.34-skip-devel-202504250100-3759"
 
 def createKubernetesPodConfig(image)
 {

View File

@@ -487,3 +487,4 @@ test_e2e.py::test_ptp_quickstart_multimodal[NVILA-8B-FP16-vila/NVILA-8B-image] S
 accuracy/test_cli_flow.py::TestLlama3_1_8BInstruct::test_medusa_fp8_prequantized SKIP (https://nvbugs/5238599)
 accuracy/test_llm_api_pytorch.py::TestLlama3_3_70BInstruct::test_fp8_tp4 SKIP (https://nvbugs/5238602)
 unittest/_torch/multi_gpu_modeling/test_llama4.py::test_llama4[tp8-trtllm-scout] SKIP (https://nvbugs/5244009)
+accuracy/test_llm_api_pytorch.py::TestLlama3_1_8BInstruct::test_bfloat16_4gpus[tp2pp2-attn_backend=TRTLLM] SKIP (https://nvbugs/5241627)