mirror of
https://github.com/NVIDIA/TensorRT-LLM.git
synced 2026-01-13 22:18:36 +08:00
infra: install Triton in the base image (#3759)
* infra: install Triton in the base image
  Signed-off-by: Iman Tabrizian <10105175+tabrizian@users.noreply.github.com>
* install Triton from the base image
  Signed-off-by: Iman Tabrizian <10105175+tabrizian@users.noreply.github.com>
* update base image
  Signed-off-by: Iman Tabrizian <10105175+tabrizian@users.noreply.github.com>
* Address review comments
  Signed-off-by: Iman Tabrizian <10105175+tabrizian@users.noreply.github.com>
* update base image
  Signed-off-by: Iman Tabrizian <10105175+tabrizian@users.noreply.github.com>
* waive test
  Signed-off-by: Iman Tabrizian <10105175+tabrizian@users.noreply.github.com>
---------
Signed-off-by: Iman Tabrizian <10105175+tabrizian@users.noreply.github.com>
This commit is contained in:
parent
ad4226d946
commit
74cc9e26ff
@ -1,8 +1,7 @@
|
||||
version: "3.9"
|
||||
services:
|
||||
tensorrt_llm-dev:
|
||||
image: urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:pytorch-25.03-py3-x86_64-ubuntu24.04-trt10.9.0.34-skip-devel-202504101610-3421
|
||||
|
||||
image: urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:pytorch-25.03-py3-x86_64-ubuntu24.04-trt10.9.0.34-skip-devel-202504250100-3759
|
||||
network_mode: host
|
||||
ipc: host
|
||||
|
||||
|
||||
@ -65,7 +65,7 @@ repos:
|
||||
additional_dependencies:
|
||||
- tomli
|
||||
# add ignore words list
|
||||
args: ["-L", "Mor,ans"]
|
||||
args: ["-L", "Mor,ans,thirdparty"]
|
||||
- repo: https://github.com/astral-sh/ruff-pre-commit
|
||||
rev: v0.9.4
|
||||
hooks:
|
||||
|
||||
@ -1,8 +1,12 @@
|
||||
# Multi-stage Dockerfile
|
||||
ARG BASE_IMAGE=nvcr.io/nvidia/pytorch
|
||||
ARG TRITON_IMAGE=nvcr.io/nvidia/tritonserver
|
||||
ARG BASE_TAG=25.03-py3
|
||||
ARG TRITON_BASE_TAG=25.03-py3
|
||||
ARG DEVEL_IMAGE=devel
|
||||
|
||||
FROM ${TRITON_IMAGE}:${TRITON_BASE_TAG} AS triton
|
||||
|
||||
FROM ${BASE_IMAGE}:${BASE_TAG} AS base
|
||||
|
||||
# https://www.gnu.org/software/bash/manual/html_node/Bash-Startup-Files.html
|
||||
@ -49,6 +53,15 @@ RUN bash ./install_tensorrt.sh \
|
||||
--CUBLAS_VER=${CUBLAS_VER} && \
|
||||
rm install_tensorrt.sh
|
||||
|
||||
ARG INSTALL_TRITON=0
|
||||
COPY --from=triton /opt/tritonserver/backends/python /opt/tritonserver/backends/python
|
||||
COPY --from=triton /opt/tritonserver/lib /opt/tritonserver/lib
|
||||
COPY --from=triton /opt/tritonserver/include /opt/tritonserver/include
|
||||
COPY --from=triton /opt/tritonserver/bin /opt/tritonserver/bin
|
||||
COPY --from=triton /opt/tritonserver/caches /opt/tritonserver/caches
|
||||
COPY docker/common/install_triton.sh install_triton.sh
|
||||
RUN bash ./install_triton.sh && rm install_triton.sh
|
||||
|
||||
# Install latest Polygraphy
|
||||
COPY docker/common/install_polygraphy.sh install_polygraphy.sh
|
||||
RUN bash ./install_polygraphy.sh && rm install_polygraphy.sh
|
||||
|
||||
@ -41,6 +41,7 @@ GIT_COMMIT ?= $(shell git rev-parse HEAD)
|
||||
TRT_LLM_VERSION ?= $(shell grep '^__version__' ../tensorrt_llm/version.py | grep -o '=.*' | tr -d '= "')
|
||||
GITHUB_MIRROR ?=
|
||||
PYTHON_VERSION ?=
|
||||
INSTALL_TRITON ?= 1
|
||||
|
||||
define add_local_user
|
||||
docker build \
|
||||
@ -79,6 +80,7 @@ endef
|
||||
$(if $(GIT_COMMIT), --build-arg GIT_COMMIT="$(GIT_COMMIT)") \
|
||||
$(if $(GITHUB_MIRROR), --build-arg GITHUB_MIRROR="$(GITHUB_MIRROR)") \
|
||||
$(if $(PYTHON_VERSION), --build-arg PYTHON_VERSION="$(PYTHON_VERSION)") \
|
||||
$(if $(INSTALL_TRITON), --build-arg INSTALL_TRITON="$(INSTALL_TRITON)") \
|
||||
$(if $(STAGE), --target $(STAGE)) \
|
||||
--file Dockerfile.multi \
|
||||
--tag $(IMAGE_WITH_TAG) \
|
||||
|
||||
39
docker/common/install_triton.sh
Normal file
39
docker/common/install_triton.sh
Normal file
@ -0,0 +1,39 @@
|
||||
#!/bin/bash
|
||||
|
||||
set -ex
|
||||
|
||||
# Install the Boost 1.80.0 headers into /usr/include/boost.
#
# boost::span needs Boost >= 1.78, and the current libboost-dev apt packages
# are older than that, so the headers are taken from the release tarball.
install_boost() {
  # Every step is a separate command on purpose: in an `a && b && c` list a
  # failing non-final command does NOT trigger `set -e`, so a failed download
  # or extraction would be silently swallowed by the cleanup that follows.
  wget -O /tmp/boost.tar.gz https://archives.boost.io/release/1.80.0/source/boost_1_80_0.tar.gz
  tar xzf /tmp/boost.tar.gz -C /tmp
  mv /tmp/boost_1_80_0/boost /usr/include/boost

  # Remove the extracted tree and the tarball; only the headers are kept.
  rm -rf /tmp/boost_1_80_0
  rm -rf /tmp/boost.tar.gz
}
|
||||
|
||||
# Install the apt packages listed below, then the Boost headers via
# install_boost.
install_triton_deps() {
  # `apt-get update` runs as its own command: as the non-final member of an
  # `a && b` list its failure would not trigger `set -e`, the install would be
  # skipped, and the script would carry on without the packages.
  apt-get update
  apt-get install -y \
    pigz \
    libxml2-dev \
    libre2-dev \
    libnuma-dev \
    python3-build \
    libb64-dev \
    libarchive-dev \
    datacenter-gpu-manager=1:3.3.6

  install_boost
}
|
||||
|
||||
# Install Triton only if the base image is Ubuntu and INSTALL_TRITON is "1".
# In every other case the /opt/tritonserver tree is removed.
ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"')
if [ "$INSTALL_TRITON" != "1" ]; then
  # Triton was not requested for this build.
  echo "Skip Triton installation when INSTALL_TRITON is set to 0"
  rm -rf /opt/tritonserver
elif [ "$ID" == "ubuntu" ]; then
  install_triton_deps
else
  # Requested, but the distribution ID from /etc/os-release is not ubuntu.
  rm -rf /opt/tritonserver
  echo "Skip Triton installation for non-Ubuntu base image"
fi
|
||||
@ -16,7 +16,7 @@ AARCH64_TRIPLE = "aarch64-linux-gnu"
|
||||
|
||||
LLM_DOCKER_IMAGE = env.dockerImage
|
||||
|
||||
AGENT_IMAGE = "urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:pytorch-25.01-py3-x86_64-ubuntu24.04-trt10.8.0.43-skip-devel-202503131720-8877"
|
||||
AGENT_IMAGE = "urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:pytorch-25.03-py3-x86_64-ubuntu24.04-trt10.9.0.34-skip-devel-202504250100-3759"
|
||||
|
||||
POD_TIMEOUT_SECONDS = env.podTimeoutSeconds ? env.podTimeoutSeconds : "21600"
|
||||
|
||||
|
||||
@ -137,6 +137,7 @@ def buildImage(target, action="build", torchInstallType="skip", args="", custom_
|
||||
TORCH_INSTALL_TYPE=${torchInstallType} \
|
||||
IMAGE_NAME=${IMAGE_NAME} IMAGE_TAG=${tag} \
|
||||
BUILD_WHEEL_OPTS='-j ${BUILD_JOBS}' ${args} \
|
||||
BUILD_TRITON=1 \
|
||||
GITHUB_MIRROR=https://urm.nvidia.com/artifactory/github-go-remote
|
||||
"""
|
||||
}
|
||||
@ -149,6 +150,7 @@ def buildImage(target, action="build", torchInstallType="skip", args="", custom_
|
||||
TORCH_INSTALL_TYPE=${torchInstallType} \
|
||||
IMAGE_NAME=${IMAGE_NAME} IMAGE_TAG=${custom_tag} \
|
||||
BUILD_WHEEL_OPTS='-j ${BUILD_JOBS}' ${args} \
|
||||
BUILD_TRITON=1 \
|
||||
GITHUB_MIRROR=https://urm.nvidia.com/artifactory/github-go-remote
|
||||
"""
|
||||
}
|
||||
|
||||
@ -71,7 +71,7 @@ def buildImage(action, type)
|
||||
stage ("Perform '${action}' action on image") {
|
||||
retry(3)
|
||||
{
|
||||
sh "cd ${LLM_ROOT} && make -C docker devel_${action} IMAGE_NAME=${IMAGE_NAME} IMAGE_TAG=${tag} TORCH_INSTALL_TYPE=${type}" +
|
||||
sh "cd ${LLM_ROOT} && make -C docker devel_${action} IMAGE_NAME=${IMAGE_NAME} IMAGE_TAG=${tag} TORCH_INSTALL_TYPE=${type} BUILD_TRITON=1" +
|
||||
" GITHUB_MIRROR=https://urm.nvidia.com/artifactory/github-go-remote"
|
||||
}
|
||||
}
|
||||
|
||||
@ -21,10 +21,10 @@ UPLOAD_PATH = env.uploadPath ? env.uploadPath : "sw-tensorrt-generic/llm-artifac
|
||||
// Container configuration
|
||||
// available tags can be found in: https://urm.nvidia.com/artifactory/sw-tensorrt-docker/tensorrt-llm/
|
||||
// [base_image_name]-[arch]-[os](-[python_version])-[trt_version]-[torch_install_type]-[stage]-[date]-[mr_id]
|
||||
LLM_DOCKER_IMAGE = "urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:pytorch-25.03-py3-x86_64-ubuntu24.04-trt10.9.0.34-skip-devel-202504101610-3421"
|
||||
LLM_SBSA_DOCKER_IMAGE = "urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:pytorch-25.03-py3-aarch64-ubuntu24.04-trt10.9.0.34-skip-devel-202504101610-3421"
|
||||
LLM_ROCKYLINUX8_PY310_DOCKER_IMAGE = "urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:cuda-12.8.1-devel-rocky8-x86_64-rocky8-py310-trt10.9.0.34-skip-devel-202504101610-3421"
|
||||
LLM_ROCKYLINUX8_PY312_DOCKER_IMAGE = "urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:cuda-12.8.1-devel-rocky8-x86_64-rocky8-py312-trt10.9.0.34-skip-devel-202504101610-3421"
|
||||
LLM_DOCKER_IMAGE = "urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:pytorch-25.03-py3-x86_64-ubuntu24.04-trt10.9.0.34-skip-devel-202504250100-3759"
|
||||
LLM_SBSA_DOCKER_IMAGE = "urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:pytorch-25.03-py3-aarch64-ubuntu24.04-trt10.9.0.34-skip-devel-202504250100-3759"
|
||||
LLM_ROCKYLINUX8_PY310_DOCKER_IMAGE = "urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:cuda-12.8.1-devel-rocky8-x86_64-rocky8-py310-trt10.9.0.34-skip-devel-202504250100-3759"
|
||||
LLM_ROCKYLINUX8_PY312_DOCKER_IMAGE = "urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:cuda-12.8.1-devel-rocky8-x86_64-rocky8-py312-trt10.9.0.34-skip-devel-202504250100-3759"
|
||||
|
||||
LLM_ROCKYLINUX8_DOCKER_IMAGE = LLM_ROCKYLINUX8_PY310_DOCKER_IMAGE
|
||||
|
||||
|
||||
@ -29,8 +29,8 @@ linuxPkgName = ( env.targetArch == AARCH64_TRIPLE ? "tensorrt-llm-sbsa-release-s
|
||||
// available tags can be found in: https://urm.nvidia.com/artifactory/sw-tensorrt-docker/tensorrt-llm/
|
||||
// [base_image_name]-[arch]-[os](-[python_version])-[trt_version]-[torch_install_type]-[stage]-[date]-[mr_id]
|
||||
LLM_DOCKER_IMAGE = env.dockerImage
|
||||
LLM_ROCKYLINUX8_PY310_DOCKER_IMAGE = "urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:cuda-12.8.1-devel-rocky8-x86_64-rocky8-py310-trt10.9.0.34-skip-devel-202504101610-3421"
|
||||
LLM_ROCKYLINUX8_PY312_DOCKER_IMAGE = "urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:cuda-12.8.1-devel-rocky8-x86_64-rocky8-py312-trt10.9.0.34-skip-devel-202504101610-3421"
|
||||
LLM_ROCKYLINUX8_PY310_DOCKER_IMAGE = "urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:cuda-12.8.1-devel-rocky8-x86_64-rocky8-py310-trt10.9.0.34-skip-devel-202504250100-3759"
|
||||
LLM_ROCKYLINUX8_PY312_DOCKER_IMAGE = "urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:cuda-12.8.1-devel-rocky8-x86_64-rocky8-py312-trt10.9.0.34-skip-devel-202504250100-3759"
|
||||
|
||||
// DLFW torch image
|
||||
DLFW_IMAGE = "nvcr.io/nvidia/pytorch:25.03-py3"
|
||||
|
||||
@ -1,7 +1,7 @@
|
||||
|
||||
import java.lang.InterruptedException
|
||||
|
||||
DOCKER_IMAGE = "urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:pytorch-25.03-py3-x86_64-ubuntu24.04-trt10.9.0.34-skip-devel-202504101610-3421"
|
||||
DOCKER_IMAGE = "urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:pytorch-25.03-py3-x86_64-ubuntu24.04-trt10.9.0.34-skip-devel-202504250100-3759"
|
||||
|
||||
def createKubernetesPodConfig(image)
|
||||
{
|
||||
|
||||
@ -487,3 +487,4 @@ test_e2e.py::test_ptp_quickstart_multimodal[NVILA-8B-FP16-vila/NVILA-8B-image] S
|
||||
accuracy/test_cli_flow.py::TestLlama3_1_8BInstruct::test_medusa_fp8_prequantized SKIP (https://nvbugs/5238599)
|
||||
accuracy/test_llm_api_pytorch.py::TestLlama3_3_70BInstruct::test_fp8_tp4 SKIP (https://nvbugs/5238602)
|
||||
unittest/_torch/multi_gpu_modeling/test_llama4.py::test_llama4[tp8-trtllm-scout] SKIP (https://nvbugs/5244009)
|
||||
accuracy/test_llm_api_pytorch.py::TestLlama3_1_8BInstruct::test_bfloat16_4gpus[tp2pp2-attn_backend=TRTLLM] SKIP (https://nvbugs/5241627)
|
||||
|
||||
Loading…
Reference in New Issue
Block a user