TensorRT-LLMs/docker/Dockerfile.multi
Emma Qiao ff32caf4d7
[Infra] - Update dependencies with NGC PyTorch 25.05 and TRT 10.11 (#4885)
Signed-off-by: qqiao <qqiao@nvidia.com>
Signed-off-by: Iman Tabrizian <10105175+tabrizian@users.noreply.github.com>
Signed-off-by: Erin Ho <14718778+hchings@users.noreply.github.com>
Signed-off-by: Emma Qiao <qqiao@nvidia.com>
Co-authored-by: Iman Tabrizian <10105175+tabrizian@users.noreply.github.com>
Co-authored-by: Erin Ho <14718778+hchings@users.noreply.github.com>
Co-authored-by: Yanchao Lu <yanchaol@nvidia.com>
2025-06-17 23:48:34 +08:00

191 lines
6.7 KiB
Docker

# Multi-stage Dockerfile
ARG BASE_IMAGE=nvcr.io/nvidia/pytorch
ARG TRITON_IMAGE=nvcr.io/nvidia/tritonserver
ARG BASE_TAG=25.05-py3
ARG TRITON_BASE_TAG=25.05-py3
ARG DEVEL_IMAGE=devel
FROM ${BASE_IMAGE}:${BASE_TAG} AS base
# Add NVIDIA EULA and AI Terms labels
LABEL com.nvidia.eula="https://www.nvidia.com/en-us/agreements/enterprise-software/nvidia-software-license-agreement/"
LABEL com.nvidia.ai-terms="https://www.nvidia.com/en-us/agreements/enterprise-software/product-specific-terms-for-ai-products/"
# https://www.gnu.org/software/bash/manual/html_node/Bash-Startup-Files.html
# The default values come from `nvcr.io/nvidia/pytorch`
ENV BASH_ENV=${BASH_ENV:-/etc/bash.bashrc}
ENV ENV=${ENV:-/etc/shinit_v2}
ARG GITHUB_MIRROR=""
ENV GITHUB_MIRROR=$GITHUB_MIRROR
RUN echo "Using GitHub mirror: $GITHUB_MIRROR"
SHELL ["/bin/bash", "-c"]
# Clean up the pip constraint file from the base NGC PyTorch image.
RUN [ -f /etc/pip/constraint.txt ] && : > /etc/pip/constraint.txt || true
FROM base AS devel
ARG PYTHON_VERSION="3.12.3"
RUN echo "Using Python version: $PYTHON_VERSION"
COPY docker/common/install_base.sh install_base.sh
RUN bash ./install_base.sh $PYTHON_VERSION && rm install_base.sh
COPY docker/common/install_cmake.sh install_cmake.sh
RUN bash ./install_cmake.sh && rm install_cmake.sh
COPY docker/common/install_ccache.sh install_ccache.sh
RUN bash ./install_ccache.sh && rm install_ccache.sh
# Only take effect when the base image is Rocky Linux 8 with old CUDA version.
COPY docker/common/install_cuda_toolkit.sh install_cuda_toolkit.sh
RUN bash ./install_cuda_toolkit.sh && rm install_cuda_toolkit.sh
# Download & install latest TRT release
ARG TRT_VER
ARG CUDA_VER
ARG CUDNN_VER
ARG NCCL_VER
ARG CUBLAS_VER
COPY docker/common/install_tensorrt.sh install_tensorrt.sh
RUN bash ./install_tensorrt.sh \
--TRT_VER=${TRT_VER} \
--CUDA_VER=${CUDA_VER} \
--CUDNN_VER=${CUDNN_VER} \
--NCCL_VER=${NCCL_VER} \
--CUBLAS_VER=${CUBLAS_VER} && \
rm install_tensorrt.sh
# Install latest Polygraphy
COPY docker/common/install_polygraphy.sh install_polygraphy.sh
RUN bash ./install_polygraphy.sh && rm install_polygraphy.sh
# Install mpi4py
COPY docker/common/install_mpi4py.sh install_mpi4py.sh
RUN bash ./install_mpi4py.sh && rm install_mpi4py.sh
# Install PyTorch
ARG TORCH_INSTALL_TYPE="skip"
COPY docker/common/install_pytorch.sh install_pytorch.sh
RUN bash ./install_pytorch.sh $TORCH_INSTALL_TYPE && rm install_pytorch.sh
# Install OpenCV with FFMPEG support
RUN pip3 uninstall -y opencv && rm -rf /usr/local/lib/python3*/dist-packages/cv2/
RUN pip3 install opencv-python-headless --force-reinstall --no-deps --no-cache-dir
# Install DeepEP
COPY docker/common/install_deep_ep.sh install_deep_ep.sh
RUN bash ./install_deep_ep.sh && rm install_deep_ep.sh
# WARs against security issues inherited from pytorch:25.04
# * https://github.com/advisories/GHSA-vqfr-h8mv-ghfj
# * https://github.com/advisories/GHSA-7cx3-6m66-7c5m
# * https://github.com/advisories/GHSA-5rjg-fvgr-3xxf
RUN pip3 install --upgrade --no-cache-dir \
"h11>=0.16" \
"tornado>=6.5.0" \
"setuptools>=78.1.1,<80"
FROM ${TRITON_IMAGE}:${TRITON_BASE_TAG} AS triton
FROM devel AS tritondevel
COPY --from=triton /opt/tritonserver/backends/python /opt/tritonserver/backends/python
COPY --from=triton /opt/tritonserver/lib /opt/tritonserver/lib
COPY --from=triton /opt/tritonserver/include /opt/tritonserver/include
COPY --from=triton /opt/tritonserver/bin /opt/tritonserver/bin
COPY --from=triton /opt/tritonserver/caches /opt/tritonserver/caches
COPY docker/common/install_triton.sh install_triton.sh
RUN bash ./install_triton.sh && rm install_triton.sh
# Install NIXL
COPY docker/common/install_nixl.sh install_nixl.sh
RUN bash ./install_nixl.sh && rm install_nixl.sh
# Install etcd
COPY docker/common/install_etcd.sh install_etcd.sh
RUN bash ./install_etcd.sh && rm install_etcd.sh
FROM ${DEVEL_IMAGE} AS wheel
WORKDIR /src/tensorrt_llm
COPY benchmarks benchmarks
COPY cpp cpp
COPY benchmarks benchmarks
COPY scripts scripts
COPY tensorrt_llm tensorrt_llm
COPY 3rdparty 3rdparty
COPY .gitmodules setup.py requirements.txt requirements-dev.txt constraints.txt ./
# Create cache directories for pip and ccache
RUN mkdir -p /root/.cache/pip /root/.cache/ccache
ENV CCACHE_DIR=/root/.cache/ccache
# Build the TRT-LLM wheel
ARG BUILD_WHEEL_ARGS="--clean --python_bindings --benchmarks"
RUN --mount=type=cache,target=/root/.cache/pip --mount=type=cache,target=${CCACHE_DIR} \
python3 scripts/build_wheel.py ${BUILD_WHEEL_ARGS}
FROM ${DEVEL_IMAGE} AS release
# Create a cache directory for pip
RUN mkdir -p /root/.cache/pip
WORKDIR /app/tensorrt_llm
COPY --from=wheel /src/tensorrt_llm/build/tensorrt_llm*.whl .
RUN --mount=type=cache,target=/root/.cache/pip \
pip install tensorrt_llm*.whl && \
rm tensorrt_llm*.whl && \
pip cache purge
COPY README.md ./
COPY docs docs
COPY cpp/include include
RUN ln -sv $(python3 -c 'import site; print(f"{site.getsitepackages()[0]}/tensorrt_llm/bin")') bin && \
test -f bin/executorWorker && \
ln -sv $(python3 -c 'import site; print(f"{site.getsitepackages()[0]}/tensorrt_llm/libs")') lib && \
test -f lib/libnvinfer_plugin_tensorrt_llm.so && \
echo "/app/tensorrt_llm/lib" > /etc/ld.so.conf.d/tensorrt_llm.conf && \
ldconfig && \
! ( ldd -v bin/executorWorker | grep tensorrt_llm | grep -q "not found" )
ARG SRC_DIR=/src/tensorrt_llm
COPY --from=wheel ${SRC_DIR}/benchmarks benchmarks
ARG CPP_BUILD_DIR=${SRC_DIR}/cpp/build
COPY --from=wheel \
${CPP_BUILD_DIR}/benchmarks/bertBenchmark \
${CPP_BUILD_DIR}/benchmarks/gptManagerBenchmark \
${CPP_BUILD_DIR}/benchmarks/disaggServerBenchmark \
benchmarks/cpp/
COPY examples examples
RUN chmod -R a+w examples && \
rm -v \
benchmarks/cpp/bertBenchmark.cpp \
benchmarks/cpp/gptManagerBenchmark.cpp \
benchmarks/cpp/disaggServerBenchmark.cpp \
benchmarks/cpp/CMakeLists.txt && \
rm -rf /root/.cache/pip
ARG GIT_COMMIT
ARG TRT_LLM_VER
ENV TRT_LLM_GIT_COMMIT=${GIT_COMMIT} \
TRT_LLM_VERSION=${TRT_LLM_VER}
FROM wheel AS tritonbuild
WORKDIR /src/tensorrt_llm
RUN pip install /src/tensorrt_llm/build/tensorrt_llm*.whl
COPY ./triton_backend/ ./triton_backend/
RUN bash ./triton_backend/inflight_batcher_llm/scripts/build.sh
FROM release AS tritonrelease
WORKDIR /app/tensorrt_llm
COPY ./triton_backend/all_models ./triton_backend/all_models
COPY ./triton_backend/scripts ./triton_backend/scripts
COPY ./triton_backend/tools ./triton_backend/tools
COPY ./triton_backend/inflight_batcher_llm/scripts ./triton_backend/inflight_batcher_llm/scripts
COPY ./triton_backend/inflight_batcher_llm/client ./triton_backend/inflight_batcher_llm/client
COPY --from=tritonbuild /opt/tritonserver/backends/tensorrtllm /opt/tritonserver/backends/tensorrtllm