[None][infra] install mooncake in docker images (#8447)

Signed-off-by: Bo Deng <deemod@nvidia.com>
Signed-off-by: zhengd-nv <200704041+zhengd-nv@users.noreply.github.com>
Signed-off-by: Zheng Duan <200704041+zhengd-nv@users.noreply.github.com>
Co-authored-by: zhengd-nv <200704041+zhengd-nv@users.noreply.github.com>
This commit is contained in:
Bo Deng 2025-11-11 13:34:27 +08:00 committed by GitHub
parent da1f0e2465
commit 0b9bc5aae8
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 99 additions and 4 deletions

View File

@ -14889,6 +14889,24 @@ Chen, Tianqi
```
## Mooncake
- **Repository URL**: https://github.com/kvcache-ai/Mooncake
- **License URL**: https://github.com/kvcache-ai/Mooncake/blob/main/LICENSE-APACHE
- **License name**: Apache 2.0
### Authors
© Copyright 2025, Mooncake Team.
Copyright (c) Meta Platforms, Inc. and affiliates.
Copyright 2024 KVCache.AI
Ruoyu Qin
Zheming Li
Weiran He
Mingxing Zhang
Yongwei Wu
Weimin Zheng
Xinran Xu
## flashinfer
### License Text

View File

@ -14697,6 +14697,24 @@ Chen, Tianqi
```
## Mooncake
- **Repository URL**: https://github.com/kvcache-ai/Mooncake
- **License URL**: https://github.com/kvcache-ai/Mooncake/blob/main/LICENSE-APACHE
- **License name**: Apache 2.0
### Authors
© Copyright 2025, Mooncake Team.
Copyright (c) Meta Platforms, Inc. and affiliates.
Copyright 2024 KVCache.AI
Ruoyu Qin
Zheming Li
Weiran He
Mingxing Zhang
Yongwei Wu
Weimin Zheng
Xinran Xu
## flashinfer
### License Text

View File

@ -44,6 +44,7 @@ COPY docker/common/install.sh \
docker/common/install_ucx.sh \
docker/common/install_nixl.sh \
docker/common/install_etcd.sh \
docker/common/install_mooncake.sh \
./
RUN GITHUB_MIRROR=${GITHUB_MIRROR} \
@ -103,6 +104,13 @@ COPY docker/common/install_triton.sh \
RUN GITHUB_MIRROR=${GITHUB_MIRROR} bash ./install_triton.sh && rm install_triton.sh
# Mooncake is installed only after the Triton step above, which already
# satisfies its Boost requirement. install_mooncake.sh relies on apt-get, so
# images carrying /etc/redhat-release (Rocky8) skip the installation; the
# helper script is removed from the image in either case.
RUN if [ ! -f /etc/redhat-release ]; then \
        bash ./install_mooncake.sh; \
    else \
        echo "Rocky8 detected, skipping mooncake installation"; \
    fi && rm install_mooncake.sh
FROM ${DEVEL_IMAGE} AS wheel
WORKDIR /src/tensorrt_llm
COPY benchmarks benchmarks

View File

@ -0,0 +1,51 @@
#!/bin/bash
# Build and install Mooncake, plus its yalantinglibs dependency, from source.
#
# Requirements:
#   - an apt-based image (the script calls apt-get directly);
#   - ENV set to the shell init file that should receive the LD_LIBRARY_PATH
#     export for the Mooncake libraries.
#
# Side effects:
#   - installs build dependencies through apt-get;
#   - archives the cloned sources under /third-party-source;
#   - installs yalantinglibs to its default CMake prefix and Mooncake to
#     MOONCAKE_INSTALL_PATH;
#   - appends one export line to the file named by ENV.
set -ex

# Fail before the (long) build rather than on the very last line if the
# target init file is unknown.
: "${ENV:?ENV must name the shell init file that receives LD_LIBRARY_PATH}"

MOONCAKE_VERSION="v0.3.6.post1"
MOONCAKE_REPO="https://github.com/kvcache-ai/Mooncake.git"
MOONCAKE_INSTALL_PATH="/usr/local/Mooncake"

# A bare `make -j` places no limit on concurrent jobs; cap it at the number
# of available processors to keep memory use bounded.
BUILD_JOBS="$(nproc)"

apt-get update

# https://kvcache-ai.github.io/Mooncake/getting_started/build.html
# libboost-all-dev is deliberately left out: it would pull in a duplicate MPI
# library, and the Triton installation has already provided Boost.
apt-get install -y --no-install-recommends \
    build-essential \
    libibverbs-dev \
    libgoogle-glog-dev \
    libgtest-dev \
    libjsoncpp-dev \
    libnuma-dev \
    libunwind-dev \
    libssl-dev \
    libyaml-cpp-dev \
    libcurl4-openssl-dev \
    libhiredis-dev \
    pkg-config \
    patchelf

# Drop the package cache and index files so they do not persist in the layer.
apt-get clean
rm -rf /var/lib/apt/lists/*

mkdir -p /third-party-source

# --- yalantinglibs -----------------------------------------------------------
# NOTE(review): this clones the default branch head with no tag or commit
# pinned, so two builds may pick up different sources — consider pinning.
git clone --depth 1 https://github.com/alibaba/yalantinglibs.git
tar -czf /third-party-source/yalantinglibs.tar.gz yalantinglibs
cd yalantinglibs
mkdir build && cd build
cmake .. -DBUILD_EXAMPLES=OFF -DBUILD_BENCHMARK=OFF -DBUILD_UNIT_TESTS=OFF
make -j"${BUILD_JOBS}"
make install
cd ../..
rm -rf yalantinglibs

# --- Mooncake ----------------------------------------------------------------
git clone --depth 1 -b "${MOONCAKE_VERSION}" "${MOONCAKE_REPO}"
# The archive is created before submodules are fetched, so it holds only the
# top-level checkout.
tar -czf "/third-party-source/Mooncake-${MOONCAKE_VERSION}.tar.gz" Mooncake
cd Mooncake
git submodule update --init --recursive --depth 1
mkdir build && cd build
cmake .. -DUSE_CUDA=ON -DBUILD_SHARED_LIBS=ON -DCMAKE_INSTALL_PREFIX="${MOONCAKE_INSTALL_PATH}"
make -j"${BUILD_JOBS}"
make install
cd ../..
rm -rf Mooncake

# \$LD_LIBRARY_PATH is escaped so it is expanded when the init file is
# sourced, not while this script runs.
echo "export LD_LIBRARY_PATH=${MOONCAKE_INSTALL_PATH}/lib:\$LD_LIBRARY_PATH" >> "${ENV}"

View File

@ -13,7 +13,7 @@
# images are adopted from PostMerge pipelines, the abbreviated commit hash is used instead.
IMAGE_NAME=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm
LLM_DOCKER_IMAGE=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:pytorch-25.10-py3-x86_64-ubuntu24.04-trt10.13.3.9-skip-tritondevel-202511021230-8838
LLM_SBSA_DOCKER_IMAGE=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:pytorch-25.10-py3-aarch64-ubuntu24.04-trt10.13.3.9-skip-tritondevel-202511021230-8838
LLM_ROCKYLINUX8_PY310_DOCKER_IMAGE=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:cuda-13.0.2-devel-rocky8-x86_64-rocky8-py310-trt10.13.3.9-skip-tritondevel-202511021230-8838
LLM_ROCKYLINUX8_PY312_DOCKER_IMAGE=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:cuda-13.0.2-devel-rocky8-x86_64-rocky8-py312-trt10.13.3.9-skip-tritondevel-202511021230-8838
LLM_DOCKER_IMAGE=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:pytorch-25.10-py3-x86_64-ubuntu24.04-trt10.13.3.9-skip-tritondevel-202511110140-8447
LLM_SBSA_DOCKER_IMAGE=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:pytorch-25.10-py3-aarch64-ubuntu24.04-trt10.13.3.9-skip-tritondevel-202511110140-8447
LLM_ROCKYLINUX8_PY310_DOCKER_IMAGE=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:cuda-13.0.2-devel-rocky8-x86_64-rocky8-py310-trt10.13.3.9-skip-tritondevel-202511110140-8447
LLM_ROCKYLINUX8_PY312_DOCKER_IMAGE=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:cuda-13.0.2-devel-rocky8-x86_64-rocky8-py312-trt10.13.3.9-skip-tritondevel-202511110140-8447