mirror of
https://github.com/NVIDIA/TensorRT-LLM.git
synced 2026-01-13 14:07:21 +08:00
[None][infra] install mooncake in docker images (#8447)
Signed-off-by: Bo Deng <deemod@nvidia.com> Signed-off-by: zhengd-nv <200704041+zhengd-nv@users.noreply.github.com> Signed-off-by: Zheng Duan <200704041+zhengd-nv@users.noreply.github.com> Co-authored-by: zhengd-nv <200704041+zhengd-nv@users.noreply.github.com>
This commit is contained in:
parent
da1f0e2465
commit
0b9bc5aae8
@ -14889,6 +14889,24 @@ Chen, Tianqi
|
||||
|
||||
```
|
||||
|
||||
## Mooncake
|
||||
|
||||
- **Repository URL**: https://github.com/kvcache-ai/Mooncake
|
||||
- **License URL**: https://github.com/kvcache-ai/Mooncake/blob/main/LICENSE-APACHE
|
||||
- **License name**: Apache 2.0
|
||||
|
||||
### Authors
|
||||
|
||||
© Copyright 2025, Mooncake Team.
|
||||
Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
Copyright 2024 KVCache.AI
|
||||
Ruoyu Qin
|
||||
Zheming Li
|
||||
Weiran He
|
||||
Mingxing Zhang
|
||||
Yongwei Wu
|
||||
Weimin Zheng
|
||||
Xinran Xu
|
||||
## flashinfer
|
||||
|
||||
### License Text
|
||||
|
||||
@ -14697,6 +14697,24 @@ Chen, Tianqi
|
||||
|
||||
```
|
||||
|
||||
## Mooncake
|
||||
|
||||
- **Repository URL**: https://github.com/kvcache-ai/Mooncake
|
||||
- **License URL**: https://github.com/kvcache-ai/Mooncake/blob/main/LICENSE-APACHE
|
||||
- **License name**: Apache 2.0
|
||||
|
||||
### Authors
|
||||
|
||||
© Copyright 2025, Mooncake Team.
|
||||
Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
Copyright 2024 KVCache.AI
|
||||
Ruoyu Qin
|
||||
Zheming Li
|
||||
Weiran He
|
||||
Mingxing Zhang
|
||||
Yongwei Wu
|
||||
Weimin Zheng
|
||||
Xinran Xu
|
||||
## flashinfer
|
||||
|
||||
### License Text
|
||||
|
||||
@ -44,6 +44,7 @@ COPY docker/common/install.sh \
|
||||
docker/common/install_ucx.sh \
|
||||
docker/common/install_nixl.sh \
|
||||
docker/common/install_etcd.sh \
|
||||
docker/common/install_mooncake.sh \
|
||||
./
|
||||
|
||||
RUN GITHUB_MIRROR=${GITHUB_MIRROR} \
|
||||
@ -103,6 +104,13 @@ COPY docker/common/install_triton.sh \
|
||||
|
||||
RUN GITHUB_MIRROR=${GITHUB_MIRROR} bash ./install_triton.sh && rm install_triton.sh
|
||||
|
||||
# Install Mooncake. This step is placed after the Triton install step because
# Mooncake requires Boost and the Triton step already satisfies that requirement.
# When /etc/redhat-release exists the installation is skipped and only a message
# is printed; otherwise install_mooncake.sh is executed. The script is deleted
# afterwards whether or not it was run.
# NOTE(review): the skip is presumably needed because install_mooncake.sh relies
# on apt-get, which Rocky Linux images do not provide - confirm.
|
||||
RUN if [ -f /etc/redhat-release ]; then \
|
||||
echo "Rocky8 detected, skipping mooncake installation"; \
|
||||
else \
|
||||
bash ./install_mooncake.sh; \
|
||||
fi && rm install_mooncake.sh
|
||||
|
||||
FROM ${DEVEL_IMAGE} AS wheel
|
||||
WORKDIR /src/tensorrt_llm
|
||||
COPY benchmarks benchmarks
|
||||
|
||||
51
docker/common/install_mooncake.sh
Normal file
51
docker/common/install_mooncake.sh
Normal file
@ -0,0 +1,51 @@
|
||||
#!/bin/bash
#
# Build and install Mooncake and its yalantinglibs dependency from source.
#
# Steps:
#   1. Install the apt build dependencies.
#   2. Clone, archive, build and install yalantinglibs.
#   3. Clone, archive, build and install Mooncake into MOONCAKE_INSTALL_PATH.
#   4. Append Mooncake's lib directory to LD_LIBRARY_PATH in the file named
#      by the ENV environment variable.
#
# Source tarballs of both projects are stored under /third-party-source.
#
# Required environment:
#   ENV  Path of the shell init file that receives the LD_LIBRARY_PATH export.
set -ex

MOONCAKE_VERSION="v0.3.6.post1"
MOONCAKE_REPO="https://github.com/kvcache-ai/Mooncake.git"
MOONCAKE_INSTALL_PATH="/usr/local/Mooncake"

# ENV is only written on the very last line; check it now so a missing value
# fails immediately instead of after both builds have completed.
: "${ENV:?ENV must name the shell init file to append LD_LIBRARY_PATH to}"

# A bare `make -j` places no limit on the number of parallel jobs, which can
# exhaust memory on the build host. Bound it by the available CPU count.
BUILD_JOBS="$(nproc)"

apt-get update

# https://kvcache-ai.github.io/Mooncake/getting_started/build.html
# libboost-all-dev is left out on purpose: it would install a duplicate MPI
# library. Triton has already installed Boost, so the requirement is met.
apt-get install -y --no-install-recommends \
  build-essential \
  libibverbs-dev \
  libgoogle-glog-dev \
  libgtest-dev \
  libjsoncpp-dev \
  libnuma-dev \
  libunwind-dev \
  libssl-dev \
  libyaml-cpp-dev \
  libcurl4-openssl-dev \
  libhiredis-dev \
  pkg-config \
  patchelf

mkdir -p /third-party-source

# --- yalantinglibs ---------------------------------------------------------
git clone --depth 1 https://github.com/alibaba/yalantinglibs.git
tar -czf /third-party-source/yalantinglibs.tar.gz yalantinglibs
(
  # Subshell keeps the directory changes local to this build.
  cd yalantinglibs
  # mkdir and cd are separate commands: in `mkdir build && cd build` a failing
  # mkdir is exempt from `set -e`, and the build would continue in the wrong
  # directory.
  mkdir build
  cd build
  cmake .. -DBUILD_EXAMPLES=OFF -DBUILD_BENCHMARK=OFF -DBUILD_UNIT_TESTS=OFF
  make -j "${BUILD_JOBS}"
  make install
)
rm -rf yalantinglibs

# --- Mooncake --------------------------------------------------------------
git clone --depth 1 -b "${MOONCAKE_VERSION}" "${MOONCAKE_REPO}"
# The archive is created before submodules are fetched, as in the original.
tar -czf "/third-party-source/Mooncake-${MOONCAKE_VERSION}.tar.gz" Mooncake
(
  cd Mooncake
  git submodule update --init --recursive --depth 1
  mkdir build
  cd build
  cmake .. -DUSE_CUDA=ON -DBUILD_SHARED_LIBS=ON \
    "-DCMAKE_INSTALL_PREFIX=${MOONCAKE_INSTALL_PATH}"
  make -j "${BUILD_JOBS}"
  make install
)
rm -rf Mooncake

# \$LD_LIBRARY_PATH stays escaped so it is expanded when the init file is
# sourced, not while this script runs.
echo "export LD_LIBRARY_PATH=${MOONCAKE_INSTALL_PATH}/lib:\$LD_LIBRARY_PATH" >> "${ENV}"
|
||||
@ -13,7 +13,7 @@
|
||||
# images are adopted from PostMerge pipelines, the abbreviated commit hash is used instead.
|
||||
IMAGE_NAME=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm
|
||||
|
||||
LLM_DOCKER_IMAGE=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:pytorch-25.10-py3-x86_64-ubuntu24.04-trt10.13.3.9-skip-tritondevel-202511021230-8838
|
||||
LLM_SBSA_DOCKER_IMAGE=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:pytorch-25.10-py3-aarch64-ubuntu24.04-trt10.13.3.9-skip-tritondevel-202511021230-8838
|
||||
LLM_ROCKYLINUX8_PY310_DOCKER_IMAGE=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:cuda-13.0.2-devel-rocky8-x86_64-rocky8-py310-trt10.13.3.9-skip-tritondevel-202511021230-8838
|
||||
LLM_ROCKYLINUX8_PY312_DOCKER_IMAGE=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:cuda-13.0.2-devel-rocky8-x86_64-rocky8-py312-trt10.13.3.9-skip-tritondevel-202511021230-8838
|
||||
LLM_DOCKER_IMAGE=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:pytorch-25.10-py3-x86_64-ubuntu24.04-trt10.13.3.9-skip-tritondevel-202511110140-8447
|
||||
LLM_SBSA_DOCKER_IMAGE=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:pytorch-25.10-py3-aarch64-ubuntu24.04-trt10.13.3.9-skip-tritondevel-202511110140-8447
|
||||
LLM_ROCKYLINUX8_PY310_DOCKER_IMAGE=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:cuda-13.0.2-devel-rocky8-x86_64-rocky8-py310-trt10.13.3.9-skip-tritondevel-202511110140-8447
|
||||
LLM_ROCKYLINUX8_PY312_DOCKER_IMAGE=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:cuda-13.0.2-devel-rocky8-x86_64-rocky8-py312-trt10.13.3.9-skip-tritondevel-202511110140-8447
|
||||
|
||||
Loading…
Reference in New Issue
Block a user