diff --git a/ATTRIBUTIONS-CPP-aarch64.md b/ATTRIBUTIONS-CPP-aarch64.md index 9ed9b53383..05de42485d 100755 --- a/ATTRIBUTIONS-CPP-aarch64.md +++ b/ATTRIBUTIONS-CPP-aarch64.md @@ -14889,6 +14889,24 @@ Chen, Tianqi ``` +## Mooncake + +- **Repository URL**: https://github.com/kvcache-ai/Mooncake +- **License URL**: https://github.com/kvcache-ai/Mooncake/blob/main/LICENSE-APACHE +- **License name**: Apache 2.0 + +### Authors + +© Copyright 2025, Mooncake Team. +Copyright (c) Meta Platforms, Inc. and affiliates. +Copyright 2024 KVCache.AI +Ruoyu Qin +Zheming Li +Weiran He +Mingxing Zhang +Yongwei Wu +Weimin Zheng +Xinran Xu ## flashinfer ### License Text diff --git a/ATTRIBUTIONS-CPP-x86_64.md b/ATTRIBUTIONS-CPP-x86_64.md index 11b05c2704..dd343dcef2 100755 --- a/ATTRIBUTIONS-CPP-x86_64.md +++ b/ATTRIBUTIONS-CPP-x86_64.md @@ -14697,6 +14697,24 @@ Chen, Tianqi ``` +## Mooncake + +- **Repository URL**: https://github.com/kvcache-ai/Mooncake +- **License URL**: https://github.com/kvcache-ai/Mooncake/blob/main/LICENSE-APACHE +- **License name**: Apache 2.0 + +### Authors + +© Copyright 2025, Mooncake Team. +Copyright (c) Meta Platforms, Inc. and affiliates. 
+Copyright 2024 KVCache.AI +Ruoyu Qin +Zheming Li +Weiran He +Mingxing Zhang +Yongwei Wu +Weimin Zheng +Xinran Xu ## flashinfer ### License Text diff --git a/docker/Dockerfile.multi b/docker/Dockerfile.multi index dfbcfecb71..ebb3f15265 100644 --- a/docker/Dockerfile.multi +++ b/docker/Dockerfile.multi @@ -44,6 +44,7 @@ COPY docker/common/install.sh \ docker/common/install_ucx.sh \ docker/common/install_nixl.sh \ docker/common/install_etcd.sh \ + docker/common/install_mooncake.sh \ ./ RUN GITHUB_MIRROR=${GITHUB_MIRROR} \ @@ -103,6 +104,13 @@ COPY docker/common/install_triton.sh \ RUN GITHUB_MIRROR=${GITHUB_MIRROR} bash ./install_triton.sh && rm install_triton.sh +# Install Mooncake, after triton handles boost requirement +RUN if [ -f /etc/redhat-release ]; then \ + echo "Rocky8 detected, skipping mooncake installation"; \ + else \ + bash ./install_mooncake.sh; \ + fi && rm install_mooncake.sh + FROM ${DEVEL_IMAGE} AS wheel WORKDIR /src/tensorrt_llm COPY benchmarks benchmarks diff --git a/docker/common/install_mooncake.sh b/docker/common/install_mooncake.sh new file mode 100644 index 0000000000..15301ba0fc --- /dev/null +++ b/docker/common/install_mooncake.sh @@ -0,0 +1,51 @@ +#!/bin/bash +set -ex + +MOONCAKE_VERSION="v0.3.6.post1" +MOONCAKE_REPO="https://github.com/kvcache-ai/Mooncake.git" +MOONCAKE_INSTALL_PATH="/usr/local/Mooncake" + +apt-get update + +# https://kvcache-ai.github.io/Mooncake/getting_started/build.html +# libboost-all-dev is removed because it will install a duplicated MPI library +# triton also installed boost so the requirement is already met +apt-get install -y --no-install-recommends \ + build-essential \ + libibverbs-dev \ + libgoogle-glog-dev \ + libgtest-dev \ + libjsoncpp-dev \ + libnuma-dev \ + libunwind-dev \ + libssl-dev \ + libyaml-cpp-dev \ + libcurl4-openssl-dev \ + libhiredis-dev \ + pkg-config \ + patchelf + +mkdir -p /third-party-source + +git clone --depth 1 https://github.com/alibaba/yalantinglibs.git +tar -czf 
/third-party-source/yalantinglibs.tar.gz yalantinglibs +cd yalantinglibs +mkdir build && cd build +cmake .. -DBUILD_EXAMPLES=OFF -DBUILD_BENCHMARK=OFF -DBUILD_UNIT_TESTS=OFF +make -j"$(nproc)" # a bare -j puts no limit on parallel jobs; cap at the CPU count +make install +cd ../.. +rm -rf yalantinglibs + +git clone --depth 1 -b "${MOONCAKE_VERSION}" "${MOONCAKE_REPO}" +tar -czf "/third-party-source/Mooncake-${MOONCAKE_VERSION}.tar.gz" Mooncake +cd Mooncake +git submodule update --init --recursive --depth 1 +mkdir build && cd build +cmake .. -DUSE_CUDA=ON -DBUILD_SHARED_LIBS=ON -DCMAKE_INSTALL_PREFIX="${MOONCAKE_INSTALL_PATH}" +make -j"$(nproc)" # bounded parallelism, same reason as the yalantinglibs build +make install +cd ../.. +rm -rf Mooncake + +echo "export LD_LIBRARY_PATH=${MOONCAKE_INSTALL_PATH}/lib:\$LD_LIBRARY_PATH" >> "${ENV}" diff --git a/jenkins/current_image_tags.properties b/jenkins/current_image_tags.properties index fc097cc9e4..ca519defa6 100644 --- a/jenkins/current_image_tags.properties +++ b/jenkins/current_image_tags.properties @@ -13,7 +13,7 @@ # images are adopted from PostMerge pipelines, the abbreviated commit hash is used instead. 
IMAGE_NAME=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm -LLM_DOCKER_IMAGE=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:pytorch-25.10-py3-x86_64-ubuntu24.04-trt10.13.3.9-skip-tritondevel-202511021230-8838 -LLM_SBSA_DOCKER_IMAGE=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:pytorch-25.10-py3-aarch64-ubuntu24.04-trt10.13.3.9-skip-tritondevel-202511021230-8838 -LLM_ROCKYLINUX8_PY310_DOCKER_IMAGE=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:cuda-13.0.2-devel-rocky8-x86_64-rocky8-py310-trt10.13.3.9-skip-tritondevel-202511021230-8838 -LLM_ROCKYLINUX8_PY312_DOCKER_IMAGE=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:cuda-13.0.2-devel-rocky8-x86_64-rocky8-py312-trt10.13.3.9-skip-tritondevel-202511021230-8838 +LLM_DOCKER_IMAGE=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:pytorch-25.10-py3-x86_64-ubuntu24.04-trt10.13.3.9-skip-tritondevel-202511110140-8447 +LLM_SBSA_DOCKER_IMAGE=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:pytorch-25.10-py3-aarch64-ubuntu24.04-trt10.13.3.9-skip-tritondevel-202511110140-8447 +LLM_ROCKYLINUX8_PY310_DOCKER_IMAGE=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:cuda-13.0.2-devel-rocky8-x86_64-rocky8-py310-trt10.13.3.9-skip-tritondevel-202511110140-8447 +LLM_ROCKYLINUX8_PY312_DOCKER_IMAGE=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:cuda-13.0.2-devel-rocky8-x86_64-rocky8-py312-trt10.13.3.9-skip-tritondevel-202511110140-8447