TensorRT-LLMs/cpp/tensorrt_llm/CMakeLists.txt
Wangjue Yao 9f283f330b
[None][feat] Support Mooncake transfer engine as a cache transceiver backend (#8309)
Signed-off-by: wjueyao <wyao123@terpmail.umd.edu>
Signed-off-by: Shunkang <182541032+Shunkangz@users.noreply.github.co>
Co-authored-by: Shunkang <182541032+Shunkangz@users.noreply.github.co>
2025-12-19 10:09:51 +08:00

316 lines
10 KiB
CMake

# SPDX-FileCopyrightText: Copyright (c) 2022-2024 NVIDIA CORPORATION &
# AFFILIATES. All rights reserved. SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may not
# use this file except in compliance with the License. You may obtain a copy of
# the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations under
# the License.
# Name of the main shared library target. SHARED_TARGET mirrors TARGET_NAME and
# is additionally published to the parent directory scope.
set(TARGET_NAME tensorrt_llm)
set(SHARED_TARGET "${TARGET_NAME}")
set(SHARED_TARGET
    "${SHARED_TARGET}"
    PARENT_SCOPE)

# Directory-scoped include paths: the bundled cutlass extension headers and the
# project's public API headers.
set(API_INCLUDE_DIR "${PROJECT_SOURCE_DIR}/include")
include_directories(
  "${CMAKE_CURRENT_SOURCE_DIR}/cutlass_extensions/include"
  "${API_INCLUDE_DIR}")
# Resolve TARGET_ARCH for the host platform. The value is later used to pick the
# architecture-specific directory of the prebuilt internal cutlass kernels
# archive and is copied into the *_TARGET_ARCH variables further down.
# Configuration stops with FATAL_ERROR on any unsupported processor.
set(TARGET_ARCH "unknown")
message(STATUS "CMAKE_SYSTEM_PROCESSOR: ${CMAKE_SYSTEM_PROCESSOR}")

if(NOT WIN32) # Linux
  # Extract ID and VERSION_ID from /etc/os-release; the two `tr` stages strip
  # double quotes and the trailing newline from the grep output.
  execute_process(
    COMMAND grep -oP "(?<=^ID=).+" /etc/os-release
    COMMAND tr -d "\""
    COMMAND tr -d "\n"
    RESULT_VARIABLE _OS_ID_SUCCESS
    OUTPUT_VARIABLE OS_ID)
  execute_process(
    COMMAND grep -oP "(?<=^VERSION_ID=).+" /etc/os-release
    COMMAND tr -d "\""
    COMMAND tr -d "\n"
    RESULT_VARIABLE _OS_VERSION_ID_SUCCESS
    OUTPUT_VARIABLE OS_VERSION_ID)
  message(STATUS "Operating System: ${OS_ID}, ${OS_VERSION_ID}")

  if(CMAKE_SYSTEM_PROCESSOR MATCHES "x86_64")
    set(TARGET_ARCH "x86_64-linux-gnu")
  elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64")
    set(TARGET_ARCH "aarch64-linux-gnu")
    # Both expansions are quoted so that an empty OS_ID or OS_VERSION_ID (for
    # example when /etc/os-release is missing or lacks the key) still forms a
    # well-formed condition and reaches the requirement message below instead
    # of failing with an if() argument error.
    if(NOT "${OS_ID}" MATCHES "ubuntu" OR "${OS_VERSION_ID}" VERSION_LESS
                                          22.04)
      message(
        FATAL_ERROR
          "The minimum system requirement for aarch64 is Ubuntu 22.04.")
    endif()
  else()
    message(
      FATAL_ERROR
        "The system processor type is unsupported: ${CMAKE_SYSTEM_PROCESSOR}")
  endif()
else() # Windows
  # AMD64, IA64, ARM64, EM64T, X86
  if(CMAKE_SYSTEM_PROCESSOR MATCHES "AMD64")
    set(TARGET_ARCH "x86_64-windows-msvc")
  else()
    message(
      FATAL_ERROR
        "The system processor type is unsupported: ${CMAKE_SYSTEM_PROCESSOR}")
  endif()
endif()
# Multi-device builds require MPI; its C include directories are added at
# directory scope.
if(ENABLE_MULTI_DEVICE)
  find_package(MPI REQUIRED)
  message(STATUS "Using MPI_C_INCLUDE_DIRS: ${MPI_C_INCLUDE_DIRS}")
  message(STATUS "Using MPI_C_LIBRARIES: ${MPI_C_LIBRARIES}")
  include_directories(${MPI_C_INCLUDE_DIRS})
endif()

# Optional NVSHMEM support.
if(ENABLE_NVSHMEM)
  # The HINTS path is an extra search location for aarch64 installations.
  find_package(
    NVSHMEM REQUIRED
    HINTS /usr/lib/sbsa-linux-gnu/cmake/nvshmem/)
  include_directories(/usr/include/nvshmem/)
endif()

# The decoder attention target names are defined on non-Windows platforms only;
# on Windows both variables stay unset and expand to nothing where used.
if(NOT WIN32)
  set(DECODER_SHARED_TARGET_0 decoder_attention_0)
  set(DECODER_SHARED_TARGET_1 decoder_attention_1)
endif()
# When a source tree for the internal cutlass kernels is supplied, build it as
# a subproject (together with the NVRTC wrapper) instead of importing the
# prebuilt archive.
if(INTERNAL_CUTLASS_KERNELS_PATH)
  set(BUILD_INTERNAL_CUTLASS_KERNELS ON)
  set(BUILD_NVRTC_WRAPPER ON)

  # Accept either the directory that holds CMakeLists.txt directly or a
  # checkout root whose listfile lives under cpp/.
  if(NOT EXISTS "${INTERNAL_CUTLASS_KERNELS_PATH}/CMakeLists.txt")
    if(EXISTS "${INTERNAL_CUTLASS_KERNELS_PATH}/cpp/CMakeLists.txt")
      set(INTERNAL_CUTLASS_KERNELS_PATH "${INTERNAL_CUTLASS_KERNELS_PATH}/cpp")
    endif()
  endif()

  add_subdirectory("${INTERNAL_CUTLASS_KERNELS_PATH}"
                   "${PROJECT_BINARY_DIR}/internal_cutlass_kernels")
endif()
# Import internal cutlass kernels.
#
# When no source path was given, the kernels come from a prebuilt static
# library shipped as a .tar.xz archive under
# kernels/internal_cutlass_kernels/<arch>/. The archive is unpacked into the
# current binary directory at build time and exposed as an IMPORTED target.
set(INTERNAL_CUTLASS_KERNELS_TARGET
    tensorrt_llm_internal_cutlass_kernels_static)
set(INTERNAL_CUTLASS_KERNELS_TARGET_ARCH ${TARGET_ARCH})
if(NOT INTERNAL_CUTLASS_KERNELS_PATH)
  add_library(${INTERNAL_CUTLASS_KERNELS_TARGET} STATIC IMPORTED)
  set(INTERNAL_CUTLASS_KERNELS_LIB_TARBALL
      "${CMAKE_CURRENT_SOURCE_DIR}/kernels/internal_cutlass_kernels/${INTERNAL_CUTLASS_KERNELS_TARGET_ARCH}/${INTERNAL_CUTLASS_KERNELS_TARGET}.tar.xz"
  )

  # Platform-specific file name of the static library.
  if(NOT WIN32) # Linux
    set(INTERNAL_CUTLASS_KERNELS_LIB_NAME
        "lib${INTERNAL_CUTLASS_KERNELS_TARGET}.a")
  else() # Windows
    set(INTERNAL_CUTLASS_KERNELS_LIB_NAME
        "${INTERNAL_CUTLASS_KERNELS_TARGET}.lib")
  endif()
  set(INTERNAL_CUTLASS_KERNELS_LIB_PATH
      "${CMAKE_CURRENT_BINARY_DIR}/${INTERNAL_CUTLASS_KERNELS_LIB_NAME}")

  # Build rule that extracts the archive; it re-runs when the tarball changes.
  add_custom_command(
    OUTPUT ${INTERNAL_CUTLASS_KERNELS_LIB_PATH}
    COMMAND ${CMAKE_COMMAND} -E tar xf ${INTERNAL_CUTLASS_KERNELS_LIB_TARBALL}
    DEPENDS ${INTERNAL_CUTLASS_KERNELS_LIB_TARBALL}
    WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
    VERBATIM)

  # The helper target drives the extraction rule; making the imported library
  # depend on it orders the extraction before anything that links the library.
  add_custom_target(${INTERNAL_CUTLASS_KERNELS_TARGET}_helper
                    DEPENDS ${INTERNAL_CUTLASS_KERNELS_LIB_PATH})
  add_dependencies(${INTERNAL_CUTLASS_KERNELS_TARGET}
                   ${INTERNAL_CUTLASS_KERNELS_TARGET}_helper)
  set_property(TARGET ${INTERNAL_CUTLASS_KERNELS_TARGET}
               PROPERTY IMPORTED_LOCATION ${INTERNAL_CUTLASS_KERNELS_LIB_PATH})
  target_include_directories(
    ${INTERNAL_CUTLASS_KERNELS_TARGET}
    INTERFACE
      "${CMAKE_CURRENT_SOURCE_DIR}/kernels/internal_cutlass_kernels/include")

  # Sanity-check the archive at configure time. A missing file is reported
  # explicitly here; without this guard file(SIZE) would abort with a generic
  # "not readable" error and never reach the more helpful message below.
  if(NOT EXISTS "${INTERNAL_CUTLASS_KERNELS_LIB_TARBALL}")
    message(
      FATAL_ERROR
        "The internal_cutlass_kernels library archive was not found: ${INTERNAL_CUTLASS_KERNELS_LIB_TARBALL}. If this checkout uses Git LFS (Large File Storage), please try running command `git lfs install && git lfs pull`."
    )
  endif()

  # An archive smaller than 1024 bytes is treated as truncated or incomplete.
  file(SIZE ${INTERNAL_CUTLASS_KERNELS_LIB_TARBALL}
       INTERNAL_CUTLASS_KERNELS_LIB_SIZE)
  if(INTERNAL_CUTLASS_KERNELS_LIB_SIZE LESS 1024)
    message(
      FATAL_ERROR
        "The internal_cutlass_kernels library is truncated or incomplete. This is usually caused by using Git LFS (Large File Storage) incorrectly. Please try running command `git lfs install && git lfs pull`."
    )
  endif()
endif()
# Component subdirectories, added in this fixed order.
foreach(_trtllm_component IN ITEMS common kernels layers runtime testing
                                   executor_worker)
  add_subdirectory(${_trtllm_component})
endforeach()

# Batch manager: the target name and architecture are set before its
# subdirectory is entered, so they are visible inside it.
set(BATCH_MANAGER_TARGET tensorrt_llm_batch_manager_static)
set(BATCH_MANAGER_TARGET_ARCH ${TARGET_ARCH})
add_subdirectory(batch_manager)

# Executor and the cache transceiver wrapper target names. The NIXL and
# Mooncake wrapper names are only defined when the matching *_ROOT is set.
set(EXECUTOR_TARGET tensorrt_llm_executor_static)
set(EXECUTOR_TARGET_ARCH ${TARGET_ARCH})
set(UCX_WRAPPER_TARGET tensorrt_llm_ucx_wrapper)
if(NIXL_ROOT)
  set(NIXL_WRAPPER_TARGET tensorrt_llm_nixl_wrapper)
endif()
if(MOONCAKE_ROOT)
  set(MOONCAKE_WRAPPER_TARGET tensorrt_llm_mooncake_wrapper)
endif()
add_subdirectory(executor)

# Consumers of the batch manager and executor libraries also link the
# platform thread library.
find_package(Threads REQUIRED)
foreach(_threaded_target IN ITEMS ${BATCH_MANAGER_TARGET} ${EXECUTOR_TARGET})
  target_link_libraries(${_threaded_target} INTERFACE Threads::Threads)
endforeach()
# Libraries linked publicly into the main shared library: external CUDA /
# TensorRT libraries first, then the component libraries of this project.
# moe_gemm_src is not part of the base list; it is appended below only when
# USING_OSS_CUTLASS_MOE_GEMM is set.
set(TRTLLM_LINK_LIBS
    ${CUDA_DRV_LIB}
    ${CUBLAS_LIB}
    ${CUBLASLT_LIB}
    ${CURAND_LIB}
    ${CMAKE_DL_LIBS}
    ${TRT_LIB}
    common_src
    kernels_src
    flash_mla_src
    context_attention_src
    decoder_attention_src
    trtllm_gen_fmha
    trtllm_gen_fp8_block_scale_moe
    trtllm_gen_gemm
    trtllm_gen_gemm_gated_act
    trtllm_gen_batched_gemm
    selective_scan_src
    ws_layernorm_src
    fpA_intB_gemm_src
    fb_gemm_src
    gemm_swiglu_sm90_src
    cutlass_src
    cute_dsl_src
    layers_src
    runtime_src
    testing_src
    userbuffers_src
    ${DECODER_SHARED_TARGET_0}
    ${DECODER_SHARED_TARGET_1})

# Optional open-source cutlass GEMM variants.
if(USING_OSS_CUTLASS_LOW_LATENCY_GEMM)
  list(APPEND TRTLLM_LINK_LIBS low_latency_gemm_src)
  message(STATUS "USING_OSS_CUTLASS_LOW_LATENCY_GEMM")
endif()

if(USING_OSS_CUTLASS_FP4_GEMM)
  list(APPEND TRTLLM_LINK_LIBS fp4_gemm_src)
  message(STATUS "USING_OSS_CUTLASS_FP4_GEMM")
endif()

if(USING_OSS_CUTLASS_MOE_GEMM)
  list(APPEND TRTLLM_LINK_LIBS moe_gemm_src)
  message(STATUS "USING_OSS_CUTLASS_MOE_GEMM")
endif()

if(USING_OSS_CUTLASS_ALLREDUCE_GEMM)
  list(APPEND TRTLLM_LINK_LIBS ar_gemm_src)
  message(STATUS "USING_OSS_CUTLASS_ALLREDUCE_GEMM")
endif()

# Multi-device builds add MPI and NCCL.
if(ENABLE_MULTI_DEVICE)
  list(APPEND TRTLLM_LINK_LIBS ${MPI_C_LIBRARIES} ${NCCL_LIB})
endif()

# NVSHMEM host and device libraries.
if(ENABLE_NVSHMEM)
  list(APPEND TRTLLM_LINK_LIBS nvshmem::nvshmem_host nvshmem::nvshmem_device)
endif()
# Linker flag spellings per platform. On Windows all three are empty strings;
# elsewhere they are passed through the compiler driver with -Wl.
if(WIN32) # Windows
  set(UNDEFINED_FLAG "")
  set(AS_NEEDED_FLAG "")
  set(NO_AS_NEEDED_FLAG "")
else() # Unix-like compilers
  set(UNDEFINED_FLAG "-Wl,--no-undefined")
  set(AS_NEEDED_FLAG "-Wl,--as-needed")
  set(NO_AS_NEEDED_FLAG "-Wl,--no-as-needed")
endif()
# Set as a directory-level variable, so it also applies to targets created
# after this point, not only to the shared library below.
set(CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS ON)

# The main shared library. It has no sources of its own here; its content comes
# from the libraries linked below.
add_library(${SHARED_TARGET} SHARED)
set_target_properties(
  ${SHARED_TARGET}
  PROPERTIES CXX_STANDARD 17
             CXX_STANDARD_REQUIRED YES
             CXX_EXTENSIONS NO
             LINK_FLAGS "${AS_NEEDED_FLAG} ${UNDEFINED_FLAG}")

target_link_libraries(${SHARED_TARGET} PUBLIC ${TRTLLM_LINK_LIBS})

# These static libraries are linked with WHOLE_ARCHIVE so that all of their
# members are pulled into the shared library.
target_link_libraries(
  ${SHARED_TARGET}
  PRIVATE $<LINK_LIBRARY:WHOLE_ARCHIVE,${BATCH_MANAGER_TARGET}>
          $<LINK_LIBRARY:WHOLE_ARCHIVE,${EXECUTOR_TARGET}>
          $<LINK_LIBRARY:WHOLE_ARCHIVE,fp8_blockscale_gemm_src>
          $<LINK_LIBRARY:WHOLE_ARCHIVE,${INTERNAL_CUTLASS_KERNELS_TARGET}>)

# Link kernels_src and cutlass_src. static internal cutlass lib overridden.
target_link_libraries(${SHARED_TARGET} PUBLIC kernels_src cutlass_src)

# Cyclic dependency of batch manager on TRT-LLM
target_link_libraries(${BATCH_MANAGER_TARGET} INTERFACE ${SHARED_TARGET})

# Cyclic dependency of executor on TRT-LLM
target_link_libraries(${EXECUTOR_TARGET} INTERFACE ${SHARED_TARGET})
# Cyclic dependency of the cache transceiver wrappers (UCX, NIXL, Mooncake) on
# TRT-LLM: consumers of a wrapper link the shared library, and the shared
# library is built after the wrapper.
#
# The wrapper names are expanded unquoted, so a name that was never set (NIXL
# and Mooncake are only named when their *_ROOT is given) is dropped from the
# item list. This avoids evaluating a bare `if(TARGET)`, which would test a
# variable called TARGET rather than the existence of a target.
foreach(_wrapper_target IN ITEMS ${UCX_WRAPPER_TARGET} ${NIXL_WRAPPER_TARGET}
                                 ${MOONCAKE_WRAPPER_TARGET})
  # A named wrapper may still not exist as a target; skip it in that case.
  if(TARGET ${_wrapper_target})
    target_link_libraries(${_wrapper_target} INTERFACE ${SHARED_TARGET})
    add_dependencies(${SHARED_TARGET} ${_wrapper_target})
  endif()
endforeach()
if(NOT WIN32)
  # Build-tree RPATH with two entries: the library's own directory, and the
  # directory four levels up, so that libraries at $PREFIX/lib are found from
  # $PREFIX/lib/python3.12/site-packages/tensorrt_llm/libs.
  set_property(
    TARGET ${SHARED_TARGET}
    PROPERTY BUILD_RPATH "$ORIGIN" "$ORIGIN/../../../..")
endif()
# Optional components, each enabled by its own switch.
if(BUILD_PYT)
  add_subdirectory(thop)
endif()

# Python bindings: BINDING_TYPE selects at most one implementation.
if(BINDING_TYPE STREQUAL "pybind")
  add_subdirectory(pybind)
elseif(BINDING_TYPE STREQUAL "nanobind")
  add_subdirectory(nanobind)
endif()

if(BUILD_DEEP_EP)
  add_subdirectory(deep_ep)
endif()

if(BUILD_DEEP_GEMM)
  add_subdirectory(deep_gemm)
endif()

if(BUILD_FLASH_MLA)
  add_subdirectory(flash_mla)
endif()

# Plugins are always built.
add_subdirectory(plugins)