TensorRT-LLMs/cpp/tensorrt_llm/CMakeLists.txt
石晓伟 548b5b7310
Update TensorRT-LLM (#2532)
* blossom-ci.yml: run vulnerability scan on blossom

* open source efb18c1256f8c9c3d47b7d0c740b83e5d5ebe0ec

---------

Co-authored-by: niukuo <6831097+niukuo@users.noreply.github.com>
Co-authored-by: pei0033 <59505847+pei0033@users.noreply.github.com>
Co-authored-by: Kyungmin Lee <30465912+lkm2835@users.noreply.github.com>
Co-authored-by: Kaiyu Xie <26294424+kaiyux@users.noreply.github.com>
2024-12-04 21:16:56 +08:00

453 lines
16 KiB
CMake

# SPDX-FileCopyrightText: Copyright (c) 2022-2024 NVIDIA CORPORATION &
# AFFILIATES. All rights reserved. SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may not
# use this file except in compliance with the License. You may obtain a copy of
# the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations under
# the License.
# Name of the main shared library target. SHARED_TARGET mirrors it locally and
# is additionally published to the parent scope.
set(TARGET_NAME tensorrt_llm)
set(SHARED_TARGET ${TARGET_NAME})
set(SHARED_TARGET
    ${TARGET_NAME}
    PARENT_SCOPE)

# Directory-scope include paths: they apply to targets declared in this
# directory and in the subdirectories added below.
set(API_INCLUDE_DIR ${PROJECT_SOURCE_DIR}/include)
include_directories(${CMAKE_CURRENT_SOURCE_DIR}/cutlass_extensions/include
                    ${API_INCLUDE_DIR})

# Multi-device builds require MPI; its C headers are added to the include path.
if(ENABLE_MULTI_DEVICE)
  find_package(MPI REQUIRED)
  message(STATUS "Using MPI_C_INCLUDE_DIRS: ${MPI_C_INCLUDE_DIRS}")
  message(STATUS "Using MPI_C_LIBRARIES: ${MPI_C_LIBRARIES}")
  include_directories(${MPI_C_INCLUDE_DIRS})
endif()

# DECODER_SHARED_TARGET is only defined on non-Windows platforms; elsewhere it
# stays unset and expands to nothing where it is used.
if(NOT WIN32)
  set(DECODER_SHARED_TARGET decoder_attention)
endif()

# Core component directories, processed in this order.
foreach(_trtllm_component_dir IN ITEMS common kernels layers runtime
                                       executor_worker)
  add_subdirectory(${_trtllm_component_dir})
endforeach()
# Determine TARGET_ARCH, the per-platform directory name used to locate the
# prebuilt libraries imported later in this file. Unsupported processors abort
# the configure step.
set(TARGET_ARCH "unknown")
message(STATUS "CMAKE_SYSTEM_PROCESSOR: ${CMAKE_SYSTEM_PROCESSOR}")
if(NOT WIN32) # Linux
  # Extract ID and VERSION_ID from /etc/os-release, stripping quotes and the
  # trailing newline. Both variables are empty if the file or key is missing.
  execute_process(
    COMMAND grep -oP "(?<=^ID=).+" /etc/os-release
    COMMAND tr -d "\""
    COMMAND tr -d "\n"
    RESULT_VARIABLE _OS_ID_SUCCESS
    OUTPUT_VARIABLE OS_ID)
  execute_process(
    COMMAND grep -oP "(?<=^VERSION_ID=).+" /etc/os-release
    COMMAND tr -d "\""
    COMMAND tr -d "\n"
    RESULT_VARIABLE _OS_VERSION_ID_SUCCESS
    OUTPUT_VARIABLE OS_VERSION_ID)
  message(STATUS "Operating System: ${OS_ID}, ${OS_VERSION_ID}")
  if(CMAKE_SYSTEM_PROCESSOR MATCHES "x86_64")
    set(TARGET_ARCH "x86_64-linux-gnu")
  elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64")
    set(TARGET_ARCH "aarch64-linux-gnu")
    # The expansions are quoted so that an empty OS_ID / OS_VERSION_ID still
    # yields a well-formed condition (and therefore the intended error message
    # below) instead of a malformed if() with missing operands, and so that
    # the values are compared as strings rather than re-read as variable names.
    if(NOT "${OS_ID}" MATCHES "ubuntu" OR "${OS_VERSION_ID}" VERSION_LESS
                                          22.04)
      message(
        FATAL_ERROR
          "The minimum system requirement for aarch64 is Ubuntu 22.04.")
    endif()
  else()
    message(
      FATAL_ERROR
        "The system processor type is unsupported: ${CMAKE_SYSTEM_PROCESSOR}")
  endif()
else() # Windows
  # AMD64, IA64, ARM64, EM64T, X86
  if(CMAKE_SYSTEM_PROCESSOR MATCHES "AMD64")
    set(TARGET_ARCH "x86_64-windows-msvc")
  else()
    message(
      FATAL_ERROR
        "The system processor type is unsupported: ${CMAKE_SYSTEM_PROCESSOR}")
  endif()
endif()
# Batch manager (static) and UCX wrapper (shared): either built from the
# batch_manager subdirectory or imported as prebuilt binaries located under
# batch_manager/<arch>/ in the source tree.
set(BATCH_MANAGER_TARGET tensorrt_llm_batch_manager_static)
set(BATCH_MANAGER_TARGET_ARCH ${TARGET_ARCH})
set(UCX_WRAPPER_TARGET tensorrt_llm_ucx_wrapper)

if(NOT BUILD_BATCH_MANAGER)
  # ---- Prebuilt batch manager archive ----
  add_library(${BATCH_MANAGER_TARGET} STATIC IMPORTED)
  if(WIN32) # Windows
    set(BATCH_MANAGER_LIB_LOC
        "${CMAKE_CURRENT_SOURCE_DIR}/batch_manager/${BATCH_MANAGER_TARGET_ARCH}/tensorrt_llm_batch_manager_static.lib"
    )
  elseif(USE_CXX11_ABI) # Linux, new libstdc++ ABI
    set(BATCH_MANAGER_LIB_LOC
        "${CMAKE_CURRENT_SOURCE_DIR}/batch_manager/${BATCH_MANAGER_TARGET_ARCH}/libtensorrt_llm_batch_manager_static.a"
    )
  else() # Linux, pre-C++11 ABI variant
    set(BATCH_MANAGER_LIB_LOC
        "${CMAKE_CURRENT_SOURCE_DIR}/batch_manager/${BATCH_MANAGER_TARGET_ARCH}/libtensorrt_llm_batch_manager_static.pre_cxx11.a"
    )
  endif()
  set_target_properties(
    ${BATCH_MANAGER_TARGET} PROPERTIES IMPORTED_LOCATION
                                       "${BATCH_MANAGER_LIB_LOC}")

  # A file below 1024 bytes is treated as truncated; the error text points at
  # Git LFS as the usual cause.
  file(SIZE "${BATCH_MANAGER_LIB_LOC}" BATCH_MANAGER_LIB_SIZE)
  if(BATCH_MANAGER_LIB_SIZE LESS 1024)
    message(
      FATAL_ERROR
        "The batch manager library is truncated or incomplete. This is usually caused by using Git LFS (Large File Storage) incorrectly. Please try running command `git lfs install && git lfs pull`."
    )
  endif()

  # ---- Prebuilt UCX wrapper shared library ----
  add_library(${UCX_WRAPPER_TARGET} SHARED IMPORTED)
  if(WIN32)
    # Windows needs both the DLL and its import library.
    set(UCX_WRAPPER_LIB_BINARY_REL_DIR "batch_manager/")
    set(UCX_WRAPPER_DLL_NAME "tensorrt_llm_ucx_wrapper.dll")
    set(UCX_WRAPPER_LIB_NAME "tensorrt_llm_ucx_wrapper.lib")
    set(UCX_WRAPPER_LIB_SOURCE_REL_LOC
        "${UCX_WRAPPER_LIB_BINARY_REL_DIR}/${BATCH_MANAGER_TARGET_ARCH}/${UCX_WRAPPER_DLL_NAME}"
    )
    set(UCX_WRAPPER_LIB_BINARY_REL_LOC
        "${UCX_WRAPPER_LIB_BINARY_REL_DIR}/${UCX_WRAPPER_DLL_NAME}")
    set(UCX_WRAPPER_IMPLIB_SOURCE_REL_LOC
        "${UCX_WRAPPER_LIB_BINARY_REL_DIR}/${BATCH_MANAGER_TARGET_ARCH}/${UCX_WRAPPER_LIB_NAME}"
    )
    set(UCX_WRAPPER_IMPLIB_BINARY_REL_LOC
        "${UCX_WRAPPER_LIB_BINARY_REL_DIR}/${UCX_WRAPPER_LIB_NAME}")
  else() # Linux
    set(UCX_WRAPPER_LIB_SOURCE_REL_LOC
        "batch_manager/${BATCH_MANAGER_TARGET_ARCH}/libtensorrt_llm_ucx_wrapper.so"
    )
    set(UCX_WRAPPER_LIB_BINARY_REL_LOC
        "batch_manager/libtensorrt_llm_ucx_wrapper.so")
  endif()
  set(UCX_WRAPPER_LIB_LOC
      "${CMAKE_CURRENT_SOURCE_DIR}/${UCX_WRAPPER_LIB_SOURCE_REL_LOC}")

  # Copy the .so to build directory, which is needed in build_wheel.py.
  configure_file(${UCX_WRAPPER_LIB_SOURCE_REL_LOC}
                 ${UCX_WRAPPER_LIB_BINARY_REL_LOC} COPYONLY)
  set_target_properties(
    ${UCX_WRAPPER_TARGET} PROPERTIES IMPORTED_LOCATION
                                     "${UCX_WRAPPER_LIB_LOC}")
  if(WIN32)
    # The import library is copied next to the DLL and registered on the
    # imported target.
    set(UCX_WRAPPER_IMPLIB_LOC
        "${CMAKE_CURRENT_SOURCE_DIR}/${UCX_WRAPPER_IMPLIB_SOURCE_REL_LOC}")
    configure_file(${UCX_WRAPPER_IMPLIB_SOURCE_REL_LOC}
                   ${UCX_WRAPPER_IMPLIB_BINARY_REL_LOC} COPYONLY)
    set_target_properties(
      ${UCX_WRAPPER_TARGET} PROPERTIES IMPORTED_IMPLIB
                                       "${UCX_WRAPPER_IMPLIB_LOC}")
  endif()

  # Same truncation check as for the batch manager archive.
  file(SIZE "${UCX_WRAPPER_LIB_LOC}" UCX_WRAPPER_LIB_SIZE)
  if(UCX_WRAPPER_LIB_SIZE LESS 1024)
    message(
      FATAL_ERROR
        "The ucx wrapper library is truncated or incomplete. This is usually caused by using Git LFS (Large File Storage) incorrectly. Please try running command `git lfs install && git lfs pull`."
    )
  endif()
else()
  # Build from source.
  add_subdirectory(batch_manager)
endif()
# Executor static library: built from the executor subdirectory, or imported
# as a prebuilt archive located under executor/<arch>/ in the source tree.
set(EXECUTOR_TARGET tensorrt_llm_executor_static)
set(EXECUTOR_TARGET_ARCH ${TARGET_ARCH})

if(NOT BUILD_EXECUTOR)
  add_library(${EXECUTOR_TARGET} STATIC IMPORTED)
  # Pick the archive matching the platform and, on Linux, the libstdc++ ABI.
  if(WIN32) # Windows
    set(EXECUTOR_LIB_LOC
        "${CMAKE_CURRENT_SOURCE_DIR}/executor/${EXECUTOR_TARGET_ARCH}/tensorrt_llm_executor_static.lib"
    )
  elseif(USE_CXX11_ABI) # Linux, new ABI
    set(EXECUTOR_LIB_LOC
        "${CMAKE_CURRENT_SOURCE_DIR}/executor/${EXECUTOR_TARGET_ARCH}/libtensorrt_llm_executor_static.a"
    )
  else() # Linux, pre-C++11 ABI variant
    set(EXECUTOR_LIB_LOC
        "${CMAKE_CURRENT_SOURCE_DIR}/executor/${EXECUTOR_TARGET_ARCH}/libtensorrt_llm_executor_static.pre_cxx11.a"
    )
  endif()
  set_target_properties(${EXECUTOR_TARGET} PROPERTIES IMPORTED_LOCATION
                                                      "${EXECUTOR_LIB_LOC}")

  # A file below 1024 bytes is treated as truncated; the error text points at
  # Git LFS as the usual cause.
  file(SIZE "${EXECUTOR_LIB_LOC}" EXECUTOR_LIB_SIZE)
  if(EXECUTOR_LIB_SIZE LESS 1024)
    message(
      FATAL_ERROR
        "The executor library is truncated or incomplete. This is usually caused by using Git LFS (Large File Storage) incorrectly. Please try running command `git lfs install && git lfs pull`."
    )
  endif()
else()
  # Build from source.
  add_subdirectory(executor)
endif()
# Internal CUTLASS kernels static library: built from
# kernels/internal_cutlass_kernels, or imported as a prebuilt archive located
# under kernels/internal_cutlass_kernels/<arch>/ in the source tree.
set(INTERNAL_CUTLASS_KERNELS_TARGET
    tensorrt_llm_internal_cutlass_kernels_static)
set(INTERNAL_CUTLASS_KERNELS_TARGET_ARCH ${TARGET_ARCH})

if(NOT BUILD_INTERNAL_CUTLASS_KERNELS)
  add_library(${INTERNAL_CUTLASS_KERNELS_TARGET} STATIC IMPORTED)
  # Pick the archive matching the platform and, on Linux, the libstdc++ ABI.
  if(WIN32) # Windows
    set(INTERNAL_CUTLASS_KERNELS_LIB_LOC
        "${CMAKE_CURRENT_SOURCE_DIR}/kernels/internal_cutlass_kernels/${INTERNAL_CUTLASS_KERNELS_TARGET_ARCH}/tensorrt_llm_internal_cutlass_kernels_static.lib"
    )
  elseif(USE_CXX11_ABI) # Linux, new ABI
    set(INTERNAL_CUTLASS_KERNELS_LIB_LOC
        "${CMAKE_CURRENT_SOURCE_DIR}/kernels/internal_cutlass_kernels/${INTERNAL_CUTLASS_KERNELS_TARGET_ARCH}/libtensorrt_llm_internal_cutlass_kernels_static.a"
    )
  else() # Linux, pre-C++11 ABI variant
    set(INTERNAL_CUTLASS_KERNELS_LIB_LOC
        "${CMAKE_CURRENT_SOURCE_DIR}/kernels/internal_cutlass_kernels/${INTERNAL_CUTLASS_KERNELS_TARGET_ARCH}/libtensorrt_llm_internal_cutlass_kernels_static.pre_cxx11.a"
    )
  endif()
  set_target_properties(
    ${INTERNAL_CUTLASS_KERNELS_TARGET}
    PROPERTIES IMPORTED_LOCATION "${INTERNAL_CUTLASS_KERNELS_LIB_LOC}")

  # A file below 1024 bytes is treated as truncated; the error text points at
  # Git LFS as the usual cause.
  file(SIZE "${INTERNAL_CUTLASS_KERNELS_LIB_LOC}"
       INTERNAL_CUTLASS_KERNELS_LIB_SIZE)
  if(INTERNAL_CUTLASS_KERNELS_LIB_SIZE LESS 1024)
    message(
      FATAL_ERROR
        "The internal_cutlass_kernels library is truncated or incomplete. This is usually caused by using Git LFS (Large File Storage) incorrectly. Please try running command `git lfs install && git lfs pull`."
    )
  endif()
else()
  # Build from source.
  add_subdirectory(kernels/internal_cutlass_kernels)
endif()
# Consumers of the batch manager and executor libraries also link against the
# platform thread library.
find_package(Threads REQUIRED)
foreach(_threaded_lib IN ITEMS ${BATCH_MANAGER_TARGET} ${EXECUTOR_TARGET})
  target_link_libraries(${_threaded_lib} INTERFACE Threads::Threads)
endforeach()
# trtllm_add_abi_symbol_check(<check_target> <stamp_name> <library_target>)
#
# Declares the custom target <check_target>. On Windows the target is empty. On
# other platforms it depends on a custom command that pipes the demangled `nm`
# symbol listing of <library_target> into grep, looking for `std::__cxx11::`:
#   * USE_CXX11_ABI on : `grep -q`  - succeeds when the pattern is present.
#   * USE_CXX11_ABI off: `grep -qv` - succeeds when at least one line does not
#     contain the pattern.
#
# <stamp_name> is the file name (inside the current binary directory) declared
# as the command's OUTPUT.
#
# NOTE(review): `grep -qv` passes on virtually any non-empty listing, so the
# pre-C++11 branch may not reject a library that does contain the new-ABI
# symbols - confirm whether a strict "pattern absent" check was intended.
# NOTE(review): the command itself never creates the OUTPUT file, so the check
# presumably reruns on every build - confirm this is intended.
function(trtllm_add_abi_symbol_check CHECK_TARGET STAMP_NAME LIBRARY_TARGET)
  if(WIN32)
    add_custom_target(${CHECK_TARGET})
    return()
  endif()

  if(USE_CXX11_ABI)
    set(grep_mode -q)
  else()
    set(grep_mode -qv)
  endif()

  set(stamp_file "${CMAKE_CURRENT_BINARY_DIR}/${STAMP_NAME}")
  # No VERBATIM here: the bare `|` must reach the shell as a pipe.
  add_custom_command(
    OUTPUT "${stamp_file}"
    COMMAND nm -C $<TARGET_FILE:${LIBRARY_TARGET}> | grep ${grep_mode}
            'std::__cxx11::'
    DEPENDS ${LIBRARY_TARGET})
  add_custom_target(${CHECK_TARGET} DEPENDS "${stamp_file}")
endfunction()

trtllm_add_abi_symbol_check(check_symbol .check_symbol ${BATCH_MANAGER_TARGET})
trtllm_add_abi_symbol_check(
  check_symbol_internal_cutlass_kernels .check_symbol_internal_cutlass_kernels
  ${INTERNAL_CUTLASS_KERNELS_TARGET})
trtllm_add_abi_symbol_check(check_symbol_executor .check_symbol_executor
                            ${EXECUTOR_TARGET})
# NVRTC wrapper shared library: built from its nvrtcWrapper subdirectory, or
# imported as a prebuilt binary located under nvrtcWrapper/<arch>/ in the
# source tree.
set(NVRTC_WRAPPER_TARGET tensorrt_llm_nvrtc_wrapper)
set(NVRTC_WRAPPER_TARGET_ARCH ${TARGET_ARCH})

if(NOT BUILD_NVRTC_WRAPPER)
  add_library(${NVRTC_WRAPPER_TARGET} SHARED IMPORTED)
  if(WIN32)
    # Windows needs both the DLL and its import library.
    set(NVRTC_WRAPPER_LIB_BINARY_REL_DIR
        "kernels/decoderMaskedMultiheadAttention/decoderXQAImplJIT/nvrtcWrapper"
    )
    set(NVRTC_WRAPPER_DLL_NAME "tensorrt_llm_nvrtc_wrapper.dll")
    set(NVRTC_WRAPPER_LIB_NAME "tensorrt_llm_nvrtc_wrapper.lib")
    set(NVRTC_WRAPPER_LIB_SOURCE_REL_LOC
        "${NVRTC_WRAPPER_LIB_BINARY_REL_DIR}/${NVRTC_WRAPPER_TARGET_ARCH}/${NVRTC_WRAPPER_DLL_NAME}"
    )
    set(NVRTC_WRAPPER_LIB_BINARY_REL_LOC
        "${NVRTC_WRAPPER_LIB_BINARY_REL_DIR}/${NVRTC_WRAPPER_DLL_NAME}")
    set(NVRTC_WRAPPER_IMPLIB_SOURCE_REL_LOC
        "${NVRTC_WRAPPER_LIB_BINARY_REL_DIR}/${NVRTC_WRAPPER_TARGET_ARCH}/${NVRTC_WRAPPER_LIB_NAME}"
    )
    set(NVRTC_WRAPPER_IMPLIB_BINARY_REL_LOC
        "${NVRTC_WRAPPER_LIB_BINARY_REL_DIR}/${NVRTC_WRAPPER_LIB_NAME}")
  else() # Linux
    set(NVRTC_WRAPPER_LIB_SOURCE_REL_LOC
        "kernels/decoderMaskedMultiheadAttention/decoderXQAImplJIT/nvrtcWrapper/${NVRTC_WRAPPER_TARGET_ARCH}/libtensorrt_llm_nvrtc_wrapper.so"
    )
    set(NVRTC_WRAPPER_LIB_BINARY_REL_LOC
        "kernels/decoderMaskedMultiheadAttention/decoderXQAImplJIT/nvrtcWrapper/libtensorrt_llm_nvrtc_wrapper.so"
    )
  endif()
  set(NVRTC_WRAPPER_LIB_LOC
      "${CMAKE_CURRENT_SOURCE_DIR}/${NVRTC_WRAPPER_LIB_SOURCE_REL_LOC}")

  # Copy the .so to build directory, which is needed in build_wheel.py.
  configure_file(${NVRTC_WRAPPER_LIB_SOURCE_REL_LOC}
                 ${NVRTC_WRAPPER_LIB_BINARY_REL_LOC} COPYONLY)
  set_target_properties(
    ${NVRTC_WRAPPER_TARGET} PROPERTIES IMPORTED_LOCATION
                                       "${NVRTC_WRAPPER_LIB_LOC}")
  if(WIN32)
    # The import library is copied next to the DLL and registered on the
    # imported target.
    set(NVRTC_WRAPPER_IMPLIB_LOC
        "${CMAKE_CURRENT_SOURCE_DIR}/${NVRTC_WRAPPER_IMPLIB_SOURCE_REL_LOC}")
    configure_file(${NVRTC_WRAPPER_IMPLIB_SOURCE_REL_LOC}
                   ${NVRTC_WRAPPER_IMPLIB_BINARY_REL_LOC} COPYONLY)
    set_target_properties(
      ${NVRTC_WRAPPER_TARGET} PROPERTIES IMPORTED_IMPLIB
                                         "${NVRTC_WRAPPER_IMPLIB_LOC}")
  endif()

  # A file below 1024 bytes is treated as truncated; the error text points at
  # Git LFS as the usual cause.
  file(SIZE "${NVRTC_WRAPPER_LIB_LOC}" NVRTC_WRAPPER_LIB_SIZE)
  if(NVRTC_WRAPPER_LIB_SIZE LESS 1024)
    message(
      FATAL_ERROR
        "The nvrtc wrapper library is truncated or incomplete. This is usually caused by using Git LFS (Large File Storage) incorrectly. Please try running command `git lfs install && git lfs pull`."
    )
  endif()
else()
  # Build from source.
  add_subdirectory(
    kernels/decoderMaskedMultiheadAttention/decoderXQAImplJIT/nvrtcWrapper)
endif()
# Libraries linked into the main shared library. External libraries come
# first, followed by the in-tree component targets.
set(TRTLLM_LINK_LIBS ${CUBLAS_LIB} ${CUBLASLT_LIB} ${CMAKE_DL_LIBS} ${TRT_LIB})
list(
  APPEND
  TRTLLM_LINK_LIBS
  common_src
  kernels_src
  context_attention_src
  decoder_attention_src
  selective_scan_src
  fpA_intB_gemm_src
  moe_gemm_src
  fb_gemm_src
  gemm_swiglu_sm90_src
  cutlass_src
  layers_src
  runtime_src
  userbuffers_src
  ${DECODER_SHARED_TARGET})

# Multi-device builds additionally link MPI and NCCL.
if(ENABLE_MULTI_DEVICE)
  list(APPEND TRTLLM_LINK_LIBS ${MPI_C_LIBRARIES} ${NCCL_LIB})
endif()
# Linker flag helpers. They default to empty (Windows) and are filled in with
# GNU-ld style options on every other platform.
set(UNDEFINED_FLAG "")
set(AS_NEEDED_FLAG "")
set(NO_AS_NEEDED_FLAG "")
if(NOT WIN32) # Unix-like compilers
  set(UNDEFINED_FLAG "-Wl,--no-undefined")
  set(AS_NEEDED_FLAG "-Wl,--as-needed")
  set(NO_AS_NEEDED_FLAG "-Wl,--no-as-needed")
endif()

# Export all symbols from DLLs created after this point.
set(CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS ON)

# The main shared library. No sources are listed here; its content comes from
# the libraries linked into it further down.
add_library(${SHARED_TARGET} SHARED)
set_target_properties(
  ${SHARED_TARGET}
  PROPERTIES CXX_STANDARD "17"
             CXX_STANDARD_REQUIRED "YES"
             CXX_EXTENSIONS "NO")
set_property(TARGET ${SHARED_TARGET}
             PROPERTY LINK_FLAGS "${AS_NEEDED_FLAG} ${UNDEFINED_FLAG}")
# link_whole_archive(<TARGET> <LIBRARY_TO_LINK>)
#
# Links the static library target LIBRARY_TO_LINK into TARGET so that every
# object in the archive is kept, not only the ones that resolve a reference.
#
#   TARGET          - target that receives the archive.
#   LIBRARY_TO_LINK - static library target whose file is linked wholesale.
#
# May be called several times for the same TARGET.
function(link_whole_archive TARGET LIBRARY_TO_LINK)
  if(WIN32)
    target_link_libraries(${TARGET} PUBLIC $<TARGET_FILE:${LIBRARY_TO_LINK}>)
    # Append rather than assign: assigning LINK_FLAGS would discard the
    # /WHOLEARCHIVE option added by earlier calls for the same target (and any
    # other flags already present), leaving only the last archive covered.
    set_property(
      TARGET ${TARGET}
      APPEND_STRING
      PROPERTY LINK_FLAGS " /WHOLEARCHIVE:${LIBRARY_TO_LINK}")
  else()
    # Assume everything else is like gcc
    target_link_libraries(
      ${TARGET} PRIVATE "-Wl,--whole-archive" $<TARGET_FILE:${LIBRARY_TO_LINK}>
                        "-Wl,--no-whole-archive")
  endif()
endfunction()
# Link the collected component and external libraries into the shared library.
target_link_libraries(${SHARED_TARGET} PUBLIC ${TRTLLM_LINK_LIBS})

# Pull the static archives into the shared library in their entirety.
foreach(_whole_archive_lib IN ITEMS ${BATCH_MANAGER_TARGET} ${EXECUTOR_TARGET}
                                    ${INTERNAL_CUTLASS_KERNELS_TARGET})
  link_whole_archive(${SHARED_TARGET} ${_whole_archive_lib})
endforeach()

# Cyclic dependencies on TRT-LLM: the batch manager, the executor, the
# internal_cutlass_kernels library and the UCX data transceiver each expose the
# shared library to their own consumers.
foreach(_cyclic_lib IN ITEMS ${BATCH_MANAGER_TARGET} ${EXECUTOR_TARGET}
                             ${INTERNAL_CUTLASS_KERNELS_TARGET}
                             ${UCX_WRAPPER_TARGET})
  target_link_libraries(${_cyclic_lib} INTERFACE ${SHARED_TARGET})
endforeach()

# NOTE(review): this assigns LINK_FLAGS rather than appending, so it replaces
# any LINK_FLAGS value given to the target earlier in this file - confirm that
# dropping those earlier flags on non-Windows builds is intended.
if(NOT WIN32)
  set_property(TARGET ${SHARED_TARGET} PROPERTY LINK_FLAGS
                                                "-Wl,-rpath='$ORIGIN'")
endif()

target_link_libraries(${SHARED_TARGET} PUBLIC ${NVRTC_WRAPPER_TARGET})

# Build-order dependencies: the UCX wrapper and the three symbol checks are
# brought up to date before the shared library is built.
add_dependencies(
  ${SHARED_TARGET} ${UCX_WRAPPER_TARGET} check_symbol check_symbol_executor
  check_symbol_internal_cutlass_kernels)

# Optional Python-facing components, followed by the plugins.
if(BUILD_PYT)
  add_subdirectory(thop)
endif()
if(BUILD_PYBIND)
  add_subdirectory(pybind)
endif()
add_subdirectory(plugins)