chore: Stabilize ABI boundary for internal kernel library (#3117)

chore: Stabilize ABI boundary for internal kernel library

Signed-off-by: Yuan Tong <13075180+tongyuantongyu@users.noreply.github.com>
This commit is contained in:
Yuan Tong 2025-04-11 15:07:50 +08:00 committed by GitHub
parent 410f56357e
commit a139eae425
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
36 changed files with 217 additions and 407 deletions

1
.gitattributes vendored
View File

@ -2,3 +2,4 @@
*.lib filter=lfs diff=lfs merge=lfs -text
*.so filter=lfs diff=lfs merge=lfs -text
*.dll filter=lfs diff=lfs merge=lfs -text
*.xz filter=lfs diff=lfs merge=lfs -text

View File

@ -50,36 +50,6 @@ else()
message(STATUS "NVTX is enabled")
endif()
if(EXISTS
"${CMAKE_CURRENT_SOURCE_DIR}/tensorrt_llm/batch_manager/CMakeLists.txt")
set(BUILD_BATCH_MANAGER_DEFAULT ON)
else()
set(BUILD_BATCH_MANAGER_DEFAULT OFF)
endif()
option(BUILD_BATCH_MANAGER "Build batch manager from source"
${BUILD_BATCH_MANAGER_DEFAULT})
if(BUILD_BATCH_MANAGER)
message(STATUS "Building batch manager")
else()
message(STATUS "Importing batch manager")
endif()
if(EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/tensorrt_llm/executor/CMakeLists.txt")
set(BUILD_EXECUTOR_DEFAULT ON)
else()
set(BUILD_EXECUTOR_DEFAULT OFF)
endif()
option(BUILD_EXECUTOR "Build executor from source" ${BUILD_EXECUTOR_DEFAULT})
if(BUILD_EXECUTOR)
message(STATUS "Building executor")
else()
message(STATUS "Importing executor")
endif()
if(EXISTS
"${CMAKE_CURRENT_SOURCE_DIR}/tensorrt_llm/kernels/decoderMaskedMultiheadAttention/decoderXQAImplJIT/nvrtcWrapper/CMakeLists.txt"
)

View File

@ -25,7 +25,7 @@ namespace tensorrt_llm::common
{
[[noreturn]] inline void throwRuntimeError(char const* const file, int const line, std::string const& info = "")
{
throw TllmException(file, line, fmtstr("[TensorRT-LLM][ERROR] Assertion failed: %s", info.c_str()));
throw TllmException(file, line, fmtstr("[TensorRT-LLM][ERROR] Assertion failed: %s", info.c_str()).c_str());
}
} // namespace tensorrt_llm::common

View File

@ -125,8 +125,8 @@ void check(T ptr, char const* const func, char const* const file, int const line
{
if (ptr)
{
throw TllmException(
file, line, fmtstr("[TensorRT-LLM][ERROR] CUDA runtime error in %s: %s", func, _cudaGetErrorEnum(ptr)));
throw TllmException(file, line,
fmtstr("[TensorRT-LLM][ERROR] CUDA runtime error in %s: %s", func, _cudaGetErrorEnum(ptr)).c_str());
}
}
@ -136,8 +136,8 @@ void checkEx(
{
if (std::all_of(std::begin(validReturns), std::end(validReturns), [&ptr](T const& t) { return t != ptr; }))
{
throw TllmException(
file, line, fmtstr("[TensorRT-LLM][ERROR] CUDA runtime error in %s: %s", func, _cudaGetErrorEnum(ptr)));
throw TllmException(file, line,
fmtstr("[TensorRT-LLM][ERROR] CUDA runtime error in %s: %s", func, _cudaGetErrorEnum(ptr)).c_str());
}
}

View File

@ -21,6 +21,7 @@
#endif // ENABLE_BF16
#include <cuda_fp16.h>
#include <cstdarg>
#include <memory> // std::make_unique
#include <sstream> // std::stringstream
#include <string>
@ -101,12 +102,40 @@ inline std::string fmtstr(std::string&& s)
return s;
}
typedef char* (*fmtstr_allocator)(void* target, size_t count);
void fmtstr_(char const* format, fmtstr_allocator alloc, void* target, va_list args);
#if defined(_MSC_VER)
std::string fmtstr(char const* format, ...);
inline std::string fmtstr(char const* format, ...);
#else
std::string fmtstr(char const* format, ...) __attribute__((format(printf, 1, 2)));
inline std::string fmtstr(char const* format, ...) __attribute__((format(printf, 1, 2)));
#endif
inline std::string fmtstr(char const* format, ...)
{
std::string result;
va_list args;
va_start(args, format);
fmtstr_(
format,
[](void* target, size_t count) -> char*
{
if (count <= 0)
{
return nullptr;
}
const auto str = static_cast<std::string*>(target);
str->resize(count);
return str->data();
},
&result, args);
va_end(args);
return result;
}
// __PRETTY_FUNCTION__ is used for neat debugging printing but is not supported on Windows
// The alternative is __FUNCSIG__, which is similar but not identical
#if defined(_WIN32)

View File

@ -22,7 +22,7 @@
#include <string>
#define NEW_TLLM_EXCEPTION(...) \
tensorrt_llm::common::TllmException(__FILE__, __LINE__, tensorrt_llm::common::fmtstr(__VA_ARGS__))
tensorrt_llm::common::TllmException(__FILE__, __LINE__, tensorrt_llm::common::fmtstr(__VA_ARGS__).c_str())
namespace tensorrt_llm::common
{
@ -32,7 +32,7 @@ class TllmException : public std::runtime_error
public:
static auto constexpr MAX_FRAMES = 128;
explicit TllmException(char const* file, std::size_t line, std::string const& msg);
explicit TllmException(char const* file, std::size_t line, char const* msg);
~TllmException() noexcept override;

View File

@ -24,10 +24,13 @@ namespace tensorrt_llm::runtime
{
struct IpcNvlsHandle
{
// Begin internal kernel visible fields
// Changes to these fields must sync with ipcNvlsMemory.h in internal kernel repo
size_t size = 0;
// Device pointers used by kernels
uintptr_t uc_ptr = 0;
uintptr_t mc_ptr = 0;
// End internal kernel visible fields
std::vector<uintptr_t> ipc_uc_ptrs;
// Device pointers
CUdeviceptr uc_va;
@ -43,9 +46,9 @@ void MPI_group_barrier(std::set<int> ranks);
bool ipcNvlsSupported();
IpcNvlsHandle ipcNvlsAllocate(size_t size, std::set<int> ranks);
IpcNvlsHandle* ipcNvlsAllocate(size_t size, std::set<int> ranks);
void ipcNvlsFree(IpcNvlsHandle handle);
void ipcNvlsFree(IpcNvlsHandle* handle);
template <typename T>
class DeviceAllocationNvls
@ -68,19 +71,19 @@ public:
// Return device pointer to multicast memory
[[nodiscard]] T* getMulticastPointer() const
{
return reinterpret_cast<T*>(_handle.mc_ptr);
return reinterpret_cast<T*>(_handle->mc_ptr);
}
// Return device pointer for current rank
[[nodiscard]] T* getUnicastPointer() const
{
return reinterpret_cast<T*>(_handle.uc_ptr);
return reinterpret_cast<T*>(_handle->uc_ptr);
}
// Return host list of device pointers to memory on each rank
[[nodiscard]] T** getIpcUnicastPointers()
{
return reinterpret_cast<T**>(_handle.ipc_uc_ptrs.data());
return reinterpret_cast<T**>(_handle->ipc_uc_ptrs.data());
}
[[nodiscard]] size_t getCapacity() const
@ -99,6 +102,6 @@ public:
private:
size_t _capacity = 0;
IpcNvlsHandle _handle;
// Handle returned by ipcNvlsAllocate(); defaults to null so the pointer is never left indeterminate.
IpcNvlsHandle* _handle = nullptr;
};
} // namespace tensorrt_llm::runtime

View File

@ -85,121 +85,12 @@ endif()
set(BATCH_MANAGER_TARGET tensorrt_llm_batch_manager_static)
set(BATCH_MANAGER_TARGET_ARCH ${TARGET_ARCH})
if(BUILD_BATCH_MANAGER)
add_subdirectory(batch_manager)
else()
add_library(${BATCH_MANAGER_TARGET} STATIC IMPORTED)
if(NOT WIN32) # Linux
if(USE_CXX11_ABI)
set(BATCH_MANAGER_LIB_LOC
"${CMAKE_CURRENT_SOURCE_DIR}/batch_manager/${BATCH_MANAGER_TARGET_ARCH}/libtensorrt_llm_batch_manager_static.a"
)
else()
set(BATCH_MANAGER_LIB_LOC
"${CMAKE_CURRENT_SOURCE_DIR}/batch_manager/${BATCH_MANAGER_TARGET_ARCH}/libtensorrt_llm_batch_manager_static.pre_cxx11.a"
)
endif()
else() # Windows
set(BATCH_MANAGER_LIB_LOC
"${CMAKE_CURRENT_SOURCE_DIR}/batch_manager/${BATCH_MANAGER_TARGET_ARCH}/tensorrt_llm_batch_manager_static.lib"
)
endif()
set_property(TARGET ${BATCH_MANAGER_TARGET} PROPERTY IMPORTED_LOCATION
${BATCH_MANAGER_LIB_LOC})
file(SIZE ${BATCH_MANAGER_LIB_LOC} BATCH_MANAGER_LIB_SIZE)
if(BATCH_MANAGER_LIB_SIZE LESS 1024)
message(
FATAL_ERROR
"The batch manager library is truncated or incomplete. This is usually caused by using Git LFS (Large File Storage) incorrectly. Please try running command `git lfs install && git lfs pull`."
)
endif()
endif()
add_subdirectory(batch_manager)
set(EXECUTOR_TARGET tensorrt_llm_executor_static)
set(EXECUTOR_TARGET_ARCH ${TARGET_ARCH})
set(UCX_WRAPPER_TARGET tensorrt_llm_ucx_wrapper)
if(BUILD_EXECUTOR)
add_subdirectory(executor)
else()
add_library(${EXECUTOR_TARGET} STATIC IMPORTED)
if(NOT WIN32) # Linux
if(USE_CXX11_ABI)
set(EXECUTOR_LIB_LOC
"${CMAKE_CURRENT_SOURCE_DIR}/executor/${EXECUTOR_TARGET_ARCH}/libtensorrt_llm_executor_static.a"
)
else()
set(EXECUTOR_LIB_LOC
"${CMAKE_CURRENT_SOURCE_DIR}/executor/${EXECUTOR_TARGET_ARCH}/libtensorrt_llm_executor_static.pre_cxx11.a"
)
endif()
else() # Windows
set(EXECUTOR_LIB_LOC
"${CMAKE_CURRENT_SOURCE_DIR}/executor/${EXECUTOR_TARGET_ARCH}/tensorrt_llm_executor_static.lib"
)
endif()
set_property(TARGET ${EXECUTOR_TARGET} PROPERTY IMPORTED_LOCATION
${EXECUTOR_LIB_LOC})
file(SIZE ${EXECUTOR_LIB_LOC} EXECUTOR_LIB_SIZE)
if(EXECUTOR_LIB_SIZE LESS 1024)
message(
FATAL_ERROR
"The executor library is truncated or incomplete. This is usually caused by using Git LFS (Large File Storage) incorrectly. Please try running command `git lfs install && git lfs pull`."
)
endif()
if(ENABLE_UCX)
add_library(${UCX_WRAPPER_TARGET} SHARED IMPORTED)
if(NOT WIN32) # Linux
set(UCX_WRAPPER_LIB_SOURCE_REL_LOC
"executor/cache_transmission/ucx_utils/${EXECUTOR_TARGET_ARCH}/libtensorrt_llm_ucx_wrapper.so"
)
set(UCX_WRAPPER_LIB_BINARY_REL_LOC
"executor/cache_transmission/ucx_utils/libtensorrt_llm_ucx_wrapper.so"
)
else()
set(UCX_WRAPPER_LIB_BINARY_REL_DIR
"executor/cache_transmission/ucx_utils/")
set(UCX_WRAPPER_DLL_NAME "tensorrt_llm_ucx_wrapper.dll")
set(UCX_WRAPPER_LIB_NAME "tensorrt_llm_ucx_wrapper.lib")
set(UCX_WRAPPER_LIB_SOURCE_REL_LOC
"${UCX_WRAPPER_LIB_BINARY_REL_DIR}/${EXECUTOR_TARGET_ARCH}/${UCX_WRAPPER_DLL_NAME}"
)
set(UCX_WRAPPER_LIB_BINARY_REL_LOC
"${UCX_WRAPPER_LIB_BINARY_REL_DIR}/${UCX_WRAPPER_DLL_NAME}")
set(UCX_WRAPPER_IMPLIB_SOURCE_REL_LOC
"${UCX_WRAPPER_LIB_BINARY_REL_DIR}/${EXECUTOR_TARGET_ARCH}/${UCX_WRAPPER_LIB_NAME}"
)
set(UCX_WRAPPER_IMPLIB_BINARY_REL_LOC
"${UCX_WRAPPER_LIB_BINARY_REL_DIR}/${UCX_WRAPPER_LIB_NAME}")
endif()
set(UCX_WRAPPER_LIB_LOC
"${CMAKE_CURRENT_SOURCE_DIR}/${UCX_WRAPPER_LIB_SOURCE_REL_LOC}")
# Copy the .so to build directory, which is needed in build_wheel.py.
configure_file(${UCX_WRAPPER_LIB_SOURCE_REL_LOC}
${UCX_WRAPPER_LIB_BINARY_REL_LOC} COPYONLY)
set_property(TARGET ${UCX_WRAPPER_TARGET} PROPERTY IMPORTED_LOCATION
${UCX_WRAPPER_LIB_LOC})
if(WIN32)
set(UCX_WRAPPER_IMPLIB_LOC
"${CMAKE_CURRENT_SOURCE_DIR}/${UCX_WRAPPER_IMPLIB_SOURCE_REL_LOC}")
configure_file(${UCX_WRAPPER_IMPLIB_SOURCE_REL_LOC}
${UCX_WRAPPER_IMPLIB_BINARY_REL_LOC} COPYONLY)
set_property(TARGET ${UCX_WRAPPER_TARGET}
PROPERTY IMPORTED_IMPLIB ${UCX_WRAPPER_IMPLIB_LOC})
endif()
file(SIZE ${UCX_WRAPPER_LIB_LOC} UCX_WRAPPER_LIB_SIZE)
if(UCX_WRAPPER_LIB_SIZE LESS 1024)
message(
FATAL_ERROR
"The ucx wrapper library is truncated or incomplete. This is usually caused by using Git LFS (Large File Storage) incorrectly. Please try running command `git lfs install && git lfs pull`."
)
endif()
endif()
endif()
add_subdirectory(executor)
set(INTERNAL_CUTLASS_KERNELS_TARGET
tensorrt_llm_internal_cutlass_kernels_static)
@ -208,24 +99,33 @@ if(BUILD_INTERNAL_CUTLASS_KERNELS)
add_subdirectory(kernels/internal_cutlass_kernels)
else()
add_library(${INTERNAL_CUTLASS_KERNELS_TARGET} STATIC IMPORTED)
set(INTERNAL_CUTLASS_KERNELS_LIB_TARBALL
"${CMAKE_CURRENT_SOURCE_DIR}/kernels/internal_cutlass_kernels/${INTERNAL_CUTLASS_KERNELS_TARGET_ARCH}/${INTERNAL_CUTLASS_KERNELS_TARGET}.tar.xz"
)
if(NOT WIN32) # Linux
if(USE_CXX11_ABI)
set(INTERNAL_CUTLASS_KERNELS_LIB_LOC
"${CMAKE_CURRENT_SOURCE_DIR}/kernels/internal_cutlass_kernels/${INTERNAL_CUTLASS_KERNELS_TARGET_ARCH}/libtensorrt_llm_internal_cutlass_kernels_static.a"
)
else()
set(INTERNAL_CUTLASS_KERNELS_LIB_LOC
"${CMAKE_CURRENT_SOURCE_DIR}/kernels/internal_cutlass_kernels/${INTERNAL_CUTLASS_KERNELS_TARGET_ARCH}/libtensorrt_llm_internal_cutlass_kernels_static.pre_cxx11.a"
)
endif()
set(INTERNAL_CUTLASS_KERNELS_LIB_NAME
"lib${INTERNAL_CUTLASS_KERNELS_TARGET}.a")
else() # Windows
set(INTERNAL_CUTLASS_KERNELS_LIB_LOC
"${CMAKE_CURRENT_SOURCE_DIR}/kernels/internal_cutlass_kernels/${INTERNAL_CUTLASS_KERNELS_TARGET_ARCH}/tensorrt_llm_internal_cutlass_kernels_static.lib"
)
set(INTERNAL_CUTLASS_KERNELS_LIB_NAME
"${INTERNAL_CUTLASS_KERNELS_TARGET}.lib")
endif()
set(INTERNAL_CUTLASS_KERNELS_LIB_PATH
"${CMAKE_CURRENT_BINARY_DIR}/${INTERNAL_CUTLASS_KERNELS_LIB_NAME}")
add_custom_command(
OUTPUT ${INTERNAL_CUTLASS_KERNELS_LIB_PATH}
COMMAND ${CMAKE_COMMAND} -E tar xf ${INTERNAL_CUTLASS_KERNELS_LIB_TARBALL}
DEPENDS ${INTERNAL_CUTLASS_KERNELS_LIB_TARBALL}
WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
VERBATIM)
add_custom_target(${INTERNAL_CUTLASS_KERNELS_TARGET}_helper
DEPENDS ${INTERNAL_CUTLASS_KERNELS_LIB_PATH})
add_dependencies(${INTERNAL_CUTLASS_KERNELS_TARGET}
${INTERNAL_CUTLASS_KERNELS_TARGET}_helper)
set_property(TARGET ${INTERNAL_CUTLASS_KERNELS_TARGET}
PROPERTY IMPORTED_LOCATION ${INTERNAL_CUTLASS_KERNELS_LIB_LOC})
file(SIZE ${INTERNAL_CUTLASS_KERNELS_LIB_LOC}
PROPERTY IMPORTED_LOCATION ${INTERNAL_CUTLASS_KERNELS_LIB_PATH})
target_link_libraries(${INTERNAL_CUTLASS_KERNELS_TARGET}
INTERFACE ${INTERNAL_CUTLASS_KERNELS_LIB_PATH})
file(SIZE ${INTERNAL_CUTLASS_KERNELS_LIB_TARBALL}
INTERNAL_CUTLASS_KERNELS_LIB_SIZE)
if(INTERNAL_CUTLASS_KERNELS_LIB_SIZE LESS 1024)
message(
@ -239,70 +139,6 @@ find_package(Threads REQUIRED)
target_link_libraries(${BATCH_MANAGER_TARGET} INTERFACE Threads::Threads)
target_link_libraries(${EXECUTOR_TARGET} INTERFACE Threads::Threads)
if(NOT WIN32)
if(USE_CXX11_ABI)
add_custom_command(
OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/.check_symbol"
COMMAND nm -C $<TARGET_FILE:${BATCH_MANAGER_TARGET}> | grep -q
'std::__cxx11::'
DEPENDS ${BATCH_MANAGER_TARGET})
else()
add_custom_command(
OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/.check_symbol"
COMMAND nm -C $<TARGET_FILE:${BATCH_MANAGER_TARGET}> | grep -qv
'std::__cxx11::'
DEPENDS ${BATCH_MANAGER_TARGET})
endif()
add_custom_target(check_symbol
DEPENDS "${CMAKE_CURRENT_BINARY_DIR}/.check_symbol")
else()
add_custom_target(check_symbol)
endif()
if(NOT WIN32)
if(USE_CXX11_ABI)
add_custom_command(
OUTPUT
"${CMAKE_CURRENT_BINARY_DIR}/.check_symbol_internal_cutlass_kernels"
COMMAND nm -C $<TARGET_FILE:${INTERNAL_CUTLASS_KERNELS_TARGET}> | grep -q
'std::__cxx11::'
DEPENDS ${INTERNAL_CUTLASS_KERNELS_TARGET})
else()
add_custom_command(
OUTPUT
"${CMAKE_CURRENT_BINARY_DIR}/.check_symbol_internal_cutlass_kernels"
COMMAND nm -C $<TARGET_FILE:${INTERNAL_CUTLASS_KERNELS_TARGET}> | grep -qv
'std::__cxx11::'
DEPENDS ${INTERNAL_CUTLASS_KERNELS_TARGET})
endif()
add_custom_target(
check_symbol_internal_cutlass_kernels
DEPENDS "${CMAKE_CURRENT_BINARY_DIR}/.check_symbol_internal_cutlass_kernels"
)
else()
add_custom_target(check_symbol_internal_cutlass_kernels)
endif()
if(NOT WIN32)
if(USE_CXX11_ABI)
add_custom_command(
OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/.check_symbol_executor"
COMMAND nm -C $<TARGET_FILE:${EXECUTOR_TARGET}> | grep -q 'std::__cxx11::'
DEPENDS ${EXECUTOR_TARGET})
else()
add_custom_command(
OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/.check_symbol_executor"
COMMAND nm -C $<TARGET_FILE:${EXECUTOR_TARGET}> | grep -qv
'std::__cxx11::'
DEPENDS ${EXECUTOR_TARGET})
endif()
add_custom_target(
check_symbol_executor
DEPENDS "${CMAKE_CURRENT_BINARY_DIR}/.check_symbol_executor")
else()
add_custom_target(check_symbol_executor)
endif()
set(NVRTC_WRAPPER_TARGET tensorrt_llm_nvrtc_wrapper)
set(NVRTC_WRAPPER_TARGET_ARCH ${TARGET_ARCH})
@ -311,48 +147,40 @@ if(BUILD_NVRTC_WRAPPER)
kernels/decoderMaskedMultiheadAttention/decoderXQAImplJIT/nvrtcWrapper)
else()
add_library(${NVRTC_WRAPPER_TARGET} SHARED IMPORTED)
set(NVRTC_WRAPPER_LIB_TARBALL
"${CMAKE_CURRENT_SOURCE_DIR}/kernels/decoderMaskedMultiheadAttention/decoderXQAImplJIT/nvrtcWrapper/${NVRTC_WRAPPER_TARGET_ARCH}/${NVRTC_WRAPPER_TARGET}.tar.xz"
)
set(NVRTC_WRAPPER_LIB_BINARY_DIR
"${CMAKE_CURRENT_BINARY_DIR}/kernels/decoderMaskedMultiheadAttention/decoderXQAImplJIT/nvrtcWrapper"
)
if(NOT WIN32) # Linux
set(NVRTC_WRAPPER_LIB_SOURCE_REL_LOC
"kernels/decoderMaskedMultiheadAttention/decoderXQAImplJIT/nvrtcWrapper/${NVRTC_WRAPPER_TARGET_ARCH}/libtensorrt_llm_nvrtc_wrapper.so"
)
set(NVRTC_WRAPPER_LIB_BINARY_REL_LOC
"kernels/decoderMaskedMultiheadAttention/decoderXQAImplJIT/nvrtcWrapper/libtensorrt_llm_nvrtc_wrapper.so"
)
else()
set(NVRTC_WRAPPER_LIB_BINARY_REL_DIR
"kernels/decoderMaskedMultiheadAttention/decoderXQAImplJIT/nvrtcWrapper"
)
set(NVRTC_WRAPPER_DLL_NAME "tensorrt_llm_nvrtc_wrapper.dll")
set(NVRTC_WRAPPER_LIB_NAME "tensorrt_llm_nvrtc_wrapper.lib")
set(NVRTC_WRAPPER_LIB_SOURCE_REL_LOC
"${NVRTC_WRAPPER_LIB_BINARY_REL_DIR}/${NVRTC_WRAPPER_TARGET_ARCH}/${NVRTC_WRAPPER_DLL_NAME}"
)
set(NVRTC_WRAPPER_LIB_BINARY_REL_LOC
"${NVRTC_WRAPPER_LIB_BINARY_REL_DIR}/${NVRTC_WRAPPER_DLL_NAME}")
set(NVRTC_WRAPPER_IMPLIB_SOURCE_REL_LOC
"${NVRTC_WRAPPER_LIB_BINARY_REL_DIR}/${NVRTC_WRAPPER_TARGET_ARCH}/${NVRTC_WRAPPER_LIB_NAME}"
)
set(NVRTC_WRAPPER_IMPLIB_BINARY_REL_LOC
"${NVRTC_WRAPPER_LIB_BINARY_REL_DIR}/${NVRTC_WRAPPER_LIB_NAME}")
set(NVRTC_WRAPPER_LIB_NAME "lib${NVRTC_WRAPPER_TARGET}.so")
else() # Windows
set(NVRTC_WRAPPER_LIB_NAME "${NVRTC_WRAPPER_TARGET}.lib")
set(NVRTC_WRAPPER_DLL_NAME "${NVRTC_WRAPPER_TARGET}.dll")
set(NVRTC_WRAPPER_DLL_PATH
"${NVRTC_WRAPPER_LIB_BINARY_DIR}/${NVRTC_WRAPPER_DLL_NAME}")
endif()
set(NVRTC_WRAPPER_LIB_LOC
"${CMAKE_CURRENT_SOURCE_DIR}/${NVRTC_WRAPPER_LIB_SOURCE_REL_LOC}")
# Copy the .so to build directory, which is needed in build_wheel.py.
configure_file(${NVRTC_WRAPPER_LIB_SOURCE_REL_LOC}
${NVRTC_WRAPPER_LIB_BINARY_REL_LOC} COPYONLY)
set_property(TARGET ${NVRTC_WRAPPER_TARGET} PROPERTY IMPORTED_LOCATION
${NVRTC_WRAPPER_LIB_LOC})
set(NVRTC_WRAPPER_LIB_PATH
"${NVRTC_WRAPPER_LIB_BINARY_DIR}/${NVRTC_WRAPPER_LIB_NAME}")
add_custom_command(
OUTPUT ${NVRTC_WRAPPER_LIB_PATH} ${NVRTC_WRAPPER_DLL_PATH}
COMMAND ${CMAKE_COMMAND} -E make_directory ${NVRTC_WRAPPER_LIB_BINARY_DIR}
COMMAND ${CMAKE_COMMAND} -E chdir ${NVRTC_WRAPPER_LIB_BINARY_DIR}
${CMAKE_COMMAND} -E tar xf ${NVRTC_WRAPPER_LIB_TARBALL}
DEPENDS ${NVRTC_WRAPPER_LIB_TARBALL}
VERBATIM)
add_custom_target(${NVRTC_WRAPPER_TARGET}_helper
DEPENDS ${NVRTC_WRAPPER_LIB_PATH} ${NVRTC_WRAPPER_DLL_PATH})
add_dependencies(${NVRTC_WRAPPER_TARGET} ${NVRTC_WRAPPER_TARGET}_helper)
set_property(TARGET ${NVRTC_WRAPPER_TARGET}
PROPERTY IMPORTED_LOCATION ${NVRTC_WRAPPER_LIB_PATH})
if(WIN32)
set(NVRTC_WRAPPER_IMPLIB_LOC
"${CMAKE_CURRENT_SOURCE_DIR}/${NVRTC_WRAPPER_IMPLIB_SOURCE_REL_LOC}")
configure_file(${NVRTC_WRAPPER_IMPLIB_SOURCE_REL_LOC}
${NVRTC_WRAPPER_IMPLIB_BINARY_REL_LOC} COPYONLY)
set_property(TARGET ${NVRTC_WRAPPER_TARGET}
PROPERTY IMPORTED_IMPLIB ${NVRTC_WRAPPER_IMPLIB_LOC})
PROPERTY IMPORTED_IMPLIB ${NVRTC_WRAPPER_LIB_PATH})
# On Windows the import library (.lib) is what consumers link against, while
# the DLL is the runtime artifact and therefore the imported location.
set_property(TARGET ${NVRTC_WRAPPER_TARGET}
PROPERTY IMPORTED_LOCATION ${NVRTC_WRAPPER_DLL_PATH})
endif()
file(SIZE ${NVRTC_WRAPPER_LIB_LOC} NVRTC_WRAPPER_LIB_SIZE)
# Size-check the NVRTC wrapper tarball itself (not the internal cutlass kernels
# tarball) so a truncated Git LFS pointer for this library is detected.
file(SIZE ${NVRTC_WRAPPER_LIB_TARBALL} NVRTC_WRAPPER_LIB_SIZE)
if(NVRTC_WRAPPER_LIB_SIZE LESS 1024)
message(
FATAL_ERROR
@ -414,25 +242,14 @@ set_target_properties(
PROPERTIES CXX_STANDARD "17" CXX_STANDARD_REQUIRED "YES" CXX_EXTENSIONS "NO"
LINK_FLAGS "${AS_NEEDED_FLAG} ${UNDEFINED_FLAG}")
function(link_whole_archive TARGET LIBRARY_TO_LINK)
if(WIN32)
target_link_libraries(${TARGET} PUBLIC $<TARGET_FILE:${LIBRARY_TO_LINK}>)
set_target_properties(
${TARGET} PROPERTIES LINK_FLAGS "/WHOLEARCHIVE:${LIBRARY_TO_LINK}")
else()
# Assume everything else is like gcc
target_link_libraries(
${TARGET} PRIVATE "-Wl,--whole-archive" $<TARGET_FILE:${LIBRARY_TO_LINK}>
"-Wl,--no-whole-archive")
endif()
endfunction()
target_link_libraries(${SHARED_TARGET} PUBLIC ${TRTLLM_LINK_LIBS})
link_whole_archive(${SHARED_TARGET} ${BATCH_MANAGER_TARGET})
link_whole_archive(${SHARED_TARGET} ${EXECUTOR_TARGET})
link_whole_archive(${SHARED_TARGET} fp8_blockscale_gemm_src)
link_whole_archive(${SHARED_TARGET} ${INTERNAL_CUTLASS_KERNELS_TARGET})
target_link_libraries(
${SHARED_TARGET}
PRIVATE $<LINK_LIBRARY:WHOLE_ARCHIVE,${BATCH_MANAGER_TARGET}>
$<LINK_LIBRARY:WHOLE_ARCHIVE,${EXECUTOR_TARGET}>
$<LINK_LIBRARY:WHOLE_ARCHIVE,fp8_blockscale_gemm_src>
$<LINK_LIBRARY:WHOLE_ARCHIVE,${INTERNAL_CUTLASS_KERNELS_TARGET}>)
# Link kernel_src and cutlass_src. static internal cutlass lib overridden.
target_link_libraries(${SHARED_TARGET} PUBLIC kernels_src cutlass_src)
@ -458,10 +275,6 @@ endif()
target_link_libraries(${SHARED_TARGET} PUBLIC ${NVRTC_WRAPPER_TARGET})
add_dependencies(${SHARED_TARGET} check_symbol)
add_dependencies(${SHARED_TARGET} check_symbol_executor)
add_dependencies(${SHARED_TARGET} check_symbol_internal_cutlass_kernels)
if(BUILD_PYT)
add_subdirectory(thop)
endif()

View File

@ -126,8 +126,8 @@ void checkDriver(
char const* errorString = nullptr;
wrap.cuGetErrorName(result, &errorName);
wrap.cuGetErrorString(result, &errorString);
throw TllmException(
file, line, fmtstr("[TensorRT-LLM][ERROR] CUDA driver error in %s: %s: %s.", func, errorName, errorString));
throw TllmException(file, line,
fmtstr("[TensorRT-LLM][ERROR] CUDA driver error in %s: %s: %s.", func, errorName, errorString).c_str());
}
}

View File

@ -26,35 +26,33 @@
namespace tensorrt_llm::common
{
namespace
{
std::string vformat(char const* fmt, va_list args)
void fmtstr_(char const* format, fmtstr_allocator alloc, void* target, va_list args)
{
va_list args0;
va_copy(args0, args);
auto const size = vsnprintf(nullptr, 0, fmt, args0);
if (size <= 0)
return "";
std::string stringBuf(size, char{});
auto const size2 = std::vsnprintf(&stringBuf[0], size + 1, fmt, args);
size_t constexpr init_size = 2048;
char fixed_buffer[init_size];
auto const size = std::vsnprintf(fixed_buffer, init_size, format, args0);
TLLM_CHECK_WITH_INFO(size >= 0, std::string(std::strerror(errno)));
if (size == 0)
{
return;
}
TLLM_CHECK_WITH_INFO(size2 == size, std::string(std::strerror(errno)));
auto* memory = alloc(target, size);
return stringBuf;
if (static_cast<size_t>(size) < init_size)
{
std::memcpy(memory, fixed_buffer, size + 1);
}
else
{
auto const size2 = std::vsnprintf(memory, size + 1, format, args);
TLLM_CHECK_WITH_INFO(size2 == size, std::string(std::strerror(errno)));
}
}
} // namespace
std::string fmtstr(char const* format, ...)
{
va_list args;
va_start(args, format);
std::string result = vformat(format, args);
va_end(args);
return result;
};
std::unordered_set<std::string> str2set(std::string const& input, char delimiter)
{
std::unordered_set<std::string> values;

View File

@ -35,18 +35,17 @@ int constexpr VOID_PTR_SZ = 2 + sizeof(void*) * 2;
#if !defined(_MSC_VER)
TllmException::TllmException(char const* file, std::size_t line, std::string const& msg)
TllmException::TllmException(char const* file, std::size_t line, char const* msg)
: std::runtime_error{""}
{
mNbFrames = backtrace(mCallstack.data(), MAX_FRAMES);
auto const trace = getTrace();
std::runtime_error::operator=(
std::runtime_error{fmtstr("%s (%s:%zu)\n%s", msg.c_str(), file, line, trace.c_str())});
std::runtime_error::operator=(std::runtime_error{fmtstr("%s (%s:%zu)\n%s", msg, file, line, trace.c_str())});
}
#else
TllmException::TllmException(char const* file, std::size_t line, std::string const& msg)
TllmException::TllmException(char const* file, std::size_t line, char const* msg)
: mNbFrames{}
, std::runtime_error{fmtstr("%s (%s:%zu)", msg.c_str(), file, line)}
, std::runtime_error{fmtstr("%s (%s:%zu)", msg, file, line)}
{
}
#endif

View File

@ -16,23 +16,8 @@ if(ENABLE_UCX)
set(TOP_LEVEL_DIR "${PROJECT_SOURCE_DIR}/..")
target_compile_definitions(${UCX_WRAPPER_TARGET}
PUBLIC TOP_LEVEL_DIR="${TOP_LEVEL_DIR}")
target_include_directories(
${UCX_WRAPPER_TARGET}
PRIVATE $<TARGET_PROPERTY:ucxx::ucxx,INTERFACE_INCLUDE_DIRECTORIES>)
# link_whole_archive
if(WIN32)
target_link_libraries(${UCX_WRAPPER_TARGET}
PUBLIC $<TARGET_FILE:ucxx::ucxx>)
set_target_properties(${UCX_WRAPPER_TARGET}
PROPERTIES LINK_FLAGS "/WHOLEARCHIVE:ucxx::ucxx")
else()
# Assume everything else is like gcc
target_link_libraries(
${UCX_WRAPPER_TARGET}
PRIVATE "-Wl,--whole-archive" $<TARGET_FILE:ucxx::ucxx>
"-Wl,--no-whole-archive")
endif()
target_link_libraries(${UCX_WRAPPER_TARGET}
PRIVATE $<LINK_LIBRARY:WHOLE_ARCHIVE,ucxx::ucxx>)
target_link_libraries(${UCX_WRAPPER_TARGET} PUBLIC ucxx::ucxx ucx::ucs)
target_link_libraries(${UCX_WRAPPER_TARGET} PUBLIC ${CUDA_RT_LIB})
endif()

View File

@ -35,7 +35,8 @@ void CHECK_TLLM_XQA_JIT_ERROR_(tllmXqaJitStatus result, char const* const func,
std::vector<char> log(tllmXqaJitGetLastErrorStringSize());
tllmXqaJitGetLastErrorString(log.data());
throw tensorrt_llm::common::TllmException(file, line,
tensorrt_llm::common::fmtstr("[TensorRT-LLM][ERROR] TllmXqaJit runtime error in %s: %s", func, log.data()));
tensorrt_llm::common::fmtstr("[TensorRT-LLM][ERROR] TllmXqaJit runtime error in %s: %s", func, log.data())
.c_str());
}
}

View File

@ -1,3 +0,0 @@
version https://git-lfs.github.com/spec/v1
oid sha256:5ad6be58302fad71488246c4dea6f96d710143988a195d67b304ea251bd0aa89
size 126817632

View File

@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:8702e9bf2ad0e50a86f9d3a9be52fc70b8fdf5be644d585c69d9560b6fe42dad
size 34773116

View File

@ -1,2 +1,2 @@
a8252eee786f39e51f70a4c011588c7d libtensorrt_llm_nvrtc_wrapper.so
commit 705292307acd1546f4f9e2b2fab84350d01d41ab
5ad6be58302fad71488246c4dea6f96d710143988a195d67b304ea251bd0aa89 libtensorrt_llm_nvrtc_wrapper.so
commit 9c24486cb2cd9dd9582b311b84e1b428d29a735a

View File

@ -1,3 +0,0 @@
version https://git-lfs.github.com/spec/v1
oid sha256:253ba70949732bf3d79c759ba3601516cd5b5b03a121f00c3ce45bbb40aea035
size 133862752

View File

@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:7cddaa17269f699ffa0783ed296cabb7fe71cf61910a2799c71c4e39192fc513
size 38282412

View File

@ -1,2 +1,2 @@
1dacb3147d5d47d795a447ee563ee92a libtensorrt_llm_nvrtc_wrapper.so
commit 705292307acd1546f4f9e2b2fab84350d01d41ab
9d1104bbe6b4f258482549ec71c9d1aed0de912b5824dced5cf7829bff66ba0d libtensorrt_llm_nvrtc_wrapper.so
commit 9c24486cb2cd9dd9582b311b84e1b428d29a735a

View File

@ -1,3 +0,0 @@
version https://git-lfs.github.com/spec/v1
oid sha256:42d72057eac00d2f82cecb27f7401258c2fe932d51a945f1be4baa4271307acb
size 138648070

View File

@ -1,3 +0,0 @@
version https://git-lfs.github.com/spec/v1
oid sha256:ef06a1b7cc3e1a2e71a2ce2f4081412eded9e75a236e2c4dda0ed636de8148b8
size 138563288

View File

@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:5e50158a750697d719bbca9d4e18680290c10cb2bc30e5711854c49edb92ce95
size 45029036

View File

@ -1,3 +1,2 @@
7ef325eb05b4770773732c0f8bc5748d libtensorrt_llm_internal_cutlass_kernels_static.a
ef25d7af2b5d9824ddc50a1b79db36e8 libtensorrt_llm_internal_cutlass_kernels_static.pre_cxx11.a
commit b43c46c83bd0833eae16ea0eae7cef6bee81644c
a357a7193265159ac09d7ddcc47e0445f0f348d8f93e08c5d82c98ed38d3e342 libtensorrt_llm_internal_cutlass_kernels_static.a
commit 9c24486cb2cd9dd9582b311b84e1b428d29a735a

View File

@ -18,13 +18,14 @@
#pragma once
#include "cutlass/gemm/gemm.h"
#include "tensorrt_llm/common/assert.h"
#include "tensorrt_llm/common/cudaUtils.h"
#include "tensorrt_llm/common/quantization.h"
#include "tensorrt_llm/kernels/cutlass_kernels/fp8_blockscale_gemm/fp8_blockscale_gemm.h"
#include "tensorrt_llm/kernels/internal_cutlass_kernels/include/moe_gemm_kernels.h"
#include "tensorrt_llm/kernels/lora/lora.h"
#ifdef ENABLE_FP4
#include <cuda_fp4.h>
#endif
#include <NvInferRuntime.h>
#include <cuda_runtime_api.h>
#include <optional>
#include <random>
@ -221,6 +222,12 @@ struct QuantParams
}
};
// Changes to the following declarations must be kept in sync with lora.h in the public repo
class LoraImpl;
int Lora_run(LoraImpl* impl, int64_t numTokens, int64_t numReqs, void const* input, int32_t const* loraRanks,
void const* const* loraWeightsPtr, int weightIndex, void* const* outputs, void* workspace, cudaStream_t stream);
struct LoraParams
{
using LoraImplPtr = std::shared_ptr<LoraImpl>;

View File

@ -1,3 +0,0 @@
version https://git-lfs.github.com/spec/v1
oid sha256:651dceb91cafbe24997cdc71c3f984ab6b18e99dbc2ed2958ca08b2cf4897cc3
size 135328862

View File

@ -1,3 +0,0 @@
version https://git-lfs.github.com/spec/v1
oid sha256:2cbe6c81bae4a338f6a61f081571abff720c2275058c38ed67c879886056cd98
size 134434150

View File

@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:0255e3a868db94e0bb555f692c087ee73c6b800f907c64fc36e0d3846ffa12f6
size 44693484

View File

@ -1,3 +1,2 @@
893ae060cdb1d5a54729afca9d1b9b99 libtensorrt_llm_internal_cutlass_kernels_static.a
a7ee89c7577bf9bf8d8a9ac8072b810d libtensorrt_llm_internal_cutlass_kernels_static.pre_cxx11.a
commit b43c46c83bd0833eae16ea0eae7cef6bee81644c
4f6da1c3b64b7cef5841dd7507839e718c5f47fa81f3a8e2e6839a81bda459db libtensorrt_llm_internal_cutlass_kernels_static.a
commit 9c24486cb2cd9dd9582b311b84e1b428d29a735a

View File

@ -332,4 +332,11 @@ int LoraImpl::run(int64_t numTokens, int64_t numReqs, void const* input, int32_t
return 0;
}
int Lora_run(LoraImpl* impl, int64_t numTokens, int64_t numReqs, void const* input, int32_t const* loraRanks,
void const* const* loraWeightsPtr, int weightIndex, void* const* outputs, void* workspace, cudaStream_t stream)
{
TLLM_CHECK_WITH_INFO(impl != nullptr, "Attempt to run an empty LoraImpl");
return impl->run(numTokens, numReqs, input, loraRanks, loraWeightsPtr, weightIndex, outputs, workspace, stream);
}
} // namespace tensorrt_llm::kernels

View File

@ -66,4 +66,8 @@ private:
std::optional<Config> mBestConfig;
};
// Changes to the following declarations must be kept in sync with moe_kernels.h in the internal kernel repo
int Lora_run(LoraImpl* impl, int64_t numTokens, int64_t numReqs, void const* input, int32_t const* loraRanks,
void const* const* loraWeightsPtr, int weightIndex, void* const* outputs, void* workspace, cudaStream_t stream);
} // namespace tensorrt_llm::kernels

View File

@ -24,6 +24,8 @@ namespace tensorrt_llm
{
namespace kernels
{
// Changes to this enum must be kept in sync with nvrtcWrapper.cpp in the internal kernel repo
enum Data_type
{
DATA_TYPE_BOOL,

View File

@ -560,7 +560,7 @@ PYBIND11_MODULE(TRTLLM_PYBIND_MODULE, m)
.def("get_ipc_ptrs",
[](tr::IpcNvlsHandle& self) { return reinterpret_cast<uintptr_t>(self.ipc_uc_ptrs.data()); });
m.def("ipc_nvls_allocate", &tr::ipcNvlsAllocate);
m.def("ipc_nvls_allocate", &tr::ipcNvlsAllocate, py::return_value_policy::reference);
m.def("ipc_nvls_free", &tr::ipcNvlsFree);
m.def("ipc_nvls_supported", &tr::ipcNvlsSupported);
}

View File

@ -152,7 +152,7 @@ bool ipcNvlsSupported()
return true;
}
IpcNvlsHandle ipcNvlsAllocate(size_t size, std::set<int> group)
IpcNvlsHandle* ipcNvlsAllocate(size_t size, std::set<int> group)
{
#if ENABLE_MULTI_DEVICE
TLLM_CHECK(size > 0);
@ -324,26 +324,33 @@ IpcNvlsHandle ipcNvlsAllocate(size_t size, std::set<int> group)
printf("Rank %d imported IPC handles successfully\n", rank);
return handle;
return new IpcNvlsHandle(std::move(handle));
#else
TLLM_THROW("ipcNvlsAllocate needs to be compiled with ENABLE_MULTI_DEVICE");
#endif
}
void ipcNvlsFree(IpcNvlsHandle handle)
void ipcNvlsFree(IpcNvlsHandle* handle)
{
#if ENABLE_MULTI_DEVICE
// Unmap and release MC VA
CUCHECK(cuMemUnmap(handle.mc_va, handle.size));
CUCHECK(cuMemRelease(handle.mc_handle));
CUCHECK(cuMemAddressFree(handle.mc_va, handle.size));
// Unmap and release UC VA
for (size_t i = 0; i < handle.ipc_uc_vas.size(); ++i)
if (handle == nullptr)
{
CUCHECK(cuMemUnmap(handle.ipc_uc_vas[i], handle.size));
CUCHECK(cuMemRelease(handle.ipc_uc_handles[i]));
CUCHECK(cuMemAddressFree(handle.ipc_uc_vas[i], handle.size));
return;
}
// Unmap and release MC VA
CUCHECK(cuMemUnmap(handle->mc_va, handle->size));
CUCHECK(cuMemRelease(handle->mc_handle));
CUCHECK(cuMemAddressFree(handle->mc_va, handle->size));
// Unmap and release UC VA
for (size_t i = 0; i < handle->ipc_uc_vas.size(); ++i)
{
CUCHECK(cuMemUnmap(handle->ipc_uc_vas[i], handle->size));
CUCHECK(cuMemRelease(handle->ipc_uc_handles[i]));
CUCHECK(cuMemAddressFree(handle->ipc_uc_vas[i], handle->size));
}
delete handle;
#else
TLLM_THROW("ipcNvlsFree needs to be compiled with ENABLE_MULTI_DEVICE");
#endif

View File

@ -709,7 +709,7 @@ public:
{
other.mSize = 0;
other.mCapacity = 0;
other.mHandle = IpcNvlsHandle{};
other.mHandle = nullptr;
}
~MulticastBuffer() override
@ -733,7 +733,7 @@ public:
// reset other
other.mSize = 0;
other.mCapacity = 0;
other.mHandle = IpcNvlsHandle{};
other.mHandle = nullptr;
}
return *this;
}
@ -741,22 +741,22 @@ public:
// Return list of pointers to each rank
[[nodiscard]] void* dataIpcList()
{
return reinterpret_cast<void*>(mHandle.ipc_uc_ptrs.data());
return reinterpret_cast<void*>(mHandle->ipc_uc_ptrs.data());
}
[[nodiscard]] void const* dataIpcList() const
{
return reinterpret_cast<void const*>(mHandle.ipc_uc_ptrs.data());
return reinterpret_cast<void const*>(mHandle->ipc_uc_ptrs.data());
}
[[nodiscard]] void* dataMC()
{
return reinterpret_cast<void*>(mHandle.mc_ptr);
return reinterpret_cast<void*>(mHandle->mc_ptr);
}
[[nodiscard]] void const* dataMC() const
{
return reinterpret_cast<void const*>(mHandle.mc_ptr);
return reinterpret_cast<void const*>(mHandle->mc_ptr);
}
//////////////////////////
@ -768,13 +768,13 @@ public:
// Return unicast pointer
[[nodiscard]] void* data() override
{
return reinterpret_cast<void*>(mHandle.uc_ptr);
// A moved-from buffer has its handle reset to nullptr; return a null pointer rather than dereferencing it.
return mHandle ? reinterpret_cast<void*>(mHandle->uc_ptr) : nullptr;
}
// Return unicast pointer
[[nodiscard]] void const* data() const override
{
return reinterpret_cast<void const*>(mHandle.uc_ptr);
// A moved-from buffer has its handle reset to nullptr; return a null pointer rather than dereferencing it.
return mHandle ? reinterpret_cast<void const*>(mHandle->uc_ptr) : nullptr;
}
[[nodiscard]] std::size_t getSize() const override
@ -806,8 +806,8 @@ public:
printf("MulticastBuffer resize: %d B\n", int(toBytes(newSize)));
mHandle = ipcNvlsAllocate(toBytes(newSize), mRanks);
TLLM_CHECK(mHandle.size % BufferDataType(mType).getSize() == 0);
mCapacity = mHandle.size / BufferDataType(mType).getSize();
TLLM_CHECK(mHandle->size % BufferDataType(mType).getSize() == 0);
mCapacity = mHandle->size / BufferDataType(mType).getSize();
}
mSize = newSize;
}
@ -816,7 +816,7 @@ public:
{
if (mCapacity > 0)
{
TLLM_CHECK(mHandle.size > 0);
TLLM_CHECK(mHandle->size > 0);
ipcNvlsFree(mHandle);
}
}
@ -826,7 +826,7 @@ private:
std::size_t mCapacity = 0;
nvinfer1::DataType mType;
std::set<int> mRanks;
IpcNvlsHandle mHandle;
// Handle returned by ipcNvlsAllocate(); defaults to null so the pointer is never left indeterminate.
IpcNvlsHandle* mHandle = nullptr;
};
using DeviceBuffer = GenericBuffer<CudaAllocatorAsync>;

View File

@ -100,14 +100,10 @@ add_gtest(eagleLayerTest layers/eagleLayerTest.cpp)
add_subdirectory(utils)
if(BUILD_BATCH_MANAGER)
if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/batch_manager)
add_subdirectory(batch_manager)
endif()
if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/batch_manager)
add_subdirectory(batch_manager)
endif()
if(BUILD_EXECUTOR)
if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/executor)
add_subdirectory(executor)
endif()
if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/executor)
add_subdirectory(executor)
endif()

View File

@ -9,16 +9,12 @@
# license agreement from NVIDIA CORPORATION or its affiliates is strictly
# prohibited.
if(BUILD_BATCH_MANAGER)
if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/batch_manager)
add_subdirectory(batch_manager)
endif()
if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/batch_manager)
add_subdirectory(batch_manager)
endif()
if(BUILD_EXECUTOR)
if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/executor)
add_subdirectory(executor)
endif()
if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/executor)
add_subdirectory(executor)
endif()
add_subdirectory(common)