TensorRT-LLM/cpp/tests/CMakeLists.txt

# SPDX-FileCopyrightText: Copyright (c) 2022-2024 NVIDIA CORPORATION &
# AFFILIATES. All rights reserved. SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may not
# use this file except in compliance with the License. You may obtain a copy of
# the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations under
# the License.
#
# GoogleTest Preparation - Code block copied from
# https://google.github.io/googletest/quickstart-cmake.html
include(FetchContent)
FetchContent_Declare(
  googletest
  GIT_REPOSITORY https://github.com/google/googletest.git
  GIT_TAG release-1.12.1)
FetchContent_MakeAvailable(googletest)
include(GoogleTest)
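# The nvonnxparser target created below is linked into every test by the
# add_gtest helper. find_library_create_target is a project-provided helper
# macro (not a built-in CMake command) that finds the library in TRT_OUT_DIR
# or TRT_LIB_DIR and creates a CMake target for it.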
# On Windows, the major version is appended to the TensorRT library names, so
# the ONNX parser library is named nvonnxparser_10 there.
if(WIN32)
  set(ONNX_PARSER_LIB_NAME nvonnxparser_10)
else()
  set(ONNX_PARSER_LIB_NAME nvonnxparser)
endif()
find_library_create_target(nvonnxparser ${ONNX_PARSER_LIB_NAME} SHARED
                           ${TRT_OUT_DIR} ${TRT_LIB_DIR})
include_directories(
  ${PROJECT_SOURCE_DIR}/tensorrt_llm/cutlass_extensions/include
  ${PROJECT_SOURCE_DIR}/include
  ${3RDPARTY_DIR}/cutlass/include
  ${3RDPARTY_DIR}/cutlass/tools/util/include
  ${PROJECT_SOURCE_DIR}/tests/batch_manager
  ${PROJECT_SOURCE_DIR}/tests/utils)
set(TOP_LEVEL_DIR "${PROJECT_SOURCE_DIR}/..")
add_custom_target(google-tests)
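# add_gtest defines one test executable and registers it with CTest via
# gtest_discover_tests. Two optional flags adjust the defaults:
#   NO_GTEST_MAIN   - the test provides its own main(), so gtest_main is not
#                     linked.
#   NO_TLLM_LINKAGE - the test does not link the TensorRT-LLM shared library
#                     or its plugin library.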
function(add_gtest test_name test_src)
  set(options NO_GTEST_MAIN NO_TLLM_LINKAGE)
  cmake_parse_arguments(ARGS "${options}" "${oneValueArgs}" "${multiValueArgs}"
                        ${ARGN})

  add_executable(${test_name} ${test_src})
  target_link_libraries(${test_name} PUBLIC gmock_main nvonnxparser)

  if(NOT ARGS_NO_GTEST_MAIN)
    target_link_libraries(${test_name} PUBLIC gtest_main)
  endif()

  if(NOT ARGS_NO_TLLM_LINKAGE)
    target_link_libraries(${test_name} PUBLIC ${SHARED_TARGET}
                                              nvinfer_plugin_tensorrt_llm)
  endif()

  target_compile_features(${test_name} PRIVATE cxx_std_17)
  target_compile_definitions(${test_name}
                             PUBLIC TOP_LEVEL_DIR="${TOP_LEVEL_DIR}")

  gtest_discover_tests(
    ${test_name}
    PROPERTIES ENVIRONMENT "CUDA_MODULE_LOADING=LAZY"
    DISCOVERY_MODE PRE_TEST # WAR for DLL discovery on Windows.
    DISCOVERY_TIMEOUT 30) # Longer timeout: discovery can be slow on Windows.

  add_dependencies(google-tests ${test_name})
endfunction()
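# Illustrative only: a hypothetical test that supplies its own main() would be
# registered as
#   add_gtest(myKernelTest kernels/myKernelTest.cu NO_GTEST_MAIN)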
add_gtest(loraManagerTest runtime/loraManagerTest.cpp)
add_gtest(loraUtilsTest runtime/loraUtilsTest.cpp)
add_gtest(loraCacheTest runtime/loraCacheTest.cpp)
add_gtest(workerPoolTest runtime/workerPoolTest.cpp)
add_gtest(transposeKVKernelTest runtime/transposeKVKernelTest.cpp)
add_gtest(gptDecoderTest runtime/gptDecoderTest.cpp)
add_gtest(gptDecoderBatchTest runtime/gptDecoderBatchTest.cpp)
add_gtest(gptSessionTest runtime/gptSessionTest.cpp)
target_link_libraries(gptSessionTest PRIVATE modelSpecStatic)
add_gtest(memoryUtilsTest common/memoryUtilsTest.cu)
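# The MPI utility test is only meaningful (and only built) when multi-device
# support is enabled.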
if(ENABLE_MULTI_DEVICE EQUAL 1)
  add_gtest(mpiUtilsTest common/mpiUtilsTest.cpp)
endif()
add_gtest(quantizationTest common/quantizationTest.cpp)
add_gtest(stringUtilsTest common/stringUtilsTest.cpp)
add_gtest(tllmExceptionTest common/tllmExceptionTest.cpp)
add_gtest(stlUtilsTest common/stlUtilsTest.cpp)
add_gtest(cudaProfilerUtilsTest common/cudaProfilerUtilsTest.cpp)
add_gtest(timestampUtilsTest common/timestampUtilsTest.cpp)
add_gtest(tllmRuntimeTest runtime/tllmRuntimeTest.cpp)
add_gtest(tllmBuffersTest runtime/tllmBuffersTest.cpp)
add_gtest(bufferManagerTest runtime/bufferManagerTest.cpp)
add_gtest(runtimeKernelTest runtime/runtimeKernelTest.cpp)
add_gtest(samplingTest runtime/samplingTest.cpp)
add_gtest(samplingConfigTest runtime/samplingConfigTest.cpp)
add_gtest(iTensorTest runtime/iTensorTest.cpp)
add_gtest(iBufferTest runtime/iBufferTest.cpp)
add_gtest(worldConfigTest runtime/worldConfigTest.cpp)
add_gtest(medusaModuleTest runtime/medusaModuleTest.cpp)
add_gtest(mixtureOfExpertsTest kernels/mixtureOfExpertsTest.cu)
add_gtest(ropeTest kernels/ropeTest.cu)
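# Tests that exercise the PyTorch integration are built only when the PyTorch
# backend is enabled; they link against the Torch libraries found elsewhere in
# the build (TORCH_LIBRARIES).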
if(${BUILD_PYT})
  add_gtest(torchTest runtime/torchTest.cpp)
  add_gtest(thUtilsTest thop/thUtilsTest.cpp)
  target_link_libraries(torchTest PUBLIC ${TORCH_LIBRARIES})
  target_link_libraries(thUtilsTest PUBLIC th_utils ${Python3_LIBRARIES}
                                           ${TORCH_LIBRARIES})
endif()
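# The individual sampling kernel test sources are compiled into a single test
# binary rather than one executable per file.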
set(SAMPLING_KERNEL_TEST_SRC
    kernels/sampling/samplingTest.cpp
    kernels/sampling/samplingTopKTest.cpp
    kernels/sampling/samplingTopPTest.cpp
    kernels/sampling/samplingAirTopPTest.cpp
    kernels/sampling/samplingPenaltyTest.cpp
    kernels/sampling/samplingUtilsTest.cu)
add_gtest(samplingKernelsTest "${SAMPLING_KERNEL_TEST_SRC}")
add_gtest(weightOnlyKernelTest kernels/weightOnly/weightOnlyKernelTest.cpp)
add_gtest(smoothQuantKernelTest kernels/smoothQuant/smoothQuantKernelTest.cpp)
add_gtest(cudaCoreGemmKernelTest
          kernels/cudaCoreGemm/cudaCoreGemmKernelTest.cpp)
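# The all-reduce kernel test only makes sense with multi-device support
# enabled, so it is skipped in single-device builds.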
if(NOT ENABLE_MULTI_DEVICE EQUAL 0)
  add_gtest(allReduceKernelTest kernels/allReduce/allReduceKernelTest.cu)
endif()
add_gtest(decodingKernelsTest kernels/decodingKernelTest.cpp)
add_gtest(banRepeatNGramsKernelsTest kernels/banRepeatNGramsKernelsTest.cpp)
add_gtest(stopCriteriaKernelsTest kernels/stopCriteriaKernelsTest.cpp)
add_gtest(shiftKCacheKernelTest kernels/shiftKCacheKernelTest.cu)
set(SAMPLING_LAYER_TEST_SRC
    layers/baseSamplingLayerTest.cpp layers/samplingLayerTest.cpp
    layers/topKSamplingLayerTest.cpp layers/topPSamplingLayerTest.cpp)
add_gtest(samplingLayerTest "${SAMPLING_LAYER_TEST_SRC}")
add_gtest(dynamicDecodeLayerTest layers/dynamicDecodeLayerTest.cpp)
add_gtest(medusaDecodeLayerTest layers/medusaDecodeLayerTest.cpp)
set(LOOKAHEAD_POOLMANAGER_TEST_SRC layers/randomLlm.cpp
                                   layers/lookaheadPoolManagerTest.cpp)
add_gtest(lookaheadPoolManagerTest "${LOOKAHEAD_POOLMANAGER_TEST_SRC}")
set(LOOKAHEAD_ALGORITHM_TEST_SRC layers/randomLlm.cpp
                                 layers/lookaheadAlgorithmTest.cpp)
add_gtest(lookaheadAlgorithmTest "${LOOKAHEAD_ALGORITHM_TEST_SRC}")
set(LOOKAHEAD_RANDOMLLM_TEST_SRC layers/randomLlm.cpp
                                 layers/lookaheadRandomLlmTest.cpp)
add_gtest(lookaheadRandomLlmTest "${LOOKAHEAD_RANDOMLLM_TEST_SRC}")
add_gtest(explicitDraftTokensLayerTest layers/explicitDraftTokensLayerTest.cpp)
set(LOOKAHEAD_DECODING_TEST_SRC layers/randomLlm.cpp
                                layers/lookaheadDecodingLayerTest.cpp)
add_gtest(lookaheadDecodingLayerTest "${LOOKAHEAD_DECODING_TEST_SRC}")
add_gtest(
  gemmSwigluRunnerTest
  kernels/fused_gated_gemm/gemmSwigluRunnerTest.cu
  ${PROJECT_SOURCE_DIR}/tensorrt_llm/cutlass_extensions/kernels/fused_gated_gemm/gemm_swiglu_e4m3.cu
  NO_GTEST_MAIN)
add_gtest(gemmSwigluKernelTestSm90Fp8
          kernels/fused_gated_gemm/gemmSwigluKernelTestSm90Fp8.cu NO_GTEST_MAIN
          NO_TLLM_LINKAGE)
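# Both gemm+SwiGLU test targets need device symbols resolved at link time and,
# when Hopper (SM90) is among the target architectures, the sm_90a-specific
# compile options applied below.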
foreach(target_name gemmSwigluRunnerTest;gemmSwigluKernelTestSm90Fp8)
  set_property(TARGET ${target_name} PROPERTY CUDA_RESOLVE_DEVICE_SYMBOLS ON)

  # Note: we deliberately do not include sm_90a PTX (even when 9.0+PTX is
  # specified), because sm_90a has arch-conditional instructions that are not
  # forward compatible. It therefore makes no sense to embed PTX into the
  # binary anyway.
  if("90" IN_LIST CMAKE_CUDA_ARCHITECTURES_ORIG
     OR "90-real" IN_LIST CMAKE_CUDA_ARCHITECTURES_ORIG
     OR "90-real" IN_LIST CMAKE_CUDA_ARCHITECTURES_NATIVE)
    message(STATUS "MANUALLY APPENDING FLAG TO COMPILE FOR SM_90a.")
    target_compile_options(
      ${target_name}
      PRIVATE $<$<COMPILE_LANGUAGE:CUDA>:-gencode=arch=compute_90a,code=sm_90a
              -res-usage>)

    # Hopper kernels require the CUDA driver library for the TMA APIs.
    target_link_libraries(${target_name} PRIVATE CUDA::cuda_driver)

    # No Hopper kernels should be parsed unless Hopper is targeted; this is a
    # build-time improvement.
    target_compile_definitions(${target_name} PRIVATE COMPILE_HOPPER_TMA_GEMMS)
  endif()

  # Suppress the GCC note "the ABI for passing parameters with 64-byte
  # alignment has changed in GCC 4.6". It appears for kernels using TMA and
  # clutters the compilation output.
  if(NOT WIN32)
    target_compile_options(
      ${target_name} PRIVATE $<$<COMPILE_LANGUAGE:CUDA>:-Xcompiler=-Wno-psabi>)
  endif()
endforeach()
add_subdirectory(utils)
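# The batch_manager and executor test directories are not present in every
# source distribution, so descend into them only when they exist on disk.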
if(BUILD_BATCH_MANAGER)
  if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/batch_manager)
    add_subdirectory(batch_manager)
  endif()
endif()

if(BUILD_EXECUTOR)
  if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/executor)
    add_subdirectory(executor)
  endif()
endif()