# SPDX-FileCopyrightText: Copyright (c) 2022-2024 NVIDIA CORPORATION &
# AFFILIATES. All rights reserved. SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may not
# use this file except in compliance with the License. You may obtain a copy of
# the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations under
# the License.

# GoogleTest setup. This snippet follows the upstream quickstart:
# https://google.github.io/googletest/quickstart-cmake.html
#
# The googletest sources are declared at a pinned release tag and made
# available to this build through FetchContent. The GoogleTest CMake module is
# then included because gtest_discover_tests() is used further down.
include(FetchContent)

FetchContent_Declare(
  googletest
  GIT_REPOSITORY https://github.com/google/googletest.git
  GIT_TAG release-1.12.1)

FetchContent_MakeAvailable(googletest)

include(GoogleTest)

# On Windows major version is appended to nvinfer libs.
# Select the file name of the ONNX parser library for the current platform.
if(WIN32)
  set(ONNX_PARSER_LIB_NAME nvonnxparser_10) # carries a version suffix
else()
  set(ONNX_PARSER_LIB_NAME nvonnxparser)
endif()

# find_library_create_target() is defined outside this file; it is invoked
# here to produce the `nvonnxparser` target that every test links against.
find_library_create_target(nvonnxparser ${ONNX_PARSER_LIB_NAME} SHARED
                           ${TRT_OUT_DIR} ${TRT_LIB_DIR})

# Directory-scoped include paths: they apply to every target created in this
# directory and in the subdirectories added at the end of the file.
include_directories(
  ${PROJECT_SOURCE_DIR}/tensorrt_llm/cutlass_extensions/include
  ${PROJECT_SOURCE_DIR}/include
  ${3RDPARTY_DIR}/cutlass/include
  ${3RDPARTY_DIR}/cutlass/tools/util/include
  ${PROJECT_SOURCE_DIR}/tests/batch_manager
  ${PROJECT_SOURCE_DIR}/tests/utils)

# Exposed to the tests as the TOP_LEVEL_DIR compile definition (see add_gtest).
set(TOP_LEVEL_DIR "${PROJECT_SOURCE_DIR}/..")

# Umbrella target: building `google-tests` builds every test registered through
# add_gtest().
add_custom_target(google-tests)

# add_gtest(<test_name> <test_src> [NO_GTEST_MAIN] [NO_TLLM_LINKAGE])
#
# Creates the executable <test_name> from <test_src>, links it, registers its
# test cases with CTest and hooks it into the `google-tests` target.
#
# Arguments:
#   test_name        Name of the executable target to create.
#   test_src         Source file, or a quoted ;-list of source files.
#   NO_GTEST_MAIN    Do not link gtest_main. Note that gmock_main is linked
#                    unconditionally either way.
#   NO_TLLM_LINKAGE  Do not link ${SHARED_TARGET} and
#                    nvinfer_plugin_tensorrt_llm.
#
# Any other trailing argument ends up in ARGS_UNPARSED_ARGUMENTS and is not
# used by this function.
#
# Example:
#   add_gtest(myTest dir/myTest.cpp NO_GTEST_MAIN)
function(add_gtest test_name test_src)
  set(options NO_GTEST_MAIN NO_TLLM_LINKAGE)
  # Pass explicit empty one-value / multi-value keyword lists. The previous
  # code expanded ${oneValueArgs} and ${multiValueArgs}, which this function
  # never sets, so same-named variables inherited from the calling scope could
  # change how ARGN is parsed.
  cmake_parse_arguments(ARGS "${options}" "" "" ${ARGN})

  add_executable(${test_name} ${test_src})

  target_link_libraries(${test_name} PUBLIC gmock_main nvonnxparser)
  if(NOT ARGS_NO_GTEST_MAIN)
    target_link_libraries(${test_name} PUBLIC gtest_main)
  endif()
  if(NOT ARGS_NO_TLLM_LINKAGE)
    target_link_libraries(${test_name} PUBLIC ${SHARED_TARGET}
                                              nvinfer_plugin_tensorrt_llm)
  endif()

  target_compile_features(${test_name} PRIVATE cxx_std_17)
  target_compile_definitions(${test_name}
                             PUBLIC TOP_LEVEL_DIR="${TOP_LEVEL_DIR}")

  gtest_discover_tests(
    ${test_name}
    PROPERTIES ENVIRONMENT "CUDA_MODULE_LOADING=LAZY"
    # WAR for DLL discovery on windows.
    DISCOVERY_MODE PRE_TEST
    # Longer timeout needed because discovery can be slow on Windows.
    DISCOVERY_TIMEOUT 30)

  add_dependencies(google-tests ${test_name})
endfunction()

# Runtime tests (first group).
add_gtest(loraManagerTest runtime/loraManagerTest.cpp)
add_gtest(loraUtilsTest runtime/loraUtilsTest.cpp)
add_gtest(loraCacheTest runtime/loraCacheTest.cpp)
add_gtest(workerPoolTest runtime/workerPoolTest.cpp)
add_gtest(transposeKVKernelTest runtime/transposeKVKernelTest.cpp)
add_gtest(gptDecoderTest runtime/gptDecoderTest.cpp)
add_gtest(gptDecoderBatchTest runtime/gptDecoderBatchTest.cpp)
add_gtest(gptSessionTest runtime/gptSessionTest.cpp)
target_link_libraries(gptSessionTest PRIVATE modelSpecStatic)

# Common utility tests.
add_gtest(memoryUtilsTest common/memoryUtilsTest.cu)
if(ENABLE_MULTI_DEVICE EQUAL 1)
  add_gtest(mpiUtilsTest common/mpiUtilsTest.cpp)
endif()
add_gtest(quantizationTest common/quantizationTest.cpp)
add_gtest(stringUtilsTest common/stringUtilsTest.cpp)
add_gtest(tllmExceptionTest common/tllmExceptionTest.cpp)
add_gtest(stlUtilsTest common/stlUtilsTest.cpp)
add_gtest(cudaProfilerUtilsTest common/cudaProfilerUtilsTest.cpp)
add_gtest(timestampUtilsTest common/timestampUtilsTest.cpp)

# Runtime tests (second group).
add_gtest(tllmRuntimeTest runtime/tllmRuntimeTest.cpp)
add_gtest(tllmBuffersTest runtime/tllmBuffersTest.cpp)
add_gtest(bufferManagerTest runtime/bufferManagerTest.cpp)
add_gtest(runtimeKernelTest runtime/runtimeKernelTest.cpp)
add_gtest(samplingTest runtime/samplingTest.cpp)
add_gtest(samplingConfigTest runtime/samplingConfigTest.cpp)
add_gtest(iTensorTest runtime/iTensorTest.cpp)
add_gtest(iBufferTest runtime/iBufferTest.cpp)
add_gtest(worldConfigTest runtime/worldConfigTest.cpp)
add_gtest(medusaModuleTest runtime/medusaModuleTest.cpp)

add_gtest(mixtureOfExpertsTest kernels/mixtureOfExpertsTest.cu)
add_gtest(ropeTest kernels/ropeTest.cu)

# Tests that are only registered when BUILD_PYT is enabled. The variable is
# tested by name rather than through ${...} expansion.
if(BUILD_PYT)
  add_gtest(torchTest runtime/torchTest.cpp)
  add_gtest(thUtilsTest thop/thUtilsTest.cpp)
  target_link_libraries(torchTest PUBLIC ${TORCH_LIBRARIES})
  target_link_libraries(thUtilsTest PUBLIC th_utils ${Python3_LIBRARIES}
                                           ${TORCH_LIBRARIES})
endif()

# Kernel tests. Multi-source tests pass their sources as one quoted ;-list so
# that the whole list arrives in add_gtest's single test_src parameter.
set(SAMPLING_KERNEL_TEST_SRC
    kernels/sampling/samplingTest.cpp
    kernels/sampling/samplingTopKTest.cpp
    kernels/sampling/samplingTopPTest.cpp
    kernels/sampling/samplingAirTopPTest.cpp
    kernels/sampling/samplingPenaltyTest.cpp
    kernels/sampling/samplingUtilsTest.cu)
add_gtest(samplingKernelsTest "${SAMPLING_KERNEL_TEST_SRC}")

add_gtest(weightOnlyKernelTest kernels/weightOnly/weightOnlyKernelTest.cpp)
add_gtest(smoothQuantKernelTest kernels/smoothQuant/smoothQuantKernelTest.cpp)
add_gtest(cudaCoreGemmKernelTest
          kernels/cudaCoreGemm/cudaCoreGemmKernelTest.cpp)

if(NOT ENABLE_MULTI_DEVICE EQUAL 0)
  add_gtest(allReduceKernelTest kernels/allReduce/allReduceKernelTest.cu)
endif()

add_gtest(decodingKernelsTest kernels/decodingKernelTest.cpp)
add_gtest(banRepeatNGramsKernelsTest kernels/banRepeatNGramsKernelsTest.cpp)
add_gtest(stopCriteriaKernelsTest kernels/stopCriteriaKernelsTest.cpp)
add_gtest(shiftKCacheKernelTest kernels/shiftKCacheKernelTest.cu)

# Layer tests.
set(SAMPLING_LAYER_TEST_SRC
    layers/baseSamplingLayerTest.cpp layers/samplingLayerTest.cpp
    layers/topKSamplingLayerTest.cpp layers/topPSamplingLayerTest.cpp)
add_gtest(samplingLayerTest "${SAMPLING_LAYER_TEST_SRC}")

add_gtest(dynamicDecodeLayerTest layers/dynamicDecodeLayerTest.cpp)
add_gtest(medusaDecodeLayerTest layers/medusaDecodeLayerTest.cpp)

# Each lookahead test is built together with layers/randomLlm.cpp.
set(LOOKAHEAD_POOLMANAGER_TEST_SRC layers/randomLlm.cpp
                                   layers/lookaheadPoolManagerTest.cpp)
add_gtest(lookaheadPoolManagerTest "${LOOKAHEAD_POOLMANAGER_TEST_SRC}")

set(LOOKAHEAD_ALGORITHM_TEST_SRC layers/randomLlm.cpp
                                 layers/lookaheadAlgorithmTest.cpp)
add_gtest(lookaheadAlgorithmTest "${LOOKAHEAD_ALGORITHM_TEST_SRC}")

set(LOOKAHEAD_RANDOMLLM_TEST_SRC layers/randomLlm.cpp
                                 layers/lookaheadRandomLlmTest.cpp)
add_gtest(lookaheadRandomLlmTest "${LOOKAHEAD_RANDOMLLM_TEST_SRC}")

add_gtest(explicitDraftTokensLayerTest layers/explicitDraftTokensLayerTest.cpp)

set(LOOKAHEAD_DECODING_TEST_SRC layers/randomLlm.cpp
                                layers/lookaheadDecodingLayerTest.cpp)
add_gtest(lookaheadDecodingLayerTest "${LOOKAHEAD_DECODING_TEST_SRC}")

# Fused gated GEMM tests.
#
# NOTE(review): the gemm_swiglu_e4m3.cu path below is passed as an extra
# positional argument. add_gtest() hands only its `test_src` parameter to
# add_executable(), so this file does not appear to be compiled into the test -
# confirm whether that is intended before changing it.
add_gtest(
  gemmSwigluRunnerTest
  kernels/fused_gated_gemm/gemmSwigluRunnerTest.cu
  ${PROJECT_SOURCE_DIR}/tensorrt_llm/cutlass_extensions/kernels/fused_gated_gemm/gemm_swiglu_e4m3.cu
  NO_GTEST_MAIN)

add_gtest(gemmSwigluKernelTestSm90Fp8
          kernels/fused_gated_gemm/gemmSwigluKernelTestSm90Fp8.cu
          NO_GTEST_MAIN NO_TLLM_LINKAGE)

# Extra build settings shared by the two gemmSwiglu test targets.
foreach(target_name IN ITEMS gemmSwigluRunnerTest gemmSwigluKernelTestSm90Fp8)
  set_property(TARGET ${target_name} PROPERTY CUDA_RESOLVE_DEVICE_SYMBOLS ON)

  # Note - we deliberately do not include 90a PTX (even when 9.0+PTX is
  # specified). This is because sm_90a has arch conditional instructions that
  # are not forward compatible. As a result, it does not make sense to embed PTX
  # into the binary anyway.
  if("90" IN_LIST CMAKE_CUDA_ARCHITECTURES_ORIG
     OR "90-real" IN_LIST CMAKE_CUDA_ARCHITECTURES_ORIG
     OR "90-real" IN_LIST CMAKE_CUDA_ARCHITECTURES_NATIVE)
    message(STATUS "MANUALLY APPENDING FLAG TO COMPILE FOR SM_90a.")

    # The flags are nvcc options, so they are restricted to CUDA sources with
    # $<COMPILE_LANGUAGE:CUDA>. The expression is quoted and uses `;` so that
    # both flags stay inside a single generator expression.
    target_compile_options(
      ${target_name}
      PRIVATE
        "$<$<COMPILE_LANGUAGE:CUDA>:-gencode=arch=compute_90a,code=sm_90a;-res-usage>"
    )

    # Hopper kernels require cuda lib for TMA APIs
    target_link_libraries(${target_name} PRIVATE CUDA::cuda_driver)

    # No kernels should be parsed, unless hopper is specified. This is a build
    # time improvement
    target_compile_definitions(${target_name}
                               PRIVATE COMPILE_HOPPER_TMA_GEMMS)
  endif()

  # Suppress GCC note: the ABI for passing parameters with 64-byte alignment has
  # changed in GCC 4.6 This note appears for kernels using TMA and clutters the
  # compilation output.
  if(NOT WIN32)
    target_compile_options(
      ${target_name}
      PRIVATE "$<$<COMPILE_LANGUAGE:CUDA>:-Xcompiler=-Wno-psabi>")
  endif()
endforeach()

add_subdirectory(utils)

# The optional test suites are added only when the matching build option is on
# and the directory is present in this source tree.
if(BUILD_BATCH_MANAGER AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/batch_manager")
  add_subdirectory(batch_manager)
endif()

if(BUILD_EXECUTOR AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/executor")
  add_subdirectory(executor)
endif()