TensorRT-LLMs/cpp/tensorrt_llm/thop/CMakeLists.txt
chenfeiz0326 7f5716ef83
Cherry-pick trtllm-gen from feat/llama4 to main (#4086)
* feat: TRT-LLM Gen FP8 MoE Llama4

Signed-off-by: Nikita Korobov <nkorobov@nvidia.com>

* feat: TRT-LLM Gen llama4 MoE Top1 routing

Signed-off-by: Jiqun Tu <jtu@nvidia.com>

* feat: add per tensor FP8 TRT-LLM Gen GEMMs

Signed-off-by: Nikita Korobov <nkorobov@nvidia.com>

* Update

Signed-off-by: Chenfei Zhang <chenfeiz@nvidia.com>

* Update

Signed-off-by: Chenfei Zhang <chenfeiz@nvidia.com>

* Add license for cpp/tensorrt_llm/kernels/trtllmGenKernels/blockScaleMoe/gemmCubins

Signed-off-by: Chenfei Zhang <chenfeiz@nvidia.com>

* Add guard for routingIndicesClusterKernel

Signed-off-by: Chenfei Zhang <chenfeiz@nvidia.com>

* Guard sm90+ for routingkernels

Signed-off-by: Chenfei Zhang <chenfeiz@nvidia.com>

* Guard sm90+ for routingkernels

Signed-off-by: Chenfei Zhang <chenfeiz@nvidia.com>

---------

Signed-off-by: Nikita Korobov <nkorobov@nvidia.com>
Signed-off-by: Jiqun Tu <jtu@nvidia.com>
Signed-off-by: Chenfei Zhang <chenfeiz@nvidia.com>
Co-authored-by: Nikita Korobov <nkorobov@nvidia.com>
Co-authored-by: Jiqun Tu <jtu@nvidia.com>
2025-05-08 14:13:01 -07:00

98 lines
3.0 KiB
CMake

# SPDX-FileCopyrightText: Copyright (c) 2022-2024 NVIDIA CORPORATION &
# AFFILIATES. All rights reserved. SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may not
# use this file except in compliance with the License. You may obtain a copy of
# the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations under
# the License.
if(NOT WIN32)
# additional warnings
#
# Ignore overloaded-virtual warning. We intentionally change parameters of
# some methods in derived class.
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall")
if(WARNING_IS_ERROR)
message(STATUS "Treating warnings as errors in GCC compilation")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Werror")
endif()
else() # Windows
# warning level 4
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /W4")
endif()
add_library(th_utils STATIC thUtils.cpp)
set_property(TARGET th_utils PROPERTY POSITION_INDEPENDENT_CODE ON)
set_property(TARGET th_utils PROPERTY CUDA_RESOLVE_DEVICE_SYMBOLS ON)
target_link_libraries(th_utils PUBLIC ${TORCH_LIBRARIES} ${CUBLAS_LIB}
${CURAND_LIB})
add_library(
th_common SHARED
allgatherOp.cpp
allreduceOp.cpp
attentionOp.cpp
convertSpecDecodingMaskToPackedMaskOp.cpp
cutlassScaledMM.cpp
cublasScaledMM.cpp
deepseekAllreduceFusionOp.cpp
dynamicDecodeOp.cpp
fmhaPackMaskOp.cpp
fp8Op.cpp
fp8PerTensorScalingTrtllmGenGemm.cpp
fp4Op.cpp
fp4Gemm.cpp
fp4GemmTrtllmGen.cpp
fp8BatchedGemmTrtllmGen.cpp
fp4Quantize.cpp
fp4BatchedQuantize.cpp
fp8BlockScalingGemm.cpp
fp8Quantize.cpp
fusedTopkSoftmax.cpp
gatherTreeOp.cpp
groupRmsNormOp.cpp
logitsBitmaskOp.cpp
mambaConv1dOp.cpp
moeOp.cpp
moeCommOp.cpp
fp8BlockScaleMoe.cpp
fp8PerTensorScaleMoe.cpp
fp4BlockScaleMoe.cpp
noAuxTcOp.cpp
ncclCommunicatorOp.cpp
parallelDecodeKVCacheUpdateOp.cpp
redrafterCurandOp.cpp
reducescatterOp.cpp
relativeAttentionBiasOp.cpp
selectiveScanOp.cpp
userbuffersFinalizeOp.cpp
userbuffersTensor.cpp
weightOnlyQuantOp.cpp
mtpOp.cpp
loraOp.cpp)
set_property(TARGET th_common PROPERTY POSITION_INDEPENDENT_CODE ON)
target_link_libraries(th_common PRIVATE ${TORCH_LIBRARIES} th_utils
${Python3_LIBRARIES} ${SHARED_TARGET})
if(ENABLE_MULTI_DEVICE)
target_include_directories(th_common PUBLIC ${MPI_C_INCLUDE_DIRS})
target_link_libraries(th_common PRIVATE ${MPI_C_LIBRARIES} ${NCCL_LIB}
CUDA::nvml)
endif()
if(NOT WIN32)
set_target_properties(
th_common
PROPERTIES LINK_FLAGS
"-Wl,-rpath='$ORIGIN' ${AS_NEEDED_FLAG} ${UNDEFINED_FLAG}")
else()
target_link_libraries(th_common PRIVATE context_attention_src)
endif()