TensorRT-LLMs/cpp/tensorrt_llm/thop/CMakeLists.txt
benzh-2025 6df2c8a074
[None][feat] add fp4 gemm + allreduce (#9729)
Signed-off-by: benzh 
Signed-off-by: benzh-2025
2026-01-13 21:11:13 +08:00

147 lines
4.3 KiB
CMake

# SPDX-FileCopyrightText: Copyright (c) 2022-2024 NVIDIA CORPORATION &
# AFFILIATES. All rights reserved. SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may not
# use this file except in compliance with the License. You may obtain a copy of
# the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations under
# the License.
# C++ warning configuration, appended to CMAKE_CXX_FLAGS.
#
# NOTE(review): the earlier comment in this spot said the overloaded-virtual
# warning is ignored, but no -Wno-overloaded-virtual flag is added here.
# Confirm whether that suppression is applied elsewhere or was dropped.
if(WIN32)
  # Windows: warning level 4.
  string(APPEND CMAKE_CXX_FLAGS " /W4")
else()
  # Everything else: enable the -Wall warning set.
  string(APPEND CMAKE_CXX_FLAGS " -Wall")
  # Optionally promote warnings to errors when WARNING_IS_ERROR is set.
  if(WARNING_IS_ERROR)
    message(STATUS "Treating warnings as errors in GCC compilation")
    string(APPEND CMAKE_CXX_FLAGS " -Werror")
  endif()
endif()
# th_utils: static helper library built from thUtils.cpp. It is compiled as
# position-independent code because the shared th_common library links it.
add_library(th_utils STATIC thUtils.cpp)
set_target_properties(
  th_utils PROPERTIES POSITION_INDEPENDENT_CODE ON
                      CUDA_RESOLVE_DEVICE_SYMBOLS ON)
# PUBLIC: consumers of th_utils also link Torch, cuBLAS and cuRAND.
target_link_libraries(
  th_utils
  PUBLIC ${TORCH_LIBRARIES}
         ${CUBLAS_LIB}
         ${CURAND_LIB})
# th_common: shared library built from the sources in this directory that are
# listed below. The list order is kept exactly as-is on purpose, since it
# determines the order in which the objects are handed to the linker.
#
# TODO This does not compile with internal cutlass MOE gemm
add_library(th_common SHARED)
target_sources(
  th_common
  PRIVATE mlaPreprocessOp.cpp
          allgatherOp.cpp
          allreduceOp.cpp
          alltoallOp.cpp
          attentionOp.cpp
          causalConv1dOp.cpp
          convertSpecDecodingMaskToPackedMaskOp.cpp
          cuteDslMoeUtilsOp.cpp
          cutlassScaledMM.cpp
          cublasScaledMM.cpp
          cublasFp4ScaledMM.cpp
          cudaNvfp4MM.cpp
          cudaScaledMM.cpp
          dynamicDecodeOp.cpp
          fmhaPackMaskOp.cpp
          fp8Op.cpp
          fp8PerTensorScalingTrtllmGenGemm.cpp
          fp4Op.cpp
          fp4Gemm.cpp
          fp4GemmTrtllmGen.cpp
          fp8BatchedGemmTrtllmGen.cpp
          fp4Quantize.cpp
          fp4BatchedQuantize.cpp
          fp4xFp8GemmTrtllmGen.cpp
          fp8BlockScalingGemm.cpp
          fp8RowwiseGemm.cpp
          fp8Quantize.cpp
          dsv3FusedAGemmOp.cpp
          fusedQKNormRopeOp.cpp
          fusedTopkSoftmax.cpp
          gatherTreeOp.cpp
          groupRmsNormOp.cpp
          helixPostProcessOp.cpp
          llama4MinLatency.cpp
          logitsBitmaskOp.cpp
          mambaConv1dOp.cpp
          moeOp.cpp
          moeUtilOp.cpp
          moeCommOp.cpp
          moeAlltoAllOp.cpp
          moeLoadBalanceOp.cpp
          moeAlignOp.cpp
          mxFp4BlockScaleMoe.cpp
          mxFp8Quantize.cpp
          fp8BlockScaleMoe.cpp
          fp8PerTensorScaleMoe.cpp
          fp4BlockScaleMoe.cpp
          noAuxTcOp.cpp
          IndexerKCacheScatterOp.cpp
          IndexerTopKOp.cpp
          ncclCommunicatorOp.cpp
          parallelDecodeKVCacheUpdateOp.cpp
          redrafterCurandOp.cpp
          reducescatterOp.cpp
          relativeAttentionBiasOp.cpp
          dsv3RouterGemmOp.cpp
          customMoeRoutingOp.cpp
          selectiveScanOp.cpp
          userbuffersFinalizeOp.cpp
          userbuffersTensor.cpp
          virtualMemoryAllocator.cpp
          weightOnlyQuantGemm.cpp
          weightOnlyQuantOp.cpp
          specDecOp.cpp
          loraOp.cpp
          finegrained_mixed_dtype_gemm_thop.cpp
          tinygemm2.cpp
          dsv3RopeOp.cpp
          fusedGemmAllreduceOp.cpp)
# Build th_common as position-independent code and wire up its link-time
# dependencies. Everything is PRIVATE, so none of these libraries propagate to
# targets that link th_common.
set_target_properties(th_common PROPERTIES POSITION_INDEPENDENT_CODE ON)
target_link_libraries(
  th_common
  PRIVATE ${TORCH_LIBRARIES}
          th_utils
          ${Python3_LIBRARIES}
          ${SHARED_TARGET}
          pg_utils)
# Optional kernel back-ends. Each enabled build switch is forwarded to the
# compiler as a PUBLIC preprocessor definition with the same name.
if(USING_OSS_CUTLASS_LOW_LATENCY_GEMM)
  message(STATUS "Enable open source Cutlass low latency gemm kernel")
endif()
foreach(oss_kernel_switch IN ITEMS USING_OSS_CUTLASS_LOW_LATENCY_GEMM
                                   USING_OSS_CUTLASS_FP4_GEMM
                                   USING_OSS_CUTLASS_MOE_GEMM)
  # The unquoted expansion produces the switch name, which if() then evaluates
  # as a variable, exactly like writing if(<switch name>) directly.
  if(${oss_kernel_switch})
    target_compile_definitions(th_common PUBLIC ${oss_kernel_switch})
  endif()
endforeach()

# The cuBLASLt FP4 GEMM path additionally needs the cuBLASLt library at link
# time.
if(ENABLE_CUBLASLT_FP4_GEMM)
  target_compile_definitions(th_common PUBLIC ENABLE_CUBLASLT_FP4_GEMM)
  target_link_libraries(th_common PRIVATE ${CUBLASLT_LIB})
endif()
# Multi-device builds link th_common against MPI, NCCL and NVML, and expose the
# MPI C include directories as a PUBLIC usage requirement.
if(ENABLE_MULTI_DEVICE)
  target_link_libraries(
    th_common
    PRIVATE ${MPI_C_LIBRARIES}
            ${NCCL_LIB}
            CUDA::nvml)
  target_include_directories(th_common PUBLIC ${MPI_C_INCLUDE_DIRS})
endif()
# Platform-specific link settings for th_common.
if(WIN32)
  # Windows builds link context_attention_src directly into th_common.
  target_link_libraries(th_common PRIVATE context_attention_src)
else()
  # Non-Windows builds:
  # - BUILD_RPATH: look for shared libraries next to th_common itself and in
  #   ../../nvidia/nccl/lib relative to it.
  # - LINK_FLAGS: AS_NEEDED_FLAG and UNDEFINED_FLAG are defined outside this
  #   file; presumably the linker's as-needed / undefined-symbol options,
  #   verify against where they are set.
  set_target_properties(
    th_common
    PROPERTIES BUILD_RPATH "$ORIGIN;$ORIGIN/../../nvidia/nccl/lib"
               LINK_FLAGS "${AS_NEEDED_FLAG} ${UNDEFINED_FLAG}")
endif()