TensorRT-LLMs/cpp/kernels/fmha_v2/train_ops/Makefile
tburt-nv 6147452158
[https://nvbugs/4141427][chore] Add more details to LICENSE file (#9881)
Signed-off-by: Tyler Burt <195370667+tburt-nv@users.noreply.github.com>
2025-12-13 08:35:31 +08:00

101 lines
3.8 KiB
Makefile

# ##################################################################################################
# SPDX-FileCopyrightText: Copyright (c) 2011-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ##################################################################################################
# #################################################################################################
# Toolchain locations and compilation flags.
# #################################################################################################

# Root of the CUDA toolkit installation.
CUDA ?= /usr/local/cuda
# Root of the cuDNN installation.
CUDNN ?= /usr/local/cudnn
# Defaults to 1. Nothing in the visible part of this Makefile reads it
# (presumably consumed elsewhere -- TODO confirm).
IS_CUDA11 ?= 1

# Host C++ compiler; also handed to nvcc through -ccbin below.
CXX ?= g++
# CUDA compiler driver, taken from the toolkit selected by $(CUDA) unless overridden.
NVCC ?= $(CUDA)/bin/nvcc

# Preprocessor definitions shared by the C++ and the CUDA compilation flags:
#   -DMYGEN
#   -DUSE_SAME_SUM_ORDER_IN_SOFTMAX_AS_REF_CODE  order the softmax summation to produce bit exact
#                                                results.
#   -DUSE_I2F_EMULATION_TRICK / -DUSE_F2I_EMULATION_TRICK
#                                                enable the fast trick to skip F2I and I2F.
PREPROCESSOR_FLAGS = \
    -DMYGEN \
    -DUSE_SAME_SUM_ORDER_IN_SOFTMAX_AS_REF_CODE \
    -DUSE_I2F_EMULATION_TRICK \
    -DUSE_F2I_EMULATION_TRICK
# Debugging aids: uncomment to output the P matrix and/or S = softmax(P).
# PREPROCESSOR_FLAGS += -DSTORE_P
# PREPROCESSOR_FLAGS += -DSTORE_S

# C++ compilation flags: user-supplied CXXFLAGS first, then the fixed options, then the
# shared preprocessor definitions.
CXX_FLAGS = $(CXXFLAGS) -O3 -std=c++14 -g -DSAMPLES $(PREPROCESSOR_FLAGS)

# CUDA compilation flags: user-supplied CUDAFLAGS first, then the fixed options, then the
# shared preprocessor definitions.
# Variant that additionally passes -Xptxas=-v:
#NVCC_FLAGS = $(CUDAFLAGS) -O3 -std=c++14 -g -lineinfo -ccbin $(CXX) -Xptxas=-v -use_fast_math
NVCC_FLAGS = $(CUDAFLAGS) -O3 -std=c++14 -g -lineinfo -ccbin $(CXX) -use_fast_math $(PREPROCESSOR_FLAGS)

# Header search path; appended so that a pre-existing INCLUDE_DIRS value is kept.
INCLUDE_DIRS += -I../src

# Per-architecture code generation switches.
GENCODE_SM75 = -gencode=arch=compute_75,code=\"sm_75\"
GENCODE_SM80 = -gencode=arch=compute_80,code=\"sm_80\"
# #################################################################################################
# Build products.
# #################################################################################################

# Every object built by `all`: the forward (fprop) kernels followed by the backward (dgrad)
# kernels. All of them carry the .sm80 suffix, so they are compiled by the SM80 pattern rule.
OBJECTS = \
    obj/fmha_fprop_v2_fp16_128_64_kernel.sm80.cu.o \
    obj/fmha_fprop_v2_fp16_256_64_kernel.sm80.cu.o \
    obj/fmha_fprop_v2_fp16_384_64_kernel.sm80.cu.o \
    obj/fmha_fprop_v2_fp16_512_64_kernel.sm80.cu.o \
    obj/fmha_dgrad_v2_fp16_128_64_kernel.sm80.cu.o \
    obj/fmha_dgrad_v2_fp16_256_64_kernel.sm80.cu.o \
    obj/fmha_dgrad_v2_fp16_384_64_kernel.sm80.cu.o \
    obj/fmha_dgrad_v2_fp16_512_64_kernel.sm80.cu.o

# Code generation switches for both architectures combined. Not referenced by the rules
# visible in this Makefile.
GENCODES = $(GENCODE_SM75) $(GENCODE_SM80)
# #################################################################################################
# R U L E S
# #################################################################################################

# `all`, `dirs` and `clean` are commands, not files. Declaring all three phony keeps them
# working even if a file with one of those names appears in this directory.
.PHONY: all dirs clean

# Remove a target whose recipe failed, so that a partially written object file is never
# mistaken for an up-to-date one on the next run.
.DELETE_ON_ERROR:

# Default target. The two sub-makes run one after the other, which guarantees that the
# output directory exists before any object is compiled, including under `make -j`.
all:
	$(MAKE) dirs
	$(MAKE) $(OBJECTS)

# Create the output directory; `mkdir -p` succeeds when the directory already exists.
dirs:
	mkdir -p obj

# Delete the output directory and everything in it.
clean:
	rm -rf obj
###################################################################################################
# Headers that every kernel object depends on. $(wildcard) expands to an empty list when a
# pattern matches nothing; a bare glob written directly in a prerequisite list would instead
# be kept as a literal, nonexistent file name and stop the pattern rules below from applying.
FMHA_HEADERS := $(wildcard ./*.h ../src/*.h ../src/fmha/*.h)

# Compile ./<stem>.sm75.cu into obj/<stem>.sm75.cu.o using the SM75 gencode flags.
# The output directory is created on demand so a single object can be requested directly.
obj/%.sm75.cu.o: ./%.sm75.cu $(FMHA_HEADERS)
	@mkdir -p $(@D)
	$(NVCC) $(NVCC_FLAGS) $(GENCODE_SM75) $(INCLUDE_DIRS) -c -o $@ $<

# Compile ./<stem>.sm80.cu into obj/<stem>.sm80.cu.o using the SM80 gencode flags.
obj/%.sm80.cu.o: ./%.sm80.cu $(FMHA_HEADERS)
	@mkdir -p $(@D)
	$(NVCC) $(NVCC_FLAGS) $(GENCODE_SM80) $(INCLUDE_DIRS) -c -o $@ $<
###################################################################################################