mirror of
https://github.com/NVIDIA/TensorRT-LLM.git
synced 2026-01-14 06:27:45 +08:00
* add fmha repo Signed-off-by: Qidi Sang <200703406+qsang-nv@users.noreply.github.com> * fix format Signed-off-by: Qidi Sang <200703406+qsang-nv@users.noreply.github.com> * fix code style Signed-off-by: Qidi Sang <200703406+qsang-nv@users.noreply.github.com> * fix header Signed-off-by: Qidi Sang <200703406+qsang-nv@users.noreply.github.com> * fix header kernel_traits.h Signed-off-by: Qidi Sang <200703406+qsang-nv@users.noreply.github.com> * add .gitignore file Signed-off-by: Qidi Sang <200703406+qsang-nv@users.noreply.github.com> * add SLIDING_WINDOW_ATTENTION Signed-off-by: Qidi Sang <200703406+qsang-nv@users.noreply.github.com> * fix style Signed-off-by: Qidi Sang <200703406+qsang-nv@users.noreply.github.com> * fix format Signed-off-by: Qidi Sang <200703406+qsang-nv@users.noreply.github.com> * update setup.py Signed-off-by: Qidi Sang <200703406+qsang-nv@users.noreply.github.com> * update build_wheel.py Signed-off-by: Qidi Sang <200703406+qsang-nv@users.noreply.github.com> --------- Signed-off-by: Qidi Sang <200703406+qsang-nv@users.noreply.github.com> Signed-off-by: qsang-nv <200703406+qsang-nv@users.noreply.github.com>
101 lines
4.1 KiB
Makefile
101 lines
4.1 KiB
Makefile
# ##################################################################################################
# Copyright (c) 2011-2023, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without modification, are not permit-
# ted.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
# FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
# BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFIT;
# OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
# STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# ##################################################################################################

# #################################################################################################
# Compilers and build options.
# #################################################################################################

# Root of the CUDA toolkit installation. Override from the environment or the command line.
CUDA ?= /usr/local/cuda
# Root of the cuDNN installation. Not referenced by any rule in this Makefile.
CUDNN ?= /usr/local/cudnn

# CUDA 11 switch. Not referenced by any rule in this Makefile.
# NOTE(review): presumably kept for callers that set or read it -- confirm before removing.
IS_CUDA11 ?= 1

# The host C++ compiler. It is also handed to nvcc through -ccbin.
CXX ?= g++
# The CUDA compiler, taken from the toolkit selected by CUDA unless NVCC is set explicitly.
NVCC ?= $(CUDA)/bin/nvcc

# Flags to compile C++ files. User-supplied CXXFLAGS are placed first; the defaults follow them.
CXX_FLAGS = $(CXXFLAGS) -O3 -std=c++14 -g -DSAMPLES

# Flags to compile CUDA files. User-supplied CUDAFLAGS are placed first; $(CXX) is passed to nvcc
# as the host compiler. Add -Xptxas=-v to this list to get verbose ptxas output.
NVCC_FLAGS = $(CUDAFLAGS) -O3 -std=c++14 -g -lineinfo -ccbin $(CXX) -use_fast_math

# Preprocessor definitions shared by the C++ and the CUDA compilations.
PREPROCESSOR_FLAGS = -DMYGEN

# Use the same summation order in the softmax as the reference code to produce bit-exact results.
PREPROCESSOR_FLAGS += -DUSE_SAME_SUM_ORDER_IN_SOFTMAX_AS_REF_CODE

# Enable the fast tricks that skip F2I and I2F.
PREPROCESSOR_FLAGS += -DUSE_I2F_EMULATION_TRICK
PREPROCESSOR_FLAGS += -DUSE_F2I_EMULATION_TRICK

# Uncomment to output the P matrix and/or S = softmax(P) for debugging.
# PREPROCESSOR_FLAGS += -DSTORE_P
# PREPROCESSOR_FLAGS += -DSTORE_S

# Append the preprocessor flags to the compilation flags of both compilers.
CXX_FLAGS  += $(PREPROCESSOR_FLAGS)
NVCC_FLAGS += $(PREPROCESSOR_FLAGS)

# The include directories. Appended, so a value supplied by the caller is preserved.
INCLUDE_DIRS += -I../src

# nvcc code-generation options, one variable per GPU architecture. The backslashes keep the
# double quotes intact until the recipe shell processes the command line.
GENCODE_SM75 = -gencode=arch=compute_75,code=\"sm_75\"
GENCODE_SM80 = -gencode=arch=compute_80,code=\"sm_80\"

# #################################################################################################
# The object files.
# #################################################################################################

# Forward-propagation kernels.
OBJECTS  = obj/fmha_fprop_v2_fp16_128_64_kernel.sm80.cu.o
OBJECTS += obj/fmha_fprop_v2_fp16_256_64_kernel.sm80.cu.o
OBJECTS += obj/fmha_fprop_v2_fp16_384_64_kernel.sm80.cu.o
OBJECTS += obj/fmha_fprop_v2_fp16_512_64_kernel.sm80.cu.o

# Data-gradient kernels.
OBJECTS += obj/fmha_dgrad_v2_fp16_128_64_kernel.sm80.cu.o
OBJECTS += obj/fmha_dgrad_v2_fp16_256_64_kernel.sm80.cu.o
OBJECTS += obj/fmha_dgrad_v2_fp16_384_64_kernel.sm80.cu.o
OBJECTS += obj/fmha_dgrad_v2_fp16_512_64_kernel.sm80.cu.o

# All supported architectures at once. The rules in this Makefile do not use it; each object is
# compiled with the single GENCODE_SMxx that matches its file name.
GENCODES = $(GENCODE_SM75) $(GENCODE_SM80)

# #################################################################################################
# R U L E S
# #################################################################################################

# None of these targets names a file. Declaring them phony keeps a stray file called "all",
# "dirs" or "clean" from silently disabling the rule.
.PHONY: all dirs clean

# Default target. The two sub-makes run one after the other, so the output directory exists
# before any object is compiled, even when make is invoked with -j.
all:
	$(MAKE) dirs
	$(MAKE) $(OBJECTS)

# Create the output directory. mkdir -p succeeds when the directory already exists.
dirs:
	mkdir -p obj

# Remove everything this Makefile builds.
clean:
	$(RM) -r obj

###################################################################################################

# Delete a target whose recipe failed, so a partially written object is never taken as current.
.DELETE_ON_ERROR:

# Headers that every kernel object depends on. $(wildcard) drops a pattern that matches nothing;
# a raw glob used as a prerequisite would stay literal and make the pattern rules unusable.
kernel_headers := $(wildcard ./*.h ../src/*.h ../src/fmha/*.h)

# Compile one kernel source for SM75. The mkdir lets a single object be requested directly
# without running the "dirs" target first.
obj/%.sm75.cu.o: ./%.sm75.cu $(kernel_headers)
	@mkdir -p $(@D)
	$(NVCC) $(NVCC_FLAGS) $(GENCODE_SM75) $(INCLUDE_DIRS) -c -o $@ $<

# Compile one kernel source for SM80.
obj/%.sm80.cu.o: ./%.sm80.cu $(kernel_headers)
	@mkdir -p $(@D)
	$(NVCC) $(NVCC_FLAGS) $(GENCODE_SM80) $(INCLUDE_DIRS) -c -o $@ $<

###################################################################################################