mirror of
https://github.com/NVIDIA/TensorRT-LLM.git
synced 2026-02-04 18:21:52 +08:00
[None][fix] Proper conditional compilation of sm10x cubins (#10839)
Signed-off-by: Yuan Tong <13075180+tongyuantongyu@users.noreply.github.com>
This commit is contained in:
parent
c26a8f764c
commit
30348b2753
284
.cmake-format.json
Normal file
284
.cmake-format.json
Normal file
@ -0,0 +1,284 @@
|
||||
{
|
||||
"_help_parse": "Options affecting listfile parsing",
|
||||
"parse": {
|
||||
"_help_additional_commands": [
|
||||
"Specify structure for custom cmake functions"
|
||||
],
|
||||
"additional_commands": {
|
||||
"filter_source_cuda_architectures": {
|
||||
"flags": [
|
||||
"IMPLICIT_FAMILY"
|
||||
],
|
||||
"kwargs": {
|
||||
"SOURCE_LIST": "1",
|
||||
"TARGET": "1",
|
||||
"ARCHS": "+"
|
||||
}
|
||||
}
|
||||
},
|
||||
"_help_vartags": [
|
||||
"Specify variable tags."
|
||||
],
|
||||
"vartags": [],
|
||||
"_help_proptags": [
|
||||
"Specify property tags."
|
||||
],
|
||||
"proptags": []
|
||||
},
|
||||
"_help_format": "Options affecting formatting.",
|
||||
"format": {
|
||||
"_help_line_width": [
|
||||
"How wide to allow formatted cmake files"
|
||||
],
|
||||
"line_width": 80,
|
||||
"_help_tab_size": [
|
||||
"How many spaces to tab for indent"
|
||||
],
|
||||
"tab_size": 2,
|
||||
"_help_max_subgroups_hwrap": [
|
||||
"If an argument group contains more than this many sub-groups",
|
||||
"(parg or kwarg groups) then force it to a vertical layout."
|
||||
],
|
||||
"max_subgroups_hwrap": 2,
|
||||
"_help_max_pargs_hwrap": [
|
||||
"If a positional argument group contains more than this many",
|
||||
"arguments, then force it to a vertical layout."
|
||||
],
|
||||
"max_pargs_hwrap": 6,
|
||||
"_help_max_rows_cmdline": [
|
||||
"If a cmdline positional group consumes more than this many",
|
||||
"lines without nesting, then invalidate the layout (and nest)"
|
||||
],
|
||||
"max_rows_cmdline": 2,
|
||||
"_help_separate_ctrl_name_with_space": [
|
||||
"If true, separate flow control names from their parentheses",
|
||||
"with a space"
|
||||
],
|
||||
"separate_ctrl_name_with_space": false,
|
||||
"_help_separate_fn_name_with_space": [
|
||||
"If true, separate function names from parentheses with a",
|
||||
"space"
|
||||
],
|
||||
"separate_fn_name_with_space": false,
|
||||
"_help_dangle_parens": [
|
||||
"If a statement is wrapped to more than one line, then dangle",
|
||||
"the closing parenthesis on its own line."
|
||||
],
|
||||
"dangle_parens": false,
|
||||
"_help_dangle_align": [
|
||||
"If the trailing parenthesis must be 'dangled' on its own",
|
||||
"line, then align it to this reference: `prefix`: the start",
|
||||
"of the statement, `prefix-indent`: the start of the",
|
||||
"statement, plus one indentation level, `child`: align to",
|
||||
"the column of the arguments"
|
||||
],
|
||||
"dangle_align": "prefix",
|
||||
"_help_min_prefix_chars": [
|
||||
"If the statement spelling length (including space and",
|
||||
"parenthesis) is smaller than this amount, then force reject",
|
||||
"nested layouts."
|
||||
],
|
||||
"min_prefix_chars": 4,
|
||||
"_help_max_prefix_chars": [
|
||||
"If the statement spelling length (including space and",
|
||||
"parenthesis) is larger than the tab width by more than this",
|
||||
"amount, then force reject un-nested layouts."
|
||||
],
|
||||
"max_prefix_chars": 10,
|
||||
"_help_max_lines_hwrap": [
|
||||
"If a candidate layout is wrapped horizontally but it exceeds",
|
||||
"this many lines, then reject the layout."
|
||||
],
|
||||
"max_lines_hwrap": 2,
|
||||
"_help_line_ending": [
|
||||
"What style line endings to use in the output."
|
||||
],
|
||||
"line_ending": "unix",
|
||||
"_help_command_case": [
|
||||
"Format command names consistently as 'lower' or 'upper' case"
|
||||
],
|
||||
"command_case": "canonical",
|
||||
"_help_keyword_case": [
|
||||
"Format keywords consistently as 'lower' or 'upper' case"
|
||||
],
|
||||
"keyword_case": "unchanged",
|
||||
"_help_always_wrap": [
|
||||
"A list of command names which should always be wrapped"
|
||||
],
|
||||
"always_wrap": [],
|
||||
"_help_enable_sort": [
|
||||
"If true, the argument lists which are known to be sortable",
|
||||
"will be sorted lexicographically"
|
||||
],
|
||||
"enable_sort": true,
|
||||
"_help_autosort": [
|
||||
"If true, the parsers may infer whether or not an argument",
|
||||
"list is sortable (without annotation)."
|
||||
],
|
||||
"autosort": false,
|
||||
"_help_require_valid_layout": [
|
||||
"By default, if cmake-format cannot successfully fit",
|
||||
"everything into the desired linewidth it will apply the",
|
||||
"last, most aggressive attempt that it made. If this flag is",
|
||||
"True, however, cmake-format will print error, exit with non-",
|
||||
"zero status code, and write-out nothing"
|
||||
],
|
||||
"require_valid_layout": false,
|
||||
"_help_layout_passes": [
|
||||
"A dictionary mapping layout nodes to a list of wrap",
|
||||
"decisions. See the documentation for more information."
|
||||
],
|
||||
"layout_passes": {}
|
||||
},
|
||||
"_help_markup": "Options affecting comment reflow and formatting.",
|
||||
"markup": {
|
||||
"_help_bullet_char": [
|
||||
"What character to use for bulleted lists"
|
||||
],
|
||||
"bullet_char": "*",
|
||||
"_help_enum_char": [
|
||||
"What character to use as punctuation after numerals in an",
|
||||
"enumerated list"
|
||||
],
|
||||
"enum_char": ".",
|
||||
"_help_first_comment_is_literal": [
|
||||
"If comment markup is enabled, don't reflow the first comment",
|
||||
"block in each listfile. Use this to preserve formatting of",
|
||||
"your copyright/license statements."
|
||||
],
|
||||
"first_comment_is_literal": false,
|
||||
"_help_literal_comment_pattern": [
|
||||
"If comment markup is enabled, don't reflow any comment block",
|
||||
"which matches this (regex) pattern. Default is `None`",
|
||||
"(disabled)."
|
||||
],
|
||||
"literal_comment_pattern": null,
|
||||
"_help_fence_pattern": [
|
||||
"Regular expression to match preformat fences in comments",
|
||||
"default= ``r'^\\s*([`~]{3}[`~]*)(.*)$'``"
|
||||
],
|
||||
"fence_pattern": "^\\s*([`~]{3}[`~]*)(.*)$",
|
||||
"_help_ruler_pattern": [
|
||||
"Regular expression to match rulers in comments default=",
|
||||
"``r'^\\s*[^\\w\\s]{3}.*[^\\w\\s]{3}$'``"
|
||||
],
|
||||
"ruler_pattern": "^\\s*[^\\w\\s]{3}.*[^\\w\\s]{3}$",
|
||||
"_help_explicit_trailing_pattern": [
|
||||
"If a comment line starts with this pattern then it",
|
||||
"is explicitly a trailing comment for the preceding",
|
||||
"argument. Default is '#<'"
|
||||
],
|
||||
"explicit_trailing_pattern": "#<",
|
||||
"_help_hashruler_min_length": [
|
||||
"If a comment line starts with at least this many consecutive",
|
||||
"hash characters, then don't lstrip() them off. This allows",
|
||||
"for lazy hash rulers where the first hash char is not",
|
||||
"separated by space"
|
||||
],
|
||||
"hashruler_min_length": 10,
|
||||
"_help_canonicalize_hashrulers": [
|
||||
"If true, then insert a space between the first hash char and",
|
||||
"remaining hash chars in a hash ruler, and normalize its",
|
||||
"length to fill the column"
|
||||
],
|
||||
"canonicalize_hashrulers": true,
|
||||
"_help_enable_markup": [
|
||||
"enable comment markup parsing and reflow"
|
||||
],
|
||||
"enable_markup": true
|
||||
},
|
||||
"_help_lint": "Options affecting the linter",
|
||||
"lint": {
|
||||
"_help_disabled_codes": [
|
||||
"a list of lint codes to disable"
|
||||
],
|
||||
"disabled_codes": [],
|
||||
"_help_function_pattern": [
|
||||
"regular expression pattern describing valid function names"
|
||||
],
|
||||
"function_pattern": "[0-9a-z_]+",
|
||||
"_help_macro_pattern": [
|
||||
"regular expression pattern describing valid macro names"
|
||||
],
|
||||
"macro_pattern": "[0-9A-Z_]+",
|
||||
"_help_global_var_pattern": [
|
||||
"regular expression pattern describing valid names for",
|
||||
"variables with global (cache) scope"
|
||||
],
|
||||
"global_var_pattern": "[A-Z][0-9A-Z_]+",
|
||||
"_help_internal_var_pattern": [
|
||||
"regular expression pattern describing valid names for",
|
||||
"variables with global scope (but internal semantic)"
|
||||
],
|
||||
"internal_var_pattern": "_[A-Z][0-9A-Z_]+",
|
||||
"_help_local_var_pattern": [
|
||||
"regular expression pattern describing valid names for",
|
||||
"variables with local scope"
|
||||
],
|
||||
"local_var_pattern": "[a-z][a-z0-9_]+",
|
||||
"_help_private_var_pattern": [
|
||||
"regular expression pattern describing valid names for",
|
||||
"private directory variables"
|
||||
],
|
||||
"private_var_pattern": "_[0-9a-z_]+",
|
||||
"_help_public_var_pattern": [
|
||||
"regular expression pattern describing valid names for public",
|
||||
"directory variables"
|
||||
],
|
||||
"public_var_pattern": "[A-Z][0-9A-Z_]+",
|
||||
"_help_argument_var_pattern": [
|
||||
"regular expression pattern describing valid names for",
|
||||
"function/macro arguments and loop variables."
|
||||
],
|
||||
"argument_var_pattern": "[a-z][a-z0-9_]+",
|
||||
"_help_keyword_pattern": [
|
||||
"regular expression pattern describing valid names for",
|
||||
"keywords used in functions or macros"
|
||||
],
|
||||
"keyword_pattern": "[A-Z][0-9A-Z_]+",
|
||||
"_help_max_conditionals_custom_parser": [
|
||||
"In the heuristic for C0201, how many conditionals to match",
|
||||
"within a loop before considering the loop a parser."
|
||||
],
|
||||
"max_conditionals_custom_parser": 2,
|
||||
"_help_min_statement_spacing": [
|
||||
"Require at least this many newlines between statements"
|
||||
],
|
||||
"min_statement_spacing": 1,
|
||||
"_help_max_statement_spacing": [
|
||||
"Require no more than this many newlines between statements"
|
||||
],
|
||||
"max_statement_spacing": 2,
|
||||
"max_returns": 6,
|
||||
"max_branches": 12,
|
||||
"max_arguments": 5,
|
||||
"max_localvars": 15,
|
||||
"max_statements": 50
|
||||
},
|
||||
"_help_encode": "Options affecting file encoding",
|
||||
"encode": {
|
||||
"_help_emit_byteorder_mark": [
|
||||
"If true, emit the unicode byte-order mark (BOM) at the start",
|
||||
"of the file"
|
||||
],
|
||||
"emit_byteorder_mark": false,
|
||||
"_help_input_encoding": [
|
||||
"Specify the encoding of the input file. Defaults to utf-8"
|
||||
],
|
||||
"input_encoding": "utf-8",
|
||||
"_help_output_encoding": [
|
||||
"Specify the encoding of the output file. Defaults to utf-8.",
|
||||
"Note that cmake only claims to support utf-8 so be careful",
|
||||
"when using anything else"
|
||||
],
|
||||
"output_encoding": "utf-8"
|
||||
},
|
||||
"_help_misc": "Miscellaneous configuration options.",
|
||||
"misc": {
|
||||
"_help_per_command": [
|
||||
"A dictionary containing any per-command configuration",
|
||||
"overrides. Currently only `command_case` is supported."
|
||||
],
|
||||
"per_command": {}
|
||||
}
|
||||
}
|
||||
@ -15,10 +15,207 @@
|
||||
# the License.
|
||||
#
|
||||
|
||||
#[=======================================================================[.rst:
|
||||
CudaConfiguration
|
||||
-----------------
|
||||
|
||||
CUDA compiler and architecture configuration for TensorRT-LLM.
|
||||
|
||||
This module provides functions and macros to configure the CUDA compiler,
|
||||
manage CUDA architectures, and filter source files based on target
|
||||
architectures. It is tailored to meet TensorRT-LLM's specific requirements
|
||||
for optimized kernel compilation across multiple GPU generations.
|
||||
|
||||
Macros
|
||||
^^^^^^
|
||||
|
||||
.. command:: setup_cuda_compiler
|
||||
|
||||
Detects and validates the CUDA compiler::
|
||||
|
||||
setup_cuda_compiler()
|
||||
|
||||
This macro determines the CUDA compiler version before enabling the CUDA
|
||||
language extension. It requires CUDA version 11.2 or later.
|
||||
|
||||
The macro sets ``CMAKE_CUDA_COMPILER_VERSION`` upon successful detection.
|
||||
|
||||
Functions
|
||||
^^^^^^^^^
|
||||
|
||||
.. command:: setup_cuda_architectures
|
||||
|
||||
Initializes and normalizes ``CMAKE_CUDA_ARCHITECTURES``::
|
||||
|
||||
setup_cuda_architectures()
|
||||
|
||||
This function processes the ``CMAKE_CUDA_ARCHITECTURES`` variable and
|
||||
configures architecture-specific compilation settings. This function should
|
||||
be called after enabling the CUDA language extension.
|
||||
|
||||
**Special Values for CMAKE_CUDA_ARCHITECTURES:**
|
||||
|
||||
``native``
|
||||
Resolves to the highest available architecture on the system.
|
||||
Falls back to ``all`` if detection fails.
|
||||
|
||||
``all`` or unset
|
||||
Resolves to architectures TensorRT-LLM is optimized for and the
|
||||
compiler supports (80, 86, 89, 90, 100, 103, 120 depending on CUDA version).
|
||||
|
||||
``all-major``
|
||||
Unsupported. Results in a fatal error.
|
||||
|
||||
**Architecture Processing:**
|
||||
|
||||
* PTX is never included in the result binary (``-virtual`` rejected).
|
||||
* The ``-real`` suffix is automatically added to exclude PTX.
|
||||
* Accelerated targets (``-a`` suffix) are used for SM 90+.
|
||||
* On CUDA 12.9+, family targets (``-f`` suffix) are used for SM 100+.
|
||||
|
||||
**Output Variables (set in parent scope):**
|
||||
|
||||
``CMAKE_CUDA_ARCHITECTURES``
|
||||
Normalized list with appropriate suffixes (e.g., ``80-real``, ``90a-real``,
|
||||
``100f-real``).
|
||||
|
||||
``CMAKE_CUDA_ARCHITECTURES_ORIG``
|
||||
Original list of enabled architectures without suffixes.
|
||||
|
||||
``CMAKE_CUDA_ARCHITECTURES_FAMILIES``
|
||||
List of family architectures (e.g., ``100f``, ``120f``).
|
||||
|
||||
``CMAKE_CUDA_ARCHITECTURES_HAS_FAMILIES``
|
||||
Boolean indicating if family targets are supported.
|
||||
|
||||
``CMAKE_CUDA_MIN_ARCHITECTURE_HAS_ACCEL``
|
||||
Minimum architecture supporting accelerated (``-a``) suffix.
|
||||
|
||||
``CMAKE_CUDA_MIN_ARCHITECTURE_HAS_FAMILY``
|
||||
Minimum architecture supporting family (``-f``) suffix.
|
||||
|
||||
.. command:: add_cuda_architectures
|
||||
|
||||
Appends CUDA architectures to an existing target::
|
||||
|
||||
add_cuda_architectures(<target> <arch1> [<arch2> ...])
|
||||
|
||||
Adds the specified architectures to ``<target>``'s ``CUDA_ARCHITECTURES``
|
||||
property. The ``-a`` suffix is automatically added for supported
|
||||
architectures. Architectures are only added if they were explicitly
|
||||
requested by the user in ``CMAKE_CUDA_ARCHITECTURES_ORIG``.
|
||||
|
||||
.. command:: set_cuda_architectures
|
||||
|
||||
Sets CUDA architectures for a target::
|
||||
|
||||
set_cuda_architectures(<target> <arch1> [<arch2> ...])
|
||||
|
||||
Replaces the ``CUDA_ARCHITECTURES`` property of ``<target>`` with the
|
||||
specified architectures.
|
||||
|
||||
**Architecture Specification:**
|
||||
|
||||
* Architectures may include the ``f`` suffix for family-conditional
|
||||
compilation (e.g., ``100f``).
|
||||
* Non-family architectures are only added if explicitly requested.
|
||||
* Family architectures are only added if requested architectures would
|
||||
enable compilation for that family.
|
||||
|
||||
If no architectures are enabled for the target, it compiles with
|
||||
``PLACEHOLDER_KERNELS`` macro defined. The kernel source shall compile
|
||||
with any architecture if ``PLACEHOLDER_KERNELS`` macro is defined.
|
||||
|
||||
.. command:: filter_source_cuda_architectures
|
||||
|
||||
Filters source files based on enabled CUDA architectures::
|
||||
|
||||
filter_source_cuda_architectures(
|
||||
SOURCE_LIST <variable>
|
||||
TARGET <target>
|
||||
ARCHS <arch1> [<arch2> ...]
|
||||
[IMPLICIT_FAMILY]
|
||||
)
|
||||
|
||||
Removes source files targeting disabled CUDA architectures from the
|
||||
source list. Files are matched by patterns like ``sm80``, ``sm_80``,
|
||||
``SM80``, etc. in their filenames (for ``.cu`` and ``cubin.cpp`` files).
|
||||
|
||||
``SOURCE_LIST <variable>``
|
||||
Name of the variable containing the list of source files.
|
||||
Modified in place to remove filtered files.
|
||||
|
||||
``TARGET <target>``
|
||||
Target to add compile definitions to. If the target does not exist,
|
||||
an INTERFACE library will be created.
|
||||
|
||||
``ARCHS <arch1> [<arch2> ...]``
|
||||
List of architectures to check. May include ``f`` suffix.
|
||||
|
||||
``IMPLICIT_FAMILY``
|
||||
When set, treats architectures >= ``CMAKE_CUDA_MIN_ARCHITECTURE_HAS_FAMILY``
|
||||
as implicitly family-enabled.
|
||||
|
||||
**Defined Macros:**
|
||||
|
||||
For each filtered architecture, a compile definition ``EXCLUDE_SM_<ARCH>``
|
||||
(or ``EXCLUDE_SM_<ARCH>F`` for family architectures) is added to ``<target>``.
|
||||
|
||||
Example
|
||||
^^^^^^^
|
||||
|
||||
.. code-block:: cmake
|
||||
|
||||
include(cuda_configuration)
|
||||
|
||||
# Setup compiler and detect version
|
||||
setup_cuda_compiler()
|
||||
|
||||
# enable_language, or project(project_name LANGUAGES CUDA)
|
||||
# must be called after setup_cuda_compiler() and before
|
||||
# setup_cuda_architectures()
|
||||
enable_language(CUDA)
|
||||
|
||||
# Configure architectures (uses CMAKE_CUDA_ARCHITECTURES if set)
|
||||
setup_cuda_architectures()
|
||||
|
||||
# Add additional architecture to compile for, if it is beneficial.
|
||||
# e.g. Utilizing native FP8 support available in sm89 (Ada)
|
||||
# but not in sm86 (Ampere)
|
||||
# Note: The kernel source must still compile for all the architectures,
|
||||
# by using less performant implementation.
|
||||
add_library(my_kernels_fp8 STATIC kernels.cu)
|
||||
add_cuda_architectures(my_kernels_fp8 89)
|
||||
|
||||
# Set specific architecture this source should compile for.
|
||||
# e.g. Kernels using WGMMA instructions
|
||||
# Note: The kernel source must still compile for other architectures when
|
||||
# ``PLACEHOLDER_KERNELS`` macro is defined.
|
||||
add_library(my_kernels_sm90_only STATIC kernels.cu)
|
||||
set_cuda_architectures(my_kernels_sm90_only 90)
|
||||
|
||||
# Filter sources for disabled architectures
|
||||
set(KERNEL_SOURCES
|
||||
kernel_sm80.cubin.cpp
|
||||
kernel_sm90.cubin.cpp
|
||||
kernel_sm100.cubin.cpp
|
||||
)
|
||||
filter_source_cuda_architectures(
|
||||
SOURCE_LIST KERNEL_SOURCES
|
||||
TARGET my_kernel_interface
|
||||
ARCHS 80 90 100
|
||||
)
|
||||
# ``my_kernel_interface`` target is created with definitions to exclude
|
||||
# disabled architectures.
|
||||
|
||||
#]=======================================================================]
|
||||
|
||||
#[[
|
||||
Determine CUDA version before enabling the language extension
|
||||
check_language(CUDA) clears CMAKE_CUDA_HOST_COMPILER if CMAKE_CUDA_COMPILER
|
||||
is not set
|
||||
#]]
|
||||
macro(setup_cuda_compiler)
|
||||
# Determine CUDA version before enabling the language extension
|
||||
# check_language(CUDA) clears CMAKE_CUDA_HOST_COMPILER if CMAKE_CUDA_COMPILER
|
||||
# is not set
|
||||
include(CheckLanguage)
|
||||
if(NOT CMAKE_CUDA_COMPILER AND CMAKE_CUDA_HOST_COMPILER)
|
||||
set(CMAKE_CUDA_HOST_COMPILER_BACKUP ${CMAKE_CUDA_HOST_COMPILER})
|
||||
@ -70,25 +267,28 @@ macro(setup_cuda_compiler)
|
||||
endif()
|
||||
endmacro()
|
||||
|
||||
function(setup_cuda_architectures)
|
||||
# cmake-format: off
|
||||
# Initialize and normalize CMAKE_CUDA_ARCHITECTURES.
|
||||
# Special values:
|
||||
# * `native` is resolved to HIGHEST available architecture.
|
||||
# * Fallback to `all` if detection failed.
|
||||
# * `all`/unset is resolved to a set of architectures we optimized for and compiler supports.
|
||||
# * `all-major` is unsupported.
|
||||
# Numerical architectures:
|
||||
# * PTX is never included in result binary.
|
||||
# * `*-virtual` architectures are therefore rejected.
|
||||
# * `-real` suffix is automatically added to exclude PTX.
|
||||
# * Always use accelerated (`-a` suffix) target for supported architectures.
|
||||
# * On CUDA 12.9 or newer, family (`-f` suffix) target will be used for supported architectures to reduce number of
|
||||
# targets to compile for.
|
||||
# * Extra architectures can be requested via add_cuda_architectures
|
||||
# for kernels that benefit from arch specific features.
|
||||
# cmake-format: on
|
||||
#[[
|
||||
Initialize and normalize CMAKE_CUDA_ARCHITECTURES.
|
||||
|
||||
Special values:
|
||||
|
||||
* `native` is resolved to HIGHEST available architecture.
|
||||
* Fallback to `all` if detection failed.
|
||||
* `all`/unset is resolved to a set of architectures we optimized for and compiler supports.
|
||||
* `all-major` is unsupported.
|
||||
|
||||
Numerical architectures:
|
||||
|
||||
* PTX is never included in result binary.
|
||||
* `*-virtual` architectures are therefore rejected.
|
||||
* `-real` suffix is automatically added to exclude PTX.
|
||||
* Always use accelerated (`-a` suffix) target for supported architectures.
|
||||
* On CUDA 12.9 or newer, family (`-f` suffix) target will be used for supported architectures to reduce number of
|
||||
targets to compile for.
|
||||
* Extra architectures can be requested via add_cuda_architectures
|
||||
for kernels that benefit from arch specific features.
|
||||
#]]
|
||||
function(setup_cuda_architectures)
|
||||
set(CMAKE_CUDA_ARCHITECTURES_RAW ${CMAKE_CUDA_ARCHITECTURES})
|
||||
if(CMAKE_CUDA_ARCHITECTURES_RAW STREQUAL "native")
|
||||
# Detect highest available compute capability
|
||||
@ -138,9 +338,6 @@ function(setup_cuda_architectures)
|
||||
message(FATAL_ERROR "Unrecognized CUDA architecture: ${CUDA_ARCH}")
|
||||
endif()
|
||||
endforeach()
|
||||
if("103" IN_LIST CMAKE_CUDA_ARCHITECTURES_CLEAN)
|
||||
list(APPEND CMAKE_CUDA_ARCHITECTURES_CLEAN "100")
|
||||
endif()
|
||||
list(REMOVE_DUPLICATES CMAKE_CUDA_ARCHITECTURES_CLEAN)
|
||||
set(CMAKE_CUDA_ARCHITECTURES_RAW ${CMAKE_CUDA_ARCHITECTURES_CLEAN})
|
||||
endif()
|
||||
@ -182,22 +379,29 @@ function(setup_cuda_architectures)
|
||||
endforeach()
|
||||
|
||||
# -a suffix supported from Hopper (90)
|
||||
set(MIN_ARCHITECTURE_HAS_ACCEL 90)
|
||||
set(CMAKE_CUDA_MIN_ARCHITECTURE_HAS_ACCEL 90)
|
||||
set(CMAKE_CUDA_MIN_ARCHITECTURE_HAS_ACCEL
|
||||
${CMAKE_CUDA_MIN_ARCHITECTURE_HAS_ACCEL}
|
||||
PARENT_SCOPE)
|
||||
# -f suffix supported from Blackwell (100) starting from CUDA 12.9.
|
||||
if(CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL "12.9")
|
||||
set(MIN_ARCHITECTURE_HAS_FAMILY 100)
|
||||
set(CMAKE_CUDA_MIN_ARCHITECTURE_HAS_FAMILY 100)
|
||||
set(CMAKE_CUDA_ARCHITECTURES_HAS_FAMILIES
|
||||
ON
|
||||
PARENT_SCOPE)
|
||||
else()
|
||||
# -a provides no cross architecture compatibility, but luckily until CUDA
|
||||
# 12.8 We have only one architecture within each family >= 9.
|
||||
set(MIN_ARCHITECTURE_HAS_FAMILY 9999) # Effectively exclude all
|
||||
# architectures
|
||||
set(CMAKE_CUDA_MIN_ARCHITECTURE_HAS_FAMILY 9999) # Effectively exclude all
|
||||
# architectures
|
||||
set(CMAKE_CUDA_ARCHITECTURES_HAS_FAMILIES
|
||||
OFF
|
||||
PARENT_SCOPE)
|
||||
endif()
|
||||
set(CMAKE_CUDA_MIN_ARCHITECTURE_HAS_FAMILY
|
||||
${CMAKE_CUDA_MIN_ARCHITECTURE_HAS_FAMILY}
|
||||
PARENT_SCOPE)
|
||||
|
||||
# Compatibility low bounds: Always compile kernels for these architectures. 86
|
||||
# is enabled to avoid perf regression when using 80 kernels.
|
||||
set(ARCHITECTURES_COMPATIBILITY_BASE 80 86 90 100 120)
|
||||
@ -252,11 +456,11 @@ function(setup_cuda_architectures)
|
||||
set(CMAKE_CUDA_ARCHITECTURES_NORMALIZED)
|
||||
set(CMAKE_CUDA_ARCHITECTURES_FAMILIES)
|
||||
foreach(CUDA_ARCH IN LISTS CMAKE_CUDA_ARCHITECTURES_NORMALIZED_LIST)
|
||||
if(CUDA_ARCH GREATER_EQUAL ${MIN_ARCHITECTURE_HAS_FAMILY}
|
||||
if(CUDA_ARCH GREATER_EQUAL ${CMAKE_CUDA_MIN_ARCHITECTURE_HAS_FAMILY}
|
||||
AND NOT CUDA_ARCH IN_LIST ARCHITECTURES_NO_COMPATIBILITY)
|
||||
list(APPEND CMAKE_CUDA_ARCHITECTURES_NORMALIZED "${CUDA_ARCH}f-real")
|
||||
list(APPEND CMAKE_CUDA_ARCHITECTURES_FAMILIES "${CUDA_ARCH}f")
|
||||
elseif(CUDA_ARCH GREATER_EQUAL ${MIN_ARCHITECTURE_HAS_ACCEL})
|
||||
elseif(CUDA_ARCH GREATER_EQUAL ${CMAKE_CUDA_MIN_ARCHITECTURE_HAS_ACCEL})
|
||||
list(APPEND CMAKE_CUDA_ARCHITECTURES_NORMALIZED "${CUDA_ARCH}a-real")
|
||||
else()
|
||||
list(APPEND CMAKE_CUDA_ARCHITECTURES_NORMALIZED "${CUDA_ARCH}-real")
|
||||
@ -271,17 +475,15 @@ function(setup_cuda_architectures)
|
||||
PARENT_SCOPE)
|
||||
endfunction()
|
||||
|
||||
#[[
|
||||
Add CUDA architectures to target.
|
||||
-a suffix is added automatically for supported architectures.
|
||||
Architectures are added only if user explicitly requested support for that architecture.
|
||||
#]]
|
||||
function(add_cuda_architectures target)
|
||||
# cmake-format: off
|
||||
# Add CUDA architectures to target.
|
||||
# -a suffix is added automatically for supported architectures.
|
||||
# Architectures are added only if user explicitly requested support for that architecture.
|
||||
# cmake-format: on
|
||||
set(MIN_ARCHITECTURE_HAS_ACCEL 90)
|
||||
|
||||
foreach(CUDA_ARCH IN LISTS ARGN)
|
||||
if(${CUDA_ARCH} IN_LIST CMAKE_CUDA_ARCHITECTURES_ORIG)
|
||||
if(${CUDA_ARCH} GREATER_EQUAL ${MIN_ARCHITECTURE_HAS_ACCEL})
|
||||
if(${CUDA_ARCH} GREATER_EQUAL ${CMAKE_CUDA_MIN_ARCHITECTURE_HAS_ACCEL})
|
||||
set(REAL_CUDA_ARCH "${CUDA_ARCH}a-real")
|
||||
else()
|
||||
set(REAL_CUDA_ARCH "${CUDA_ARCH}-real")
|
||||
@ -294,18 +496,19 @@ function(add_cuda_architectures target)
|
||||
endforeach()
|
||||
endfunction()
|
||||
|
||||
function(set_cuda_architectures target)
|
||||
# cmake-format: off
|
||||
# Set CUDA architectures for a target.
|
||||
# -a suffix is added automatically for supported architectures.
|
||||
# Architectures passed in may be specified with -f suffix to build family conditional version of the kernel.
|
||||
# Non-family architectures are added only if user explicitly requested support for that architecture.
|
||||
# Family conditional architectures are only added if user requested architectures would enable compilation for it.
|
||||
# If user requested no architectures set on the target,
|
||||
# the target will be compiled with `PLACEHOLDER_KERNELS` macro defined.
|
||||
# cmake-format: on
|
||||
set(MIN_ARCHITECTURE_HAS_ACCEL 90)
|
||||
#[[
|
||||
Set CUDA architectures for a target.
|
||||
|
||||
-a suffix is added automatically for supported architectures.
|
||||
Architectures passed in may be specified with -f suffix to build family conditional version of the kernel.
|
||||
|
||||
Non-family architectures are added only if user explicitly requested support for that architecture.
|
||||
Family conditional architectures are only added if user requested architectures would enable compilation for it.
|
||||
|
||||
If user requested no architectures set on the target,
|
||||
the target will be compiled with `PLACEHOLDER_KERNELS` macro defined.
|
||||
#]]
|
||||
function(set_cuda_architectures target)
|
||||
set(CUDA_ARCHITECTURES "")
|
||||
foreach(CUDA_ARCH IN LISTS ARGN)
|
||||
if(${CUDA_ARCH} MATCHES "[0-9]+f")
|
||||
@ -326,7 +529,7 @@ function(set_cuda_architectures target)
|
||||
endforeach()
|
||||
endif()
|
||||
elseif(${CUDA_ARCH} IN_LIST CMAKE_CUDA_ARCHITECTURES_ORIG)
|
||||
if(${CUDA_ARCH} GREATER_EQUAL ${MIN_ARCHITECTURE_HAS_ACCEL})
|
||||
if(${CUDA_ARCH} GREATER_EQUAL ${CMAKE_CUDA_MIN_ARCHITECTURE_HAS_ACCEL})
|
||||
list(APPEND CUDA_ARCHITECTURES "${CUDA_ARCH}a-real")
|
||||
else()
|
||||
list(APPEND CUDA_ARCHITECTURES "${CUDA_ARCH}-real")
|
||||
@ -342,3 +545,153 @@ function(set_cuda_architectures target)
|
||||
${CUDA_ARCHITECTURES})
|
||||
endif()
|
||||
endfunction()
|
||||
|
||||
#[[
|
||||
Filter out source files targeting CUDA architectures not enabled.
|
||||
|
||||
Arguments:
|
||||
SOURCE_LIST - Name of the variable containing the list of source files to filter
|
||||
TARGET - Target to add compile definitions to. If the target does not exist,
|
||||
an INTERFACE library will be created.
|
||||
ARCHS - List of architectures to check and potentially filter
|
||||
IMPLICIT_FAMILY - Optional flag to enable implicit family mode
|
||||
|
||||
For each ARCH passed in:
|
||||
|
||||
- if IMPLICIT_FAMILY is not set:
|
||||
- if ARCH is not suffixed by f:
|
||||
if ARCH is not in CMAKE_CUDA_ARCHITECTURES_ORIG, source files containing "sm${ARCH}"
|
||||
but not "sm${ARCH}f" (case insensitive) will be excluded
|
||||
Macro "EXCLUDE_SM_${ARCH}" will be defined on TARGET
|
||||
- if ARCH is suffixed by f, NARCH is ARCH without f suffix:
|
||||
if ARCH is not in CMAKE_CUDA_ARCHITECTURES_FAMILIES, source files containing
|
||||
"sm${NARCH}f" (case insensitive) will be excluded
|
||||
Macro "EXCLUDE_SM_${NARCH}F" will be defined on TARGET
|
||||
|
||||
- if IMPLICIT_FAMILY is set:
|
||||
ARCH shall not be suffixed by f.
|
||||
- if ARCH >= CMAKE_CUDA_MIN_ARCHITECTURE_HAS_FAMILY:
|
||||
if "${ARCH}f" is not in CMAKE_CUDA_ARCHITECTURES_FAMILIES,
|
||||
source files containing "sm${ARCH}" but not "sm${ARCH}a" (case insensitive) will be excluded
|
||||
Macro "EXCLUDE_SM_${ARCH}" (no F) will be defined on TARGET
|
||||
- else:
|
||||
if "${ARCH}" is not in CMAKE_CUDA_ARCHITECTURES_ORIG,
|
||||
source files containing "sm${ARCH}" (case insensitive) will be excluded
|
||||
Macro "EXCLUDE_SM_${ARCH}" will be defined on TARGET
|
||||
#]]
|
||||
function(filter_source_cuda_architectures)
  # Remove sources that target CUDA architectures which are not enabled from
  # the list variable named by SOURCE_LIST, and add an EXCLUDE_SM_<ARCH>
  # compile definition to TARGET for every architecture that gets filtered.
  #
  # Keyword arguments:
  #
  # * SOURCE_LIST <variable>: name of the list variable to filter; the
  #   filtered list is written back to the caller's scope.
  # * TARGET <target>: target receiving the definitions; created as an
  #   INTERFACE library when it does not exist yet.
  # * ARCHS <arch>...: architectures to check, optionally suffixed by `f`.
  # * IMPLICIT_FAMILY: treat architectures at or above
  #   CMAKE_CUDA_MIN_ARCHITECTURE_HAS_FAMILY as family targets.
  set(options IMPLICIT_FAMILY)
  set(oneValueArgs SOURCE_LIST TARGET)
  set(multiValueArgs ARCHS)

  cmake_parse_arguments(PARSE_ARGV 0 arg "${options}" "${oneValueArgs}"
                        "${multiValueArgs}")
  set(SOURCES "${${arg_SOURCE_LIST}}")

  if(NOT TARGET ${arg_TARGET})
    add_library(${arg_TARGET} INTERFACE)
  endif()

  # Determine if target is INTERFACE library to use correct visibility
  get_target_property(_target_type ${arg_TARGET} TYPE)
  if(_target_type STREQUAL "INTERFACE_LIBRARY")
    set(_compile_def_visibility INTERFACE)
  else()
    set(_compile_def_visibility PUBLIC)
  endif()

  # Only `.cu` and `cubin.cpp` sources are candidates for filtering. The dots
  # are spelled `\\.` on purpose: CMake resolves backslash escapes in quoted
  # arguments before the regex engine sees them, so a single `\.` would arrive
  # as a bare `.` and match any character instead of a literal dot.
  set(_source_suffix "(cubin\\.cpp|\\.cu)$")

  foreach(ARCH IN LISTS arg_ARCHS)
    set(SHOULD_FILTER FALSE)
    set(MATCH_PATTERN "")
    set(EXCLUDE_PATTERN "")
    set(ARCH_FOR_DEFINE "")

    if(NOT arg_IMPLICIT_FAMILY)
      # Check if ARCH ends with 'f'
      string(REGEX MATCH "^(.+)f$" _has_f_suffix "${ARCH}")

      if(_has_f_suffix)
        # ARCH is suffixed by 'f' (e.g., "100f")
        set(BASE_ARCH "${CMAKE_MATCH_1}")
        if(NOT "${ARCH}" IN_LIST CMAKE_CUDA_ARCHITECTURES_FAMILIES)
          set(SHOULD_FILTER TRUE)
          set(ARCH_FOR_DEFINE "${BASE_ARCH}F")
          # Match "sm${BASE_ARCH}f" - straightforward match, no exclusion
          # pattern needed
          set(MATCH_PATTERN ".*[Ss][Mm]_?${BASE_ARCH}f.*${_source_suffix}")
        endif()
      else()
        # ARCH is NOT suffixed by 'f' (e.g., "80")
        if(NOT "${ARCH}" IN_LIST CMAKE_CUDA_ARCHITECTURES_ORIG)
          set(SHOULD_FILTER TRUE)
          set(ARCH_FOR_DEFINE "${ARCH}")
          # Match "sm${ARCH}" but NOT "sm${ARCH}f"
          set(MATCH_PATTERN ".*[Ss][Mm]_?${ARCH}.*${_source_suffix}")
          set(EXCLUDE_PATTERN ".*[Ss][Mm]_?${ARCH}f.*${_source_suffix}")
        endif()
      endif()
    else()
      # IMPLICIT_FAMILY is set - ARCH shall not be suffixed by 'f'
      if(${ARCH} GREATER_EQUAL ${CMAKE_CUDA_MIN_ARCHITECTURE_HAS_FAMILY})
        # ARCH >= CMAKE_CUDA_MIN_ARCHITECTURE_HAS_FAMILY
        if(NOT "${ARCH}f" IN_LIST CMAKE_CUDA_ARCHITECTURES_FAMILIES)
          set(SHOULD_FILTER TRUE)
          set(ARCH_FOR_DEFINE "${ARCH}")
          # Match "sm${ARCH}" but NOT "sm${ARCH}a"
          set(MATCH_PATTERN ".*[Ss][Mm]_?${ARCH}.*${_source_suffix}")
          set(EXCLUDE_PATTERN ".*[Ss][Mm]_?${ARCH}a.*${_source_suffix}")
        endif()
      else()
        # ARCH < CMAKE_CUDA_MIN_ARCHITECTURE_HAS_FAMILY
        if(NOT "${ARCH}" IN_LIST CMAKE_CUDA_ARCHITECTURES_ORIG)
          set(SHOULD_FILTER TRUE)
          set(ARCH_FOR_DEFINE "${ARCH}")
          # Match "sm${ARCH}" - no exclusion pattern needed
          set(MATCH_PATTERN ".*[Ss][Mm]_?${ARCH}.*${_source_suffix}")
        endif()
      endif()
    endif()

    if(SHOULD_FILTER)
      # Get files matching the main pattern
      set(SOURCES_TO_CHECK "${SOURCES}")
      list(FILTER SOURCES_TO_CHECK INCLUDE REGEX "${MATCH_PATTERN}")

      if(NOT "${EXCLUDE_PATTERN}" STREQUAL "")
        # Find files matching the exclusion pattern (these should be kept)
        set(SOURCES_TO_KEEP "${SOURCES_TO_CHECK}")
        list(FILTER SOURCES_TO_KEEP INCLUDE REGEX "${EXCLUDE_PATTERN}")
        # Remove the files we want to keep from the check list
        if(SOURCES_TO_KEEP)
          list(REMOVE_ITEM SOURCES_TO_CHECK ${SOURCES_TO_KEEP})
        endif()
      endif()

      set(SOURCES_FILTERED "${SOURCES_TO_CHECK}")

      list(LENGTH SOURCES_FILTERED SOURCES_FILTERED_LEN)
      message(
        STATUS
          "Excluding ${SOURCES_FILTERED_LEN} cubins for SM ${ARCH} from ${CMAKE_CURRENT_SOURCE_DIR}"
      )
      foreach(filtered_item IN LISTS SOURCES_FILTERED)
        message(VERBOSE "- ${filtered_item}")
      endforeach()

      # Remove filtered files from sources
      if(SOURCES_FILTERED)
        list(REMOVE_ITEM SOURCES ${SOURCES_FILTERED})
      endif()

      # Add compile definition to target. This happens even when no file
      # matched, so the macro reflects the disabled architecture rather than
      # the presence of sources for it.
      target_compile_definitions(
        ${arg_TARGET} ${_compile_def_visibility}
        "EXCLUDE_SM_${ARCH_FOR_DEFINE}")
    endif()
  endforeach()

  # Publish the filtered list back into the caller's variable.
  set(${arg_SOURCE_LIST}
      "${SOURCES}"
      PARENT_SCOPE)
endfunction()
|
||||
|
||||
@ -36,6 +36,7 @@ add_library(common_src OBJECT ${SRCS} ${CU_SRCS})
|
||||
add_cuda_architectures(common_src 89)
|
||||
set_property(TARGET common_src PROPERTY POSITION_INDEPENDENT_CODE ON)
|
||||
set_property(TARGET common_src PROPERTY CUDA_RESOLVE_DEVICE_SYMBOLS ON)
|
||||
target_link_libraries(common_src PUBLIC trtllm_gen_fmha_interface)
|
||||
|
||||
if(ENABLE_CUBLASLT_FP4_GEMM)
|
||||
target_compile_definitions(common_src PRIVATE ENABLE_CUBLASLT_FP4_GEMM)
|
||||
|
||||
@ -15,6 +15,20 @@
|
||||
# the License.
|
||||
#
|
||||
|
||||
add_subdirectory(cutlass_kernels)
|
||||
add_subdirectory(cuteDslKernels)
|
||||
add_subdirectory(flashMLA)
|
||||
add_subdirectory(contextFusedMultiHeadAttention)
|
||||
add_subdirectory(decoderMaskedMultiheadAttention)
|
||||
add_subdirectory(selectiveScan)
|
||||
add_subdirectory(userbuffers)
|
||||
add_subdirectory(trtllmGenKernels)
|
||||
add_subdirectory(fusedLayernormKernels)
|
||||
add_subdirectory(groupRmsNormKernels)
|
||||
add_subdirectory(llama4MinLatencyKernels)
|
||||
add_subdirectory(dsv3MinLatencyKernels)
|
||||
add_subdirectory(causalConv1d)
|
||||
|
||||
file(GLOB_RECURSE SRC_CPP *.cpp)
|
||||
file(GLOB_RECURSE SRC_CU *.cu)
|
||||
|
||||
@ -38,28 +52,6 @@ list(FILTER SRC_CPP EXCLUDE REGEX "userbuffers/.*")
|
||||
list(FILTER SRC_CU EXCLUDE REGEX "userbuffers/.*")
|
||||
list(FILTER SRC_CU EXCLUDE REGEX "fusedLayernormKernels/.*")
|
||||
|
||||
function(filter_cuda_archs ARCH SOURCES_VAR)
|
||||
if(NOT "${ARCH}" IN_LIST CMAKE_CUDA_ARCHITECTURES_ORIG)
|
||||
set(FILTER_REGEX ".*[Ss][Mm]_?${ARCH}(af)?.*(cubin\.cpp|\.cu)$")
|
||||
list(APPEND SOURCES ${${SOURCES_VAR}})
|
||||
list(APPEND SOURCES_FILTERED ${SOURCES})
|
||||
list(FILTER SOURCES_FILTERED INCLUDE REGEX "${FILTER_REGEX}")
|
||||
list(LENGTH SOURCES_FILTERED SOURCES_FILTERED_LEN)
|
||||
message(
|
||||
STATUS
|
||||
"Excluding ${SOURCES_FILTERED_LEN} cubins for SM ${ARCH} from ${CMAKE_CURRENT_SOURCE_DIR}"
|
||||
)
|
||||
foreach(filtered_item ${SOURCES_FILTERED})
|
||||
message(VERBOSE "- ${filtered_item}")
|
||||
endforeach()
|
||||
list(FILTER SOURCES EXCLUDE REGEX "${FILTER_REGEX}")
|
||||
set(${SOURCES_VAR}
|
||||
"${SOURCES}"
|
||||
PARENT_SCOPE)
|
||||
add_compile_definitions("EXCLUDE_SM_${ARCH}")
|
||||
endif()
|
||||
endfunction()
|
||||
|
||||
if(NOT ENABLE_MULTI_DEVICE)
|
||||
list(FILTER SRC_CU EXCLUDE REGEX "customAllReduceKernels*.*cu$")
|
||||
endif()
|
||||
@ -72,18 +64,5 @@ target_include_directories(
|
||||
PUBLIC
|
||||
$<TARGET_PROPERTY:${INTERNAL_CUTLASS_KERNELS_TARGET},INTERFACE_INCLUDE_DIRECTORIES>
|
||||
)
|
||||
target_link_libraries(kernels_src PUBLIC trtllm_gen_fmha_interface)
|
||||
add_cuda_architectures(kernels_src 89)
|
||||
|
||||
add_subdirectory(cutlass_kernels)
|
||||
add_subdirectory(cuteDslKernels)
|
||||
add_subdirectory(flashMLA)
|
||||
add_subdirectory(contextFusedMultiHeadAttention)
|
||||
add_subdirectory(decoderMaskedMultiheadAttention)
|
||||
add_subdirectory(selectiveScan)
|
||||
add_subdirectory(userbuffers)
|
||||
add_subdirectory(trtllmGenKernels)
|
||||
add_subdirectory(fusedLayernormKernels)
|
||||
add_subdirectory(groupRmsNormKernels)
|
||||
add_subdirectory(llama4MinLatencyKernels)
|
||||
add_subdirectory(dsv3MinLatencyKernels)
|
||||
add_subdirectory(causalConv1d)
|
||||
|
||||
@ -19,42 +19,56 @@ file(GLOB_RECURSE SRC_CPP *.cpp)
|
||||
file(GLOB_RECURSE SRC_CU *.cu)
|
||||
list(FILTER SRC_CU EXCLUDE REGEX "fmha_v2_cu/.*")
|
||||
|
||||
filter_cuda_archs("80" SRC_CPP)
|
||||
filter_cuda_archs("86" SRC_CPP)
|
||||
filter_cuda_archs("89" SRC_CPP)
|
||||
filter_cuda_archs("90" SRC_CPP)
|
||||
filter_cuda_archs("100" SRC_CPP)
|
||||
filter_cuda_archs("120" SRC_CPP)
|
||||
add_library(context_attention_src OBJECT)
|
||||
|
||||
add_library(context_attention_src OBJECT ${SRC_CPP} ${SRC_CU})
|
||||
filter_source_cuda_architectures(
|
||||
SOURCE_LIST SRC_CPP
|
||||
ARCHS 80 86 89 90 100 120
|
||||
TARGET context_attention_src
|
||||
IMPLICIT_FAMILY)
|
||||
|
||||
target_sources(context_attention_src PRIVATE ${SRC_CPP} ${SRC_CU})
|
||||
target_compile_definitions(context_attention_src PRIVATE USE_DEMO_BERT_PARAMS=1
|
||||
GENERATE_CUBIN=1)
|
||||
set_target_properties(
|
||||
context_attention_src PROPERTIES POSITION_INDEPENDENT_CODE ON
|
||||
CUDA_RESOLVE_DEVICE_SYMBOLS ON)
|
||||
target_link_libraries(context_attention_src PUBLIC trtllm_gen_fmha_interface)
|
||||
|
||||
foreach(arch IN ITEMS 80 86 89 90 100 120)
|
||||
if("${arch}" IN_LIST CMAKE_CUDA_ARCHITECTURES_ORIG)
|
||||
file(GLOB arch_files "fmha_v2_cu/*_sm${arch}.cu")
|
||||
if(arch_files)
|
||||
set(TARGET_NAME _context_attention_kernels_${arch})
|
||||
add_library(${TARGET_NAME} OBJECT ${arch_files})
|
||||
target_compile_definitions(${TARGET_NAME} PRIVATE USE_DEMO_BERT_PARAMS=1
|
||||
GENERATE_CUBIN=1)
|
||||
set_target_properties(
|
||||
${TARGET_NAME} PROPERTIES POSITION_INDEPENDENT_CODE ON
|
||||
CUDA_RESOLVE_DEVICE_SYMBOLS ON)
|
||||
target_include_directories(
|
||||
${TARGET_NAME}
|
||||
PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../../../kernels/fmha_v2/src/
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/../../../kernels/fmha_v2/generated/)
|
||||
if(${arch} GREATER_EQUAL 100)
|
||||
set_cuda_architectures(${TARGET_NAME} "${arch}f")
|
||||
else()
|
||||
set_cuda_architectures(${TARGET_NAME} ${arch})
|
||||
endif()
|
||||
target_sources(context_attention_src
|
||||
PUBLIC $<TARGET_OBJECTS:${TARGET_NAME}>)
|
||||
set(ARCH_ENABLED FALSE)
|
||||
set(TARGET_ARCH "")
|
||||
if(${arch} GREATER_EQUAL ${CMAKE_CUDA_MIN_ARCHITECTURE_HAS_FAMILY})
|
||||
if("${arch}f" IN_LIST CMAKE_CUDA_ARCHITECTURES_FAMILIES)
|
||||
set(ARCH_ENABLED TRUE)
|
||||
set(TARGET_ARCH "${arch}f")
|
||||
endif()
|
||||
else()
|
||||
if("${arch}" IN_LIST CMAKE_CUDA_ARCHITECTURES_ORIG)
|
||||
set(ARCH_ENABLED TRUE)
|
||||
set(TARGET_ARCH "${arch}")
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if(NOT ${ARCH_ENABLED})
|
||||
continue()
|
||||
endif()
|
||||
|
||||
file(GLOB arch_files "fmha_v2_cu/*_sm${arch}.cu")
|
||||
if(arch_files)
|
||||
set(TARGET_NAME _context_attention_kernels_${arch})
|
||||
add_library(${TARGET_NAME} OBJECT ${arch_files})
|
||||
target_compile_definitions(${TARGET_NAME} PRIVATE USE_DEMO_BERT_PARAMS=1
|
||||
GENERATE_CUBIN=1)
|
||||
set_target_properties(
|
||||
${TARGET_NAME} PROPERTIES POSITION_INDEPENDENT_CODE ON
|
||||
CUDA_RESOLVE_DEVICE_SYMBOLS ON)
|
||||
target_include_directories(
|
||||
${TARGET_NAME}
|
||||
PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../../../kernels/fmha_v2/src/
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/../../../kernels/fmha_v2/generated/)
|
||||
set_cuda_architectures(${TARGET_NAME} ${TARGET_ARCH})
|
||||
target_sources(context_attention_src
|
||||
PUBLIC $<TARGET_OBJECTS:${TARGET_NAME}>)
|
||||
endif()
|
||||
endforeach()
|
||||
|
||||
@ -27,11 +27,12 @@ set(SRC_CU_EXTRA)
|
||||
list(FILTER SRC_CPP EXCLUDE REGEX ".*nvrtcWrapper/.*")
|
||||
add_subdirectory(decoderXQAImplJIT/nvrtcWrapper)
|
||||
|
||||
filter_cuda_archs("80" SRC_CPP)
|
||||
filter_cuda_archs("86" SRC_CPP)
|
||||
filter_cuda_archs("89" SRC_CPP)
|
||||
filter_cuda_archs("90" SRC_CPP)
|
||||
filter_cuda_archs("120" SRC_CPP)
|
||||
add_library(decoder_attention_src OBJECT)
|
||||
filter_source_cuda_architectures(
|
||||
SOURCE_LIST SRC_CPP
|
||||
ARCHS 80 86 89 90 120
|
||||
TARGET decoder_attention_src
|
||||
IMPLICIT_FAMILY)
|
||||
|
||||
set(basic_heads 32 64 128)
|
||||
foreach(HEAD ${basic_heads})
|
||||
@ -64,7 +65,7 @@ foreach(HEAD ${extra_heads})
|
||||
endforeach()
|
||||
|
||||
if(NOT WIN32)
|
||||
add_library(decoder_attention_src OBJECT ${SRC_CPP})
|
||||
target_sources(decoder_attention_src PRIVATE ${SRC_CPP})
|
||||
|
||||
# Split some sources to shared library for Linux
|
||||
add_library(${DECODER_SHARED_TARGET_0} SHARED ${SRC_CU})
|
||||
@ -78,10 +79,12 @@ if(NOT WIN32)
|
||||
set_property(TARGET ${DECODER_SHARED_TARGET_1}
|
||||
PROPERTY CUDA_RESOLVE_DEVICE_SYMBOLS ON)
|
||||
else()
|
||||
add_library(decoder_attention_src OBJECT ${SRC_CPP} ${SRC_CU} ${SRC_CU_EXTRA})
|
||||
target_sources(decoder_attention_src PRIVATE ${SRC_CPP} ${SRC_CU}
|
||||
${SRC_CU_EXTRA})
|
||||
endif()
|
||||
|
||||
target_link_libraries(decoder_attention_src PUBLIC nvrtc_wrapper_src)
|
||||
target_link_libraries(decoder_attention_src PUBLIC nvrtc_wrapper_src
|
||||
trtllm_gen_fmha_interface)
|
||||
set_property(TARGET decoder_attention_src PROPERTY POSITION_INDEPENDENT_CODE ON)
|
||||
set_property(TARGET decoder_attention_src PROPERTY CUDA_RESOLVE_DEVICE_SYMBOLS
|
||||
ON)
|
||||
|
||||
@ -18,9 +18,13 @@
|
||||
file(GLOB_RECURSE SRC_CPP *.cpp)
|
||||
file(GLOB_RECURSE SRC_CU *.cu)
|
||||
|
||||
filter_cuda_archs("100" SRC_CPP)
|
||||
add_library(trtllm_gen_batched_gemm OBJECT)
|
||||
filter_source_cuda_architectures(
|
||||
SOURCE_LIST SRC_CPP
|
||||
ARCHS 100 103 100f
|
||||
TARGET trtllm_gen_batched_gemm)
|
||||
|
||||
add_library(trtllm_gen_batched_gemm OBJECT ${SRC_CPP} ${SRC_CU})
|
||||
target_sources(trtllm_gen_batched_gemm PRIVATE ${SRC_CPP} ${SRC_CU})
|
||||
target_compile_definitions(trtllm_gen_batched_gemm
|
||||
PUBLIC TLLM_GEN_EXPORT_INTERFACE TLLM_ENABLE_CUDA)
|
||||
|
||||
|
||||
@ -670,7 +670,8 @@ public:
|
||||
size_t getNumBatchedGemmConfigs() const
|
||||
{
|
||||
#ifdef TLLM_GEN_EXPORT_INTERFACE
|
||||
return tensorrt_llm::kernels::tllmGenBatchedGemmListLen;
|
||||
return sizeof(tensorrt_llm::kernels::tllmGenBatchedGemmList)
|
||||
/ sizeof(tensorrt_llm::kernels::tllmGenBatchedGemmList[0]);
|
||||
#else
|
||||
return 0;
|
||||
#endif
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@ -565,13 +565,13 @@ public:
|
||||
|
||||
public:
|
||||
// The MMA kind.
|
||||
tg::MmaKind mMmaKind;
|
||||
tg::MmaKind mMmaKind{};
|
||||
// Whether fuse Utccp into the MMA task.
|
||||
bool mFuseUtccpWithUtcmma;
|
||||
bool mFuseUtccpWithUtcmma{};
|
||||
// Whether use the max TMEM overlap trick.
|
||||
bool mUseMaxTmemOverlap;
|
||||
bool mUseMaxTmemOverlap{};
|
||||
// The number of epilogue warps.
|
||||
int32_t mNumEpilogueWarps;
|
||||
int32_t mNumEpilogueWarps{};
|
||||
// Helper for SMEM allocation.
|
||||
MemAllocatorHelper mSmemAllocatorHelper;
|
||||
// Helper for TMEM allocation.
|
||||
|
||||
@ -18,8 +18,12 @@
|
||||
file(GLOB_RECURSE SRC_CPP *.cpp)
|
||||
file(GLOB_RECURSE SRC_CU *.cu)
|
||||
|
||||
filter_cuda_archs("100" SRC_CPP)
|
||||
filter_source_cuda_architectures(
|
||||
SOURCE_LIST SRC_CPP
|
||||
ARCHS 100 103 100f
|
||||
TARGET trtllm_gen_fmha_interface)
|
||||
|
||||
add_library(trtllm_gen_fmha OBJECT ${SRC_CPP} ${SRC_CU})
|
||||
set_property(TARGET trtllm_gen_fmha PROPERTY POSITION_INDEPENDENT_CODE ON)
|
||||
set_property(TARGET trtllm_gen_fmha PROPERTY CUDA_RESOLVE_DEVICE_SYMBOLS ON)
|
||||
target_link_libraries(trtllm_gen_fmha PUBLIC trtllm_gen_fmha_interface)
|
||||
|
||||
@ -1,3 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:5dbeef553388e130f1b41304228f629807dabac3c5e0c66a0c28ea9833340040
|
||||
size 630932
|
||||
oid sha256:616bdb23263627aca4ce3448e32e9b47b59439aac8f774e6957415d1be92a6e5
|
||||
size 620620
|
||||
|
||||
@ -1,3 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:a90e201852a6f3c911e982a9f819a6be765814c39a24b0c9f53ce4dcb4d28c33
|
||||
size 565951
|
||||
oid sha256:f5b3750593eb35e8d451dc5842cbfb9cdd1e63345f7fda202eeff31e160d3839
|
||||
size 558895
|
||||
|
||||
@ -1,3 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:7b3100a44dbe296bc51f7ff18efd9dd1b03649bd993a1cc54da0585208395ea0
|
||||
size 483501
|
||||
oid sha256:46463e24ad7663a32582c35cf61dc56da6b37feebefcbd5df51f2cf56b6a1a5f
|
||||
size 476471
|
||||
|
||||
@ -1,3 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:8ee2df4ea7203351cd5d12be09234ea39cb0a0a3d1f6a7d8f87790f5f02991cf
|
||||
size 452197
|
||||
oid sha256:05f0e9c20a152b830151736c8fe075b786a2885497644ffee53c032704fb5975
|
||||
size 445981
|
||||
|
||||
@ -1,3 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:70bef448c376a3e57294f21afc296ea1e7a1f21bca158cecb995de47fcf7a9e8
|
||||
size 471975
|
||||
oid sha256:4cb6bc8d55b63523388260b43cbb7b5f32d0925103f1e04d6896dd6215129578
|
||||
size 464181
|
||||
|
||||
@ -1,3 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:abc46c06eba3f60de4c29a4d06b672c0fd406f0b94e82c4828919c3ad173e094
|
||||
size 447801
|
||||
oid sha256:a96c271e3b91ff63d68a9fc187e1ff980bc80822871dd5ab803dcec488e4c18d
|
||||
size 441585
|
||||
|
||||
@ -1,3 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:707f1643ecd4243edbc2552b1c14e13154f659f9653122386c7d71fd97762626
|
||||
size 626982
|
||||
oid sha256:0b5202c7e41c5478e832ebfd56ebf374574ee0b643716dafe446c4943204307b
|
||||
size 619016
|
||||
|
||||
@ -1,3 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:949736949a7d8db4eaff5171d6a280fc75e7597e78d5cf53cd698bf58143599c
|
||||
size 566023
|
||||
oid sha256:80486e68e84ed71716050db50073accd0eafdad79905f857f1ba01f82836d151
|
||||
size 558203
|
||||
|
||||
@ -1,3 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:620338e0232c4bc83022026e927ad4337b3438bfe5225a771d71ae821708a67d
|
||||
size 452573
|
||||
oid sha256:b3df4cae191465930320aaaaaabd302040f41d1a17609902dd866656dbfb8aea
|
||||
size 454251
|
||||
|
||||
@ -1,3 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:501bf6e66db0c3a774fc7505841ccf5b02da306e43522b0a694d61953cd56864
|
||||
size 394599
|
||||
oid sha256:04a36407b42e0d90fc30a20a21fe5fbfdc70c2fe64d667af6dabe5939f6d071e
|
||||
size 387593
|
||||
|
||||
@ -1,3 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:f0f40f5a666044cca61e289b9c48c98977691b9e1d32a72179128ccbee15cc55
|
||||
size 436291
|
||||
oid sha256:39332cc0ebc5c940feb6821101fcc757100d5b89281c0ae3f588ad5a662dd9d3
|
||||
size 437179
|
||||
|
||||
@ -1,3 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:c59673a8be4362f46f2e9257ec7c4e38a4800c45b1945f9df0cb32ed0c6d43ae
|
||||
size 378317
|
||||
oid sha256:164593e1d5b88e251c405d70361ac4e74508e6ce65208ab3dd6bf7976350a103
|
||||
size 371311
|
||||
|
||||
@ -1,3 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:34c1fd4fa79d1384876b968ffbf432fefdf981338a7956e296799295d1d0247c
|
||||
size 505637
|
||||
oid sha256:2b8887fed8b56ea827931b12e2452ac17901c674fa9aea0b19ff4fcf0dd951f4
|
||||
size 498607
|
||||
|
||||
@ -1,3 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:055d697387425f909c5329b792c5db97a58cc847cc7755169de50f99d0ed5f73
|
||||
size 468807
|
||||
oid sha256:cd9058dd7b9dc2d549b698a84f95b868b51907d0ff517de6b1b25802f148a683
|
||||
size 462591
|
||||
|
||||
@ -1,3 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:60c0b37d90c1fc5e3cb27e480d84d3b18d47cf7b1cd4e3a3af35f941100e54aa
|
||||
size 494111
|
||||
oid sha256:d36b4ac976263bf8ebce2b478021cea027568e7f93e9275e6124c5cebdede87f
|
||||
size 485527
|
||||
|
||||
@ -1,3 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:a3b9f3aee4f1a1d79c4876a4fec9edb0baa4c78efec3c76c04ca8bd00af76963
|
||||
size 469935
|
||||
oid sha256:121ad8d9cc1eb1b4571cdb6da67c95a8bf94277fa3bc1f959a5b2f2fac9f7cdb
|
||||
size 462931
|
||||
|
||||
@ -1,3 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:2f417704dbb04df29ad9019c106a1de5aa4f4415e7b7aa206600843b30645000
|
||||
size 670528
|
||||
oid sha256:15829041f11b292854fb76fed462349c720a0ef30a88c24bb6d1c33754fc8490
|
||||
size 663598
|
||||
|
||||
@ -1,3 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:b73aa21041034ab532412a51cf00dda4d8ffbb5d3970628dc5fc9fc0dbd39e24
|
||||
size 607571
|
||||
oid sha256:8e15720366d0d135f951818421edb4cef722909f693a80a8a5208446d9907010
|
||||
size 604167
|
||||
|
||||
@ -1,3 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:dfac7eaa6285ea9b8e02a1ac7318a14a89bd04dfd9794d194c55e45583aaeb1d
|
||||
size 481715
|
||||
oid sha256:75f42cbb4afe3e9d57246039bd2cd711bf65fd95dbff64b38242f493d9b7d871
|
||||
size 473919
|
||||
|
||||
@ -1,3 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:b18b4478ee849f9f92ff59f68887710b0c9af43d57819821e64ab81b7c8fb1cf
|
||||
size 411875
|
||||
oid sha256:3eac4d690bc59e2024c92af584bb6e0f29455e3e4575efc37cbc96e3f3263a0e
|
||||
size 405659
|
||||
|
||||
@ -1,3 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:b39f08d8599260d351b6df4e142512bb75dbaa219a4a701a26863c0566d3e72a
|
||||
size 460697
|
||||
oid sha256:a3c937220ee455d17e5703bb35e82a00b27ccb12bd8861e15be014823d5f4619
|
||||
size 454481
|
||||
|
||||
@ -1,3 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:59497957d8e27c087941c6bc5db6dbeabbd0e02f98b3d359d0ba347698927f8f
|
||||
size 394015
|
||||
oid sha256:8ef74aa9dabff7601631ccbcaeb5880c12bf391a1f30cd830f8bf272a218a484
|
||||
size 388587
|
||||
|
||||
@ -1,3 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:55485e3210691cae102219ccc7c6dc4428a79a3115887447e8d584ac25103cf5
|
||||
size 663022
|
||||
oid sha256:9f017070eaffd5f5289a69cac49ace13da7f2cc0da94f6576b6fe38f76380bf4
|
||||
size 649580
|
||||
|
||||
@ -1,3 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:7fd46e418a1d9ca3563211e581bf352207e88bef95293f71a15a63f516e008e4
|
||||
size 574435
|
||||
oid sha256:b102a4b49b61c21a3b1639159476fc01b977d10464bcdd50bcfed77e5ea0ec9a
|
||||
size 565703
|
||||
|
||||
@ -1,3 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:8e9a80632812c1d98e05131359d8d0282a0219766f4f949257804e0da566c479
|
||||
size 593219
|
||||
oid sha256:3722c508670751474ea923f7be4bb7d4bcb4ffcdf72168b4c8abe5f9921e29b3
|
||||
size 586977
|
||||
|
||||
@ -1,3 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:587b63d2d1e7fb17f0ff8ab7483e4fcba28e4944bcee169dd5d5ca2909b567c3
|
||||
size 569019
|
||||
oid sha256:dd3e414313b8aa48ca65260a855ce7e58266eb6603adf67aca52efdce74da394
|
||||
size 562803
|
||||
|
||||
@ -1,3 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:7584eb0eb94587d360419bf58a4df610a61b99069b044f82c9bfa8669cc1ad2f
|
||||
size 580115
|
||||
oid sha256:ec71cbae8f5d86c3e8149b4aa095023082dc2cbcf465e41254c40db711b20566
|
||||
size 572319
|
||||
|
||||
@ -1,3 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:a578404ca82c1fdf9be383d49d0f1e7664e935088fb33d2bc1bdb4baba166d41
|
||||
size 559885
|
||||
oid sha256:8345d6335559c4bf863948ac445bb1d6696aa38ae23cdbbc392083c329324064
|
||||
size 552091
|
||||
|
||||
@ -1,3 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:01801e6a36b2ad80cc9bf4ff90586b853694271cef751f55d2c5ba5eeea401d8
|
||||
size 663760
|
||||
oid sha256:f25796409ecb2803e61f092484b88d7dc6b58539b77c48b59c69dbc11abdcc52
|
||||
size 649578
|
||||
|
||||
@ -1,3 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:d444b0c26d99f3dc92f58a172064f3f7c21252164e9cb127f6b5ea3cfd8f9af1
|
||||
size 608499
|
||||
oid sha256:c20f6bfb52e6f222bcbbb202a4b0fb0aab4b95e73634853119141b4ee4f63a1a
|
||||
size 596609
|
||||
|
||||
@ -1,3 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:a2de5904a5271e374185323f2029b11fe07edbdf22666a562cec4e9868915e57
|
||||
size 558345
|
||||
oid sha256:be293e8aee13242adb29ac239f2ff332cca328949a69b0b3815e1c8116233d9f
|
||||
size 559233
|
||||
|
||||
@ -1,3 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:c4bc2d2f470b216d493ecd2a9d559fef26d27d34a5c9d5a44e5b890beedbc6eb
|
||||
size 501159
|
||||
oid sha256:b585477230f60b14873d5302711770bcbeff7410398792978f69caf37ef49b56
|
||||
size 492575
|
||||
|
||||
@ -1,3 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:6bdec3801521171bf2fc0442ccda65a5d66e36be28ec620763974a7fd8d51c1a
|
||||
size 539695
|
||||
oid sha256:35f0da62936aecf7dbd160137f5dfc408284082c5e1751e21467ade0ef502969
|
||||
size 541371
|
||||
|
||||
@ -1,3 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:e29787b3c4dbe5adb56c163d3e8b89191afd58615add63acdedc8a760ca3814c
|
||||
size 483299
|
||||
oid sha256:32a0d306e94f90acd490969b36f28df7f3b00a91a25e1b97d28296d2505ec54c
|
||||
size 475503
|
||||
|
||||
@ -1,3 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:f0584f5e2953b9827bb31c1533f9e8a36e2998bf952f30c87630c2db48362b6a
|
||||
size 616933
|
||||
oid sha256:ea93243da8bf5dff7d14328f5c13def92bd011cc25230fd7dc8b0e5e71e04d77
|
||||
size 610691
|
||||
|
||||
@ -1,3 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:ce98ce92ba5132756ce4b5bd3ec350e47b80ef06e573baffdfae212ba0236d57
|
||||
size 591153
|
||||
oid sha256:ae33d408c456f0db40410e49c5575354ca2ddee7aa66fb64ed77083d5a5055f0
|
||||
size 585727
|
||||
|
||||
@ -1,3 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:be20a25127519e3e4816c0722c1868f9afd8556118d0ede9668a7313b781c0be
|
||||
size 603039
|
||||
oid sha256:4586d5bb50e101f31f248608789bddcb0bbcb80abd828db5c6d531b3d527dd7d
|
||||
size 596033
|
||||
|
||||
@ -1,3 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:a00e5f26406e9cfcf828c84c9cb054f096bf96d238989ddffef1da097243bb49
|
||||
size 582021
|
||||
oid sha256:64eebcb394addd848d705a111224979e41e202bad6e10a968ff246294c70ea27
|
||||
size 575015
|
||||
|
||||
@ -1,3 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:bf56cc695ada25ad4b2707dadf77d0c98c3087e2f52ee6c89b2a9d59d438cc8b
|
||||
size 702670
|
||||
oid sha256:7235264cc8742a630cb0ef55dce3b20550fcbeea1292af972eeaac882afb3234
|
||||
size 688486
|
||||
|
||||
@ -1,3 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:350653f2f4134348c19bdc0460dcff2f693023b783023a4ed6a1eacae1715dbc
|
||||
size 617191
|
||||
oid sha256:3b78df7d072083125979fb3e517a2bb564e6aec52e256091c127fcdb36f63a25
|
||||
size 610135
|
||||
|
||||
@ -1,3 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:90dd023f70455b0788425890b8cd7b869b21f732e2073d6085a60fea86f8fe10
|
||||
size 586695
|
||||
oid sha256:9eb18afe39f7022496e7666125b68a6a83d1a300bbe88fba2c62584bbca5f952
|
||||
size 579691
|
||||
|
||||
@ -1,3 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:b77f111c0a72a1c478c4850102b729dba22c7d28ce72b950af96e6ca838e36b9
|
||||
size 512909
|
||||
oid sha256:123b2693b236e75a98978328bc5e84c4d4155e82c2de1fd693a61bb284b5197c
|
||||
size 507483
|
||||
|
||||
@ -1,3 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:cbc91114ea6283743c584a2c908a431d0339d1f8f3c1b62d484e2147a6c91688
|
||||
size 564889
|
||||
oid sha256:20d78ba25b66081b8ca9bc81ac72d3db8700c35c71c5cbb7981cf03ffd723872
|
||||
size 559461
|
||||
|
||||
@ -1,3 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:61472723f538d24c9acd2715d33bdec5b0b398c8721f760b2d1487a18f9c56db
|
||||
size 494259
|
||||
oid sha256:f43e322151d205cb7be44aa960e9c058b1130f746a0554189323dab1c088366e
|
||||
size 488833
|
||||
|
||||
@ -1,3 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:4d573460b47edb6494ffaee16a7ad168fc838bc1f682276dcf883f3bf87dacd8
|
||||
size 561491
|
||||
oid sha256:bd3c554d3e609746f5c88052f610407e0ba7a77d218b98f23d1700db288d3672
|
||||
size 549849
|
||||
|
||||
@ -1,3 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:d04d9dc89e714b2fffd0e2c3584a5acfae5d8f09b372d73331bc6bbf8ff5e31e
|
||||
size 502949
|
||||
oid sha256:303745a8ad276cbf903057d90dc83c5d5d187b9a7fc8820896460789c04a4d55
|
||||
size 496463
|
||||
|
||||
@ -1,3 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:357f912b4b3932fbd1715ff5d3630cf0f57ad1ccc8345319f66f59dfe64a244a
|
||||
size 456883
|
||||
oid sha256:322e38aed9acfc5ba5083010671a292a15866cfc7ada0a1ef04ca08d11402e0f
|
||||
size 450619
|
||||
|
||||
@ -1,3 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:e50308861808e757f76cb664d4834089006cb5897ca20ca7c62047681b38d58d
|
||||
size 442723
|
||||
oid sha256:ef75d9f8b4c1e439803fa420345fdd27884817117e761f3d655eea2f1d300487
|
||||
size 436507
|
||||
|
||||
@ -1,3 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:07088afdd4892f3359c43cd71a036eacf99aaa7a61ff5be89cd1a14380d69f57
|
||||
size 450859
|
||||
oid sha256:f7ba48776a57bee1eaba279f88b455d71830353dd0f1c17982d38534ccc7bb79
|
||||
size 444643
|
||||
|
||||
@ -1,3 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:cd3ae620c3742f516c4cd35b72773bc305e98b98b1cc4e63d44d5083d30f123f
|
||||
size 436747
|
||||
oid sha256:19cac5380575aff8a39767424d38b6b463b006bbf92927f24c96da6593cdffab
|
||||
size 430531
|
||||
|
||||
@ -1,3 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:10dfcdeb3f26e142320f4deaa7d3100d3b411fc45696bb21232a102b0694efc1
|
||||
size 558333
|
||||
oid sha256:254581db67314ea198da7aee258596f04f6def6d01a41fd1ef71705b04087e20
|
||||
size 549083
|
||||
|
||||
@ -1,3 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:bec6c7e277fa9f46c6e6650bab9c17611dc4cd1669b4d75211d233db672576b9
|
||||
size 502233
|
||||
oid sha256:18572512305e1229566e0a6eb81f88f22fa8dae442f08cfa419334b00b89e980
|
||||
size 495769
|
||||
|
||||
@ -1,3 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:0b6ed54b526c428b08dbe9f7e80e7673fb8a2d985ce065d9ace6730c44dbcd28
|
||||
size 441719
|
||||
oid sha256:16bd9568835544733fdff2c27b988f109b15c638ab04cadf8e672fa9831fba33
|
||||
size 442607
|
||||
|
||||
@ -1,3 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:fee499381c761edd8ed3c707e9e86af26b7d8112b7285d8562364445b4db15e3
|
||||
size 384533
|
||||
oid sha256:7ec6566eadcb6b1c559e140b150aa15380b69aa3a5ad6363a6f2f55e8e6a66bd
|
||||
size 378317
|
||||
|
||||
@ -1,3 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:527451a03c38f8a12a2fd09c911c4ea54cc89bd7f688108ea30694f661bbb5f1
|
||||
size 424449
|
||||
oid sha256:a97ad0f83edda84db155ab182b6805ab79f1b7f1f3b9a33717aa14ffcdba02f6
|
||||
size 425337
|
||||
|
||||
@ -1,3 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:1e3cb444f826882d1f9fdde1d49bae9235f36fa683a9ab2b7879f82a4207d689
|
||||
size 367265
|
||||
oid sha256:d8b3b3bd9790a04368061c248476f1921afaab33635117336f4fe3f8f7621e9c
|
||||
size 361049
|
||||
|
||||
@ -1,3 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:32f4a45f95b8ae84a0e3ae5a43b3b5f5c7362b1ce223819f516f4b4e73170bb7
|
||||
size 473445
|
||||
oid sha256:471d613cc4f330de90ee3adaeb3ca60311cd5f5566f6c186347954cbc27f9536
|
||||
size 468017
|
||||
|
||||
@ -1,3 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:3d8a3cb32086a1867cce707e2300b7dae6b3f47db799a77848534a205e6846c1
|
||||
size 458543
|
||||
oid sha256:3667df7be928d46644c990d5edf800b4267ca16d2b1af246c2ff1fcd185aa63f
|
||||
size 453117
|
||||
|
||||
@ -1,3 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:aa25c1b87150f6db0800cebb0d799ec948d40e80384c5312eedb52f73a1d6618
|
||||
size 473783
|
||||
oid sha256:198938eb3da07564a649fc28ae9a1f347c4aa3b3b4d94c7df599e164c7c78153
|
||||
size 467567
|
||||
|
||||
@ -1,3 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:89c8a875579b5d4ff2f53c6925a71f5f382ec4e0763dd56ec92423aab9166a94
|
||||
size 458883
|
||||
oid sha256:dba544890e95b2fad732d7de898a7bfe6bae8c9e91d0655fe7f092314a6f8956
|
||||
size 452667
|
||||
|
||||
@ -1,3 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:a654c16808084190287ebf34535e68dc1f4d3d1f3c082912633e372f35df0648
|
||||
size 601089
|
||||
oid sha256:817cf4afb5bd708007464a22a2ae0ce270807ba2b7e5b259d1613f9380d4f9e5
|
||||
size 594947
|
||||
|
||||
@ -1,3 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:1fdf46c99fb0c98db2c447a5e22075c9f3e037665a9945e61763adf2b3f5824f
|
||||
size 545605
|
||||
oid sha256:ce202d4c5578e1aad38916fb24e627f6f217c727bb3cbb201d268a59d6f1eb17
|
||||
size 538231
|
||||
|
||||
@ -1,3 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:04b2dd3f1d6ce30d084c944caf3301c274424a004097efdff1a345f72083dff8
|
||||
size 470069
|
||||
oid sha256:8c14dce9f0ef34ba1bfa4dc4adc7d99b304910ca5fb785bd41b0612f9771c784
|
||||
size 462275
|
||||
|
||||
@ -1,3 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:6fb4cc06f718df560c8de15140cabef4e98bd1d0f35e186cf37d5e94a8256863
|
||||
size 400231
|
||||
oid sha256:28c0a61e45a428546baada7385da98a5beba03372ec0bfd4282b8edd305ba7de
|
||||
size 394803
|
||||
|
||||
@ -1,3 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:5b1d5f5ed80ed7d8d0c0dcf465f31ada29aeedaa85e4da693a684a5cf940d927
|
||||
size 449643
|
||||
oid sha256:dec9c27d8f9b4bd5db6f88275274c94adbd30ea2ce8220afe8fbd8b4f0dba5b6
|
||||
size 443427
|
||||
|
||||
@ -1,3 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:06c57c87ed62e80008fb18fe5688f1f633c45aa0cc1a66f826c1efac62af8bdf
|
||||
size 382961
|
||||
oid sha256:d1c92e1c9d473d21e3a9bf07502e01eef0903b9183ee409ff40bfa089efa5ee1
|
||||
size 377535
|
||||
|
||||
@ -1,3 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:e2568162532c48e842319952122b3835e4ef014861cff4f4e37491d3703ef58a
|
||||
size 749338
|
||||
oid sha256:286fce5fedc98c25e31b2aa6f9e527364b1f634cebbcf4174a9e5c6e86d73226
|
||||
size 725312
|
||||
|
||||
@ -1,3 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:07064296444d7ac6bfff94dcc790acd605fef68a760cbc5ead3296c880789567
|
||||
size 661516
|
||||
oid sha256:1e8d8a2baf9baa1eae8674c5f55a26346e6e004d90b0d035179de74362b79275
|
||||
size 636060
|
||||
|
||||
@ -1,3 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:75be8ecca1ec7acabb23fe44d3cd08fa50d46a2600a921c8e2197ff731141607
|
||||
size 776790
|
||||
oid sha256:aaca490b5dd9c7990f603c992bb750e617d927caceb019397eb8eb4fa44194de
|
||||
size 749312
|
||||
|
||||
@ -1,3 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:b354769aa9f5659b436534f7a3cc4aa09a493858d43ab70bdc316f6b0d88d512
|
||||
size 688476
|
||||
oid sha256:645a80c48a4be552dc052a5ae103e69cf8a3b504a97d2a5b235005da1ad5261c
|
||||
size 663414
|
||||
|
||||
@ -1,3 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:883f192f66396084fb325abbb832687f2f27887c0018a970e39bcae575b4aa95
|
||||
size 753232
|
||||
oid sha256:7922a772bf56cc4a42e3f8fa5ea6ea26a20092baa25e85254820ab7ebb872f0b
|
||||
size 722004
|
||||
|
||||
@ -1,3 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:b35e56a59c42ea0b6f30bb49076c0f93611fcb00cdd76dd4353b465adc0c75c9
|
||||
size 661810
|
||||
oid sha256:511da0ba6a3dda888cc95df62793f89fc236b630df0bd8a4e41921058a3ee1ec
|
||||
size 636650
|
||||
|
||||
@ -1,3 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:dec9396cbb1bdd3ca6dfd831b9c957ad2fa31e0ddf2a6455b91b6aeab6c7d06b
|
||||
size 773336
|
||||
oid sha256:72c6ccb1e756e7429685f0e898e60b3b680b0893638054331b88322926109f36
|
||||
size 745610
|
||||
|
||||
@ -1,3 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:0f445f0ff16b08249c7700137d350ee6b1964ddb183bf4d5646852504a38dc61
|
||||
size 688524
|
||||
oid sha256:f59be6bd1031842c71104dd7b520c66b661b24b8763dea1d663e0cf58fcaa9c7
|
||||
size 664696
|
||||
|
||||
@ -1,3 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:98265cb4cc56fe7336995f39759b8a4dbd6d35b68f9c934d746ec9304c16e488
|
||||
size 818090
|
||||
oid sha256:e91b7bac7cf917b6003e608ef39fc92f5b7ae1ee6c4e16e56cae1b572f5f4781
|
||||
size 790118
|
||||
|
||||
@ -1,3 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:80eaf7e695a9144f3e9ae58526dba4d62e19c374e3e3f5191d4216e2276ce68b
|
||||
size 731700
|
||||
oid sha256:4c52649dc2f57b803013a52f84db1d5789258817b6c34ca17917e1c38b94a64e
|
||||
size 706540
|
||||
|
||||
@ -1,3 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:7919d7baf0bd29baa550b4c9331cb5b74c9184211648a2ae4309956fe4b707e1
|
||||
size 844952
|
||||
oid sha256:652213aa98b3b0141802acf20310dcf7a8f24a05e94062b4e8f82b3b1f272b38
|
||||
size 817078
|
||||
|
||||
@ -1,3 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:9de7275c32c73482f2ae5e5336051bdbc172dfe02cd35950535363775a97636a
|
||||
size 760042
|
||||
oid sha256:4d9521c6adfcd41ee2730cd1e0ed4502661955c2c2b0a5e6355d150b16492b0b
|
||||
size 735128
|
||||
|
||||
@ -1,3 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:5be32375b4f0595bfd691d4b01e5851737bda01669dcbd1a101db1e7fecb7f8c
|
||||
size 874036
|
||||
oid sha256:2c99f44a0f0c435436496cbe1d498f185604b701ea8fb67ad1a0e2c8ee9cc444
|
||||
size 864762
|
||||
|
||||
@ -1,3 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:83558fa89e1d26bbf51093c42c3cc3af31711cb555fb496620a95deed45345a5
|
||||
size 706006
|
||||
oid sha256:09126ea87e05b001bfdcb4b410ca492a00f7f9b29cd63b021153eee0e582f8df
|
||||
size 696188
|
||||
|
||||
@ -1,3 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:6e9ca086d9b2d1a1996b6b948efaeb61c609f8c052b901a1dee4e3992220784f
|
||||
size 775714
|
||||
oid sha256:bffb58dea79c2d285f3c70d5950a6e6af4213674df55ab97aa3bf597bbddbf6a
|
||||
size 763528
|
||||
|
||||
@ -1,3 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:9849935550b57458ebbc1d8cc12f4db1b05c33af085aedd5df63b3c845ad342c
|
||||
size 773198
|
||||
oid sha256:a7d0c270d8b10533914550d7b0f251dd0ad666910bdaad35f2aa4cef257df932
|
||||
size 762986
|
||||
|
||||
@ -1,3 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:42e8365c84d53022a6ef4ecfc0ac76a079fb3b4637ee7ebdb43d2f16da0f12e0
|
||||
size 660370
|
||||
oid sha256:f9e73caf1128411b4154623d89c0b18eeb08eefa1c83b0c62df124e9e035f885
|
||||
size 650306
|
||||
|
||||
@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:586cb3a876e522f5968a14b01f412fdcc5dac8d4d14d1b00721b590ac02343c0
|
||||
size 919842
|
||||
@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:5c38ae728025373804ca8bd535fea64a34bef25b3937d0b1c09beb5a42d7d425
|
||||
size 599913
|
||||
@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:5833c71e1fe3984ac1ec0f9dc51c18131db4bb408760c8144e406f8e9118a053
|
||||
size 657042
|
||||
@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:80b217cc222c46a69212e3408b94d7dd90e97e14cb7ddad07d520b295fc11989
|
||||
size 786344
|
||||
@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:df279dde0c4be2f933ca078a633ad278365853f4ce3394b132b67687f34d0482
|
||||
size 558865
|
||||
@ -1,3 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:f2987e51b51870e7bcfdb3e631f4cbd0f5fbe47f9a8fb8f219fb6f716d11c27a
|
||||
size 834318
|
||||
oid sha256:707c90f34cff5ff1eb2efa9fb19f1167d12aab1120bd2c38025ff8682dff53ed
|
||||
size 808220
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user