mirror of
https://github.com/NVIDIA/TensorRT-LLM.git
synced 2026-02-12 22:14:03 +08:00
Merge branch 'main' into fix_spec_gate
Signed-off-by: Zheyu Fu <zheyuf@nvidia.com>
This commit is contained in:
commit
b51ee2bb0d
@ -68,6 +68,7 @@ option(USING_OSS_CUTLASS_MOE_GEMM "Using open sourced Cutlass moe gemm kernel"
|
||||
ON)
|
||||
option(USING_OSS_CUTLASS_ALLREDUCE_GEMM
|
||||
"Using open sourced Cutlass AR gemm kernel" ON)
|
||||
option(SKIP_SOFTMAX_STAT "Enable Statistics of Skip-Softmax" OFF)
|
||||
|
||||
message(STATUS "ENABLE_NVSHMEM is ${ENABLE_NVSHMEM}")
|
||||
|
||||
@ -360,6 +361,11 @@ else()
|
||||
$<$<COMPILE_LANGUAGE:CUDA>:ENABLE_NVSHMEM=0>)
|
||||
endif()
|
||||
|
||||
if(SKIP_SOFTMAX_STAT)
|
||||
add_compile_definitions("SKIP_SOFTMAX_STAT")
|
||||
message(STATUS "SKIP_SOFTMAX_STAT is enabled")
|
||||
endif()
|
||||
|
||||
# Fix linking issue with TRT 10, the detailed description about `--mcmodel` can
|
||||
# be found in
|
||||
# https://gcc.gnu.org/onlinedocs/gcc/x86-Options.html#index-mcmodel_003dmedium-1
|
||||
|
||||
@ -69,6 +69,11 @@ PREPROCESSOR_FLAGS += -DUSE_SAME_SUM_ORDER_IN_SOFTMAX_AS_REF_CODE
|
||||
# Do we want to use half accumulation for flash attention
|
||||
PREPROCESSOR_FLAGS += -DHALF_ACCUMULATION_FOR_FLASH_ATTENTION
|
||||
|
||||
# Print the resulted sparsity given threshold in Skip-Softmax attention
|
||||
# Note: You only need to "python scripts/build_wheel.py -D SKIP_SOFTMAX_STAT=ON ..." to use it inside TRTLLM.
|
||||
# Turn this on manually only if you want to build&run the unittest (bin/fmha.exe) with SKIP_SOFTMAX_STAT.
|
||||
# PREPROCESSOR_FLAGS += -DSKIP_SOFTMAX_STAT
|
||||
|
||||
# Add FLAGS when generating cubins.
|
||||
ifdef GENERATE_CUBIN
|
||||
PREPROCESSOR_FLAGS += -DGENERATE_CUBIN
|
||||
|
||||
@ -154,7 +154,9 @@ spec_fields = (
|
||||
'head_size_v',
|
||||
'sage_block_sizes',
|
||||
'output_dtype',
|
||||
'is_mtp')
|
||||
'is_mtp',
|
||||
'enable_skip_softmax',
|
||||
)
|
||||
kernel_spec = namedtuple('kernel_spec', spec_fields)
|
||||
kernel_spec.__new__.__defaults__ = (
|
||||
1, # ctas_per_head
|
||||
@ -179,7 +181,9 @@ kernel_spec.__new__.__defaults__ = (
|
||||
0, # head size of V
|
||||
None, # sage_block_sizes
|
||||
None, # output_dtype, same as dtype by default.
|
||||
False) # use MTP or not
|
||||
False, # use MTP or not
|
||||
False, # enable skip softmax
|
||||
)
|
||||
|
||||
generate_cu_trtllm = os.environ.get('GENERATE_CU_TRTLLM',
|
||||
'False').lower() == 'true'
|
||||
@ -1435,6 +1439,7 @@ using Ktraits = {kernel_traits_header}
|
||||
USE_TMA_STORE,
|
||||
{enable_attn_logit_softcapping_flag},
|
||||
{return_softmax_stats_flag},
|
||||
{enable_skip_softmax_flag},
|
||||
{output_dtype_},
|
||||
{sage_block_size_q},
|
||||
{sage_block_size_k},
|
||||
@ -1458,6 +1463,7 @@ using Ktraits_causal = {kernel_traits_header}
|
||||
USE_TMA_STORE,
|
||||
{enable_attn_logit_softcapping_flag},
|
||||
{return_softmax_stats_flag},
|
||||
{enable_skip_softmax_flag},
|
||||
{output_dtype_}>;
|
||||
|
||||
using Ktraits_sliding_or_chunked_causal = {kernel_traits_header}
|
||||
@ -1478,6 +1484,7 @@ using Ktraits_sliding_or_chunked_causal = {kernel_traits_header}
|
||||
USE_TMA_STORE && false,
|
||||
{enable_attn_logit_softcapping_flag},
|
||||
{return_softmax_stats_flag},
|
||||
{enable_skip_softmax_flag},
|
||||
{output_dtype_}>;
|
||||
|
||||
using Ktraits_custom_mask = {kernel_traits_header}
|
||||
@ -1498,6 +1505,7 @@ using Ktraits_custom_mask = {kernel_traits_header}
|
||||
USE_TMA_STORE && false,
|
||||
{enable_attn_logit_softcapping_flag},
|
||||
{return_softmax_stats_flag},
|
||||
{enable_skip_softmax_flag},
|
||||
{output_dtype_}>;
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
@ -1835,6 +1843,8 @@ def encode_name(kernel_spec):
|
||||
|
||||
if kernel_spec.enable_attn_logit_softcapping:
|
||||
feature_tags += '_softcapping'
|
||||
if kernel_spec.enable_skip_softmax:
|
||||
feature_tags += '_skipSoftmax'
|
||||
if kernel_spec.sage_block_sizes:
|
||||
feature_tags += f"_sage_{'_'.join(map(str, kernel_spec.sage_block_sizes))}"
|
||||
if kernel_spec.output_dtype:
|
||||
@ -2131,6 +2141,8 @@ def get_kernel_code(kspec, kname, lname):
|
||||
|
||||
return_softmax_stats_flag = pythonBoolean2cpp[kspec.return_softmax_stats]
|
||||
|
||||
enable_skip_softmax_flag = pythonBoolean2cpp[kspec.enable_skip_softmax]
|
||||
|
||||
# needed by warpspec kernels.
|
||||
fp8_kernel = kspec.dtype in ["e4m3", "e4m3_fp32"]
|
||||
kernel_traits_header = "fmha::ws::Kernel_traits_Hopper_qgmma_e4m3_fp32<" if fp8_kernel \
|
||||
@ -2331,6 +2343,8 @@ def get_api_code(specs_names):
|
||||
f'&& sage_block_size_k == {sage_block_size_k} ' \
|
||||
f'&& sage_block_size_v == {sage_block_size_v} '
|
||||
|
||||
il_check += '&& enable_skip_softmax ' if kspec.enable_skip_softmax else '&& !enable_skip_softmax '
|
||||
|
||||
il_check += '&& params.use_int8_scale_max ' if kspec.has_scale_max else '&& !params.use_int8_scale_max '
|
||||
|
||||
slen = kspec.seq_len * kspec.ctas_per_head if not kspec.flash_attention else 0
|
||||
@ -2607,6 +2621,7 @@ const bool warp_specialization = launch_params.warp_specialization
|
||||
const bool use_tma = launch_params.use_tma;
|
||||
const bool use_flash_attention = launch_params.flash_attention;
|
||||
const bool enable_attn_logit_softcapping = launch_params.enable_attn_logit_softcapping;
|
||||
const bool enable_skip_softmax = launch_params.enable_skip_softmax;
|
||||
const int attention_input_layout = static_cast<int>(launch_params.attention_input_layout);
|
||||
// tiled variant uses ldgsts
|
||||
const bool use_tiled = launch_params.use_granular_tiling;
|
||||
@ -2785,6 +2800,8 @@ def get_kernel_traits_code(specs_names):
|
||||
enable_attn_logit_softcapping_flag = pythonBoolean2cpp[
|
||||
kspec.enable_attn_logit_softcapping]
|
||||
|
||||
enable_skip_softmax_flag = pythonBoolean2cpp[kspec.enable_skip_softmax]
|
||||
|
||||
tmp = dict(locals(), **kspec._asdict())
|
||||
|
||||
if effective_sm < 90:
|
||||
@ -2903,7 +2920,8 @@ def get_kernel_traits_code(specs_names):
|
||||
{input_layout_flag},
|
||||
__use_tma_store__ /* USE_TMA_STORE */,
|
||||
{enable_attn_logit_softcapping_flag},
|
||||
{return_softmax_stats_flag}>;
|
||||
{return_softmax_stats_flag},
|
||||
{enable_skip_softmax_flag}>;
|
||||
|
||||
printf("%s %d %d %s %d %d\\n",
|
||||
\"{kname}\",
|
||||
@ -3062,9 +3080,16 @@ def get_kernel_traits_code(specs_names):
|
||||
# For now:
|
||||
# 1. Hopper head_size 128 kernel uses cubins for performance regressions.
|
||||
# 2. Hopper sm89 with e4m3/e4m3_fp32 dtype uses cubins for accuracy regressions (will be fixed).
|
||||
# 3. For skip-softmax attention feature, we force not to use cubins.
|
||||
# You should set the condition `use_cubin_header` to false if you have modified the source codes of those kernels that use cubins.
|
||||
# This ensures that the kernels will be recompiled using the updated source code rather than relying on precompiled cubins.
|
||||
def use_cubin_header(sm, head_size, dtype, output_dtype=None):
|
||||
def use_cubin_header(sm,
|
||||
head_size,
|
||||
dtype,
|
||||
output_dtype=None,
|
||||
enable_skip_softmax=False):
|
||||
if enable_skip_softmax:
|
||||
return False
|
||||
if 'e4m3' in dtype and output_dtype in ['bf16', 'fp16']:
|
||||
return False
|
||||
return (sm == 90 and head_size == 128) or (sm == 89 and 'e4m3' in dtype)
|
||||
@ -3079,7 +3104,8 @@ def get_cubin_header(kernel_traits, specs_names):
|
||||
launchers_dict = {}
|
||||
for kspec, fname, lname, kname in specs_names:
|
||||
if generate_cu_trtllm and not use_cubin_header(
|
||||
kspec.sm, kspec.head_size, kspec.dtype, kspec.output_dtype):
|
||||
kspec.sm, kspec.head_size, kspec.dtype, kspec.output_dtype,
|
||||
kspec.enable_skip_softmax):
|
||||
continue
|
||||
name = fname.replace('.', '_')
|
||||
data = 'extern unsigned char cubin_{name}_cubin[];'.format(name=name)
|
||||
@ -3111,8 +3137,9 @@ def get_cubin_header(kernel_traits, specs_names):
|
||||
'q_kv_', '').replace('q_paged_kv_', '').replace(
|
||||
'q_k_v_', '').replace('ws_', '').replace(
|
||||
'softcapping_',
|
||||
'').replace('sage_',
|
||||
'').replace('output_', ''))
|
||||
'').replace('sage_', '').replace(
|
||||
'skipSoftmax_',
|
||||
'').replace('output_', ''))
|
||||
flash_attention = 'flash_attention' in kname
|
||||
warp_specialization = 'tma_ws' in kname
|
||||
toks = tname.split('_')
|
||||
@ -3209,6 +3236,8 @@ def get_cubin_header(kernel_traits, specs_names):
|
||||
return_softmax_stats_flag = pythonBoolean2cpp[sm != '90' or (
|
||||
sm == '90' and '_softmax' in kname)]
|
||||
|
||||
enable_skip_softmax_flag = pythonBoolean2cpp['_skipSoftmax' in kname]
|
||||
|
||||
# meta_unroll_step
|
||||
meta_unroll_step = unroll_step if ('_nl' in kname
|
||||
or '_ws' in kname) else '0'
|
||||
@ -3235,7 +3264,8 @@ def get_cubin_header(kernel_traits, specs_names):
|
||||
|
||||
def get_lname_from_kname(kname: str) -> str:
|
||||
if use_cubin_header(int(sm), int(head_size), prec.lower(),
|
||||
output_prec.lower()):
|
||||
output_prec.lower(),
|
||||
enable_skip_softmax_flag):
|
||||
return 'nullptr'
|
||||
lname = kname.replace('_kernel', '')
|
||||
mask_types = [
|
||||
@ -3253,15 +3283,15 @@ def get_cubin_header(kernel_traits, specs_names):
|
||||
{sage_block_sizes[0]}, {sage_block_sizes[1]}, {sage_block_sizes[2]}, kSM_{sm}, {cubin_name}, \
|
||||
{cubin_name}_len, \"{kname}\", {smem}, {threads}, {meta_unroll_step}, {attention_mask_type_value}, \
|
||||
{attention_input_layout_value}, {is_il}, {is_flash_atten}, {is_warp_specialization}, {is_fp32_accu}, \
|
||||
{is_alibi_supported}, {is_tiled}, {has_softcapping_scale}, {return_softmax_stats_flag}, {lname}}}\
|
||||
'''.format(**locals()) if use_cubin_header(int(sm),
|
||||
int(head_size), prec.lower(),
|
||||
output_prec.lower()) else '''\
|
||||
{is_alibi_supported}, {is_tiled}, {has_softcapping_scale}, {return_softmax_stats_flag}, {enable_skip_softmax_flag}, {lname}}}\
|
||||
'''.format(**locals()) if use_cubin_header(int(sm), int(head_size),
|
||||
prec.lower(), output_prec.lower(),
|
||||
enable_skip_softmax_flag) else '''\
|
||||
{{ DATA_TYPE_{prec}, DATA_TYPE_{output_prec}, {seq_len}, {q_step}, {kv_step}, {head_size}, {head_size_v}, \
|
||||
{sage_block_sizes[0]}, {sage_block_sizes[1]}, {sage_block_sizes[2]}, kSM_{sm}, nullptr, \
|
||||
0, \"{kname}\", {smem}, {threads}, {meta_unroll_step}, {attention_mask_type_value}, \
|
||||
{attention_input_layout_value}, {is_il}, {is_flash_atten}, {is_warp_specialization}, {is_fp32_accu}, \
|
||||
{is_alibi_supported}, {is_tiled}, {has_softcapping_scale}, {return_softmax_stats_flag}, {lname}}}\
|
||||
{is_alibi_supported}, {is_tiled}, {has_softcapping_scale}, {return_softmax_stats_flag}, {enable_skip_softmax_flag}, {lname}}}\
|
||||
'''.format(**locals())
|
||||
else:
|
||||
code = '''\
|
||||
@ -3269,7 +3299,7 @@ def get_cubin_header(kernel_traits, specs_names):
|
||||
{sage_block_sizes[0]}, {sage_block_sizes[1]}, {sage_block_sizes[2]}, kSM_{sm}, {cubin_name}, \
|
||||
{cubin_name}_len, \"{kname}\", {smem}, {threads}, {meta_unroll_step}, {attention_mask_type_value}, \
|
||||
{attention_input_layout_value}, {is_il}, {is_flash_atten}, {is_warp_specialization}, {is_fp32_accu}, \
|
||||
{is_alibi_supported}, {is_tiled}, {has_softcapping_scale}, {return_softmax_stats_flag}}}\
|
||||
{is_alibi_supported}, {is_tiled}, {has_softcapping_scale}, {return_softmax_stats_flag}, {enable_skip_softmax_flag}}}\
|
||||
'''.format(**locals())
|
||||
if sm in metadata_v2_dict:
|
||||
metadata_v2_dict[sm].append(code)
|
||||
@ -3377,7 +3407,8 @@ static const struct FusedMultiHeadAttentionKernelMetaInfoV2
|
||||
bool mAlibiSupported;
|
||||
bool mTiled;
|
||||
bool mEnableAttnLogitSoftcapping;
|
||||
bool mReturnSoftmaxStats;{launcher_line}
|
||||
bool mReturnSoftmaxStats;
|
||||
bool mEnableSkipSoftmax;{launcher_line}
|
||||
}} sMhaKernelMetaInfosV2[] = {{
|
||||
{metadata_v2}
|
||||
}};
|
||||
@ -3438,6 +3469,7 @@ static const struct TestMetaV2
|
||||
bool mTiled;
|
||||
bool mEnableAttnLogitSoftcapping;
|
||||
bool mReturnSoftmaxStats;
|
||||
bool mEnableSkipSoftmax;
|
||||
}} metaV2[] = {{
|
||||
{metadata_v2}
|
||||
}};
|
||||
@ -3484,7 +3516,8 @@ struct FusedMultiHeadAttentionKernelMetaInfoV2
|
||||
bool mAlibiSupported;
|
||||
bool mTiled;
|
||||
bool mEnableAttnLogitSoftcapping;
|
||||
bool mReturnSoftmaxStats;{launcher_line}
|
||||
bool mReturnSoftmaxStats;
|
||||
bool mEnableSkipSoftmax;{launcher_line}
|
||||
}};
|
||||
|
||||
extern const FusedMultiHeadAttentionKernelMetaInfoV2 sMhaKernelMetaInfosV2[];
|
||||
@ -3580,7 +3613,8 @@ struct FusedMultiHeadAttentionKernelMetaInfoV2
|
||||
bool mAlibiSupported;
|
||||
bool mTiled;
|
||||
bool mEnableAttnLogitSoftcapping;
|
||||
bool mReturnSoftmaxStats;{launcher_line}
|
||||
bool mReturnSoftmaxStats;
|
||||
bool mEnableSkipSoftmax;{launcher_line}
|
||||
}};
|
||||
|
||||
extern const FusedMultiHeadAttentionKernelMetaInfoV2 sMhaKernelMetaInfosV2[] = {{
|
||||
@ -3637,7 +3671,7 @@ extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_128_sm80_
|
||||
return '\n'.join(lines)
|
||||
|
||||
target = "fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_128_causal_sm80_kernel_nl_tiled"
|
||||
new_line = '{ DATA_TYPE_FP16, DATA_TYPE_FP16, 0, 64, 128, 128, 128, 0, 0, 0, kSM_80, cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_128_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_128_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_128_causal_sm80_kernel_nl_tiled", 81920, 128, 64, 1, 2, false, true, false, false, true, true, false, true, nullptr},'
|
||||
new_line = '{ DATA_TYPE_FP16, DATA_TYPE_FP16, 0, 64, 128, 128, 128, 0, 0, 0, kSM_80, cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_128_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_128_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_128_causal_sm80_kernel_nl_tiled", 81920, 128, 64, 1, 2, false, true, false, false, true, true, false, true, false, nullptr},'
|
||||
result = modify_kernel_line(result, target, new_line)
|
||||
|
||||
# make sure only one empty line at the end
|
||||
@ -3801,7 +3835,10 @@ def enumerate_hgmma_ldgsts_kernels(specs, sm=90, dtype='fp16'):
|
||||
|
||||
|
||||
# Note this will be used in TRT-LLM.
|
||||
def enumerate_hgmma_flash_warpspec_kernels(specs, sm=90, dtype='fp16'):
|
||||
def enumerate_hgmma_flash_warpspec_kernels(specs,
|
||||
sm=90,
|
||||
dtype='fp16',
|
||||
enable_skip_softmax=False):
|
||||
|
||||
scheduling_mode = int(os.getenv('SCHEDULING_MODE', '1'))
|
||||
|
||||
@ -3851,7 +3888,8 @@ def enumerate_hgmma_flash_warpspec_kernels(specs, sm=90, dtype='fp16'):
|
||||
enable_attn_logit_softcapping=enable_attn_logit_softcapping,
|
||||
return_softmax_stats=return_softmax,
|
||||
scheduling_mode=scheduling_mode,
|
||||
input_layout=input_layout))
|
||||
input_layout=input_layout,
|
||||
enable_skip_softmax=enable_skip_softmax))
|
||||
|
||||
specs.append(
|
||||
kernel_spec(
|
||||
@ -3883,7 +3921,8 @@ def enumerate_hgmma_flash_warpspec_kernels(specs, sm=90, dtype='fp16'):
|
||||
enable_attn_logit_softcapping=enable_attn_logit_softcapping,
|
||||
return_softmax_stats=return_softmax,
|
||||
scheduling_mode=scheduling_mode,
|
||||
input_layout=input_layout))
|
||||
input_layout=input_layout,
|
||||
enable_skip_softmax=enable_skip_softmax))
|
||||
|
||||
specs.append(
|
||||
kernel_spec(
|
||||
@ -3915,7 +3954,8 @@ def enumerate_hgmma_flash_warpspec_kernels(specs, sm=90, dtype='fp16'):
|
||||
enable_attn_logit_softcapping=enable_attn_logit_softcapping,
|
||||
return_softmax_stats=return_softmax,
|
||||
scheduling_mode=scheduling_mode,
|
||||
input_layout=input_layout))
|
||||
input_layout=input_layout,
|
||||
enable_skip_softmax=enable_skip_softmax))
|
||||
'''
|
||||
smem size = (q_step * d * q_buffers * NUM_COMPUTE_GROUPS
|
||||
+ (kv_step * d + kv_step * dv) * kv_buffers) * ele_size
|
||||
@ -3967,7 +4007,8 @@ def enumerate_qgmma_flash_warpspec_kernels(specs,
|
||||
sm=90,
|
||||
dtype='e4m3',
|
||||
sage_block_sizes=None,
|
||||
output_dtype=None):
|
||||
output_dtype=None,
|
||||
enable_skip_softmax=False):
|
||||
|
||||
scheduling_mode = int(os.getenv('SCHEDULING_MODE', '1'))
|
||||
|
||||
@ -4021,7 +4062,8 @@ def enumerate_qgmma_flash_warpspec_kernels(specs,
|
||||
scheduling_mode=scheduling_mode,
|
||||
input_layout=input_layout,
|
||||
sage_block_sizes=sage_block_sizes,
|
||||
output_dtype=output_dtype))
|
||||
output_dtype=output_dtype,
|
||||
enable_skip_softmax=enable_skip_softmax))
|
||||
|
||||
# 64 < D <=128: KV_STEP = 128
|
||||
specs.append(
|
||||
@ -4056,7 +4098,8 @@ def enumerate_qgmma_flash_warpspec_kernels(specs,
|
||||
scheduling_mode=scheduling_mode,
|
||||
input_layout=input_layout,
|
||||
sage_block_sizes=sage_block_sizes,
|
||||
output_dtype=output_dtype))
|
||||
output_dtype=output_dtype,
|
||||
enable_skip_softmax=enable_skip_softmax))
|
||||
|
||||
# 128 < D <=256: KV_STEP = 128
|
||||
specs.append(
|
||||
@ -4092,7 +4135,8 @@ def enumerate_qgmma_flash_warpspec_kernels(specs,
|
||||
scheduling_mode=scheduling_mode,
|
||||
input_layout=input_layout,
|
||||
sage_block_sizes=sage_block_sizes,
|
||||
output_dtype=output_dtype))
|
||||
output_dtype=output_dtype,
|
||||
enable_skip_softmax=enable_skip_softmax))
|
||||
|
||||
if not skip_mla_combination:
|
||||
# context MLA (192x128)
|
||||
@ -6374,13 +6418,21 @@ def enumerate_kernels():
|
||||
enumerate_igmma_kernels(specs, sm=90)
|
||||
enumerate_qgmma_kernels(specs, sm=90)
|
||||
# need to add bf16 kernels if needed
|
||||
enumerate_hgmma_flash_warpspec_kernels(specs, sm=90, dtype='fp16')
|
||||
enumerate_hgmma_flash_warpspec_kernels(specs, sm=90, dtype='bf16')
|
||||
enumerate_qgmma_flash_warpspec_kernels(specs, sm=90, dtype='e4m3')
|
||||
enumerate_qgmma_flash_warpspec_kernels(specs,
|
||||
sm=90,
|
||||
dtype='e4m3',
|
||||
output_dtype="bf16")
|
||||
for enable_skip_softmax in [False, True]:
|
||||
if enable_skip_softmax and 'DISABLE_SKIP_SOFTMAX' in os.environ:
|
||||
continue
|
||||
enumerate_hgmma_flash_warpspec_kernels(
|
||||
specs, sm=90, dtype='fp16', enable_skip_softmax=enable_skip_softmax)
|
||||
enumerate_hgmma_flash_warpspec_kernels(
|
||||
specs, sm=90, dtype='bf16', enable_skip_softmax=enable_skip_softmax)
|
||||
enumerate_qgmma_flash_warpspec_kernels(
|
||||
specs, sm=90, dtype='e4m3', enable_skip_softmax=enable_skip_softmax)
|
||||
enumerate_qgmma_flash_warpspec_kernels(
|
||||
specs,
|
||||
sm=90,
|
||||
dtype='e4m3',
|
||||
output_dtype="bf16",
|
||||
enable_skip_softmax=enable_skip_softmax)
|
||||
|
||||
# For now SageAttention only needs BF16
|
||||
# block_size_q should be divisible by 64
|
||||
|
||||
@ -256,7 +256,8 @@ struct Compute
|
||||
actual_kv_seqlen, alibi_head_scale, \
|
||||
USE_CUSTOM_MASK ? (head_info.mask_sum_s + q_step_idx * STEP_Q + local_q_tile_offset) \
|
||||
: (q_step_idx * STEP_Q + head_info.q_tile_offset), \
|
||||
kv_step_idx * STEP_KV, sage_scale_row, cbr, cbr_v, mutex_accessor, kv_step_idx == kv_idx_end - 1);
|
||||
kv_step_idx * STEP_KV, sage_scale_row, cbr, cbr_v, mutex_accessor, \
|
||||
&shared->skip_softmax_votes[kv_step_idx & 1][warpgroup_id], kv_step_idx == kv_idx_end - 1);
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
@ -360,6 +361,12 @@ struct Compute
|
||||
// Contiguous QKV FMHA assumes q, and kv have the same sequence length.
|
||||
int const actual_kv_seqlen = SEPARATE_Q_KV_BUFFER ? head_info.actual_kv_seqlen : actual_q_seqlen;
|
||||
|
||||
// Update threshold of Skip-Softmax
|
||||
if constexpr (Kernel_traits::ENABLE_SKIP_SOFTMAX)
|
||||
{
|
||||
softmax.skip_softmax_threshold = params.skip_softmax_threshold_scale_factor / actual_kv_seqlen;
|
||||
}
|
||||
|
||||
// Calculate the alibi head_scaling_factor.
|
||||
float alibi_head_scale
|
||||
= APPLY_ALIBI ? get_alibi_head_scaling_factor<AlibiParams>(head_info.bidh, params.alibi_params) : 0.f;
|
||||
@ -513,6 +520,13 @@ struct Compute
|
||||
}
|
||||
}
|
||||
}
|
||||
#ifdef SKIP_SOFTMAX_STAT
|
||||
if (tidx == 0)
|
||||
{
|
||||
atomicAdd(params.skip_softmax_total_blocks, softmax.total_blocks);
|
||||
atomicAdd(params.skip_softmax_skipped_blocks, softmax.skipped_blocks);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
@ -522,8 +536,15 @@ struct Compute
|
||||
Compute_tile_o& ctile_o, float (&p_max)[Mma_tile_p::CORES_M], float (&p_sum)[Mma_tile_p::CORES_M],
|
||||
int const tidx, int const actual_kv_seqlen, float const alibi_head_scale, int const row_offset,
|
||||
int const col_offset, int const sage_scale_row, Circular_buffer_q_reader& cbr, Circular_buffer_kv_reader& cbr_v,
|
||||
OrderedMutexAccessor& mutex, bool complete = false)
|
||||
OrderedMutexAccessor& mutex, uint32_t* skip_softmax_vote, bool complete = false)
|
||||
{
|
||||
|
||||
// Skip-softmax vote initialization
|
||||
if (tidx == 0)
|
||||
{
|
||||
// Note that we need a named_barrier_wait in compute_single_tile to make sure init is before voting.
|
||||
*skip_softmax_vote = 1;
|
||||
}
|
||||
// load the scales of K/V from global memory
|
||||
#define LOAD_SCALES_KV(dst, which, blocks_per_step, block_size) \
|
||||
if constexpr (block_size > 0) \
|
||||
@ -557,6 +578,10 @@ struct Compute
|
||||
// Ctile_p is only used once by each n step.
|
||||
ctile_p.clear();
|
||||
|
||||
// If skip_softmax is enabled, make sure there is no racing between the initialization and writing of
|
||||
// skip_softmax_vote.
|
||||
named_barrier_wait(Kernel_traits::SKIP_SOFTMAX_BARRIER_ID + threadIdx.x / 128, 128);
|
||||
|
||||
// BMM1 (Q x K').
|
||||
warpgroup_arrive();
|
||||
|
||||
@ -626,8 +651,22 @@ struct Compute
|
||||
softmax.apply_alibi_and_mask<APPLY_MASK>(
|
||||
ctile_p, params.alibi_params, alibi_head_scale, actual_kv_seqlen, row_offset, col_offset);
|
||||
|
||||
// Softmax Exp, max/sum, and update scales.
|
||||
softmax.compute_and_update_scale<IS_FIRST_COL>(p_max, p_sum);
|
||||
// Softmax Exp, max/sum, and update scales. If returns false we skip the rest.
|
||||
if (!softmax.compute_and_update_scale<IS_FIRST_COL>(p_max, p_sum, skip_softmax_vote))
|
||||
{
|
||||
if constexpr (ENABLE_MUTEX && Kernel_traits::ELEMENT_BYTES == 1)
|
||||
{
|
||||
// Notify another warpgroup to execute QGMMA.
|
||||
mutex.named_bar_arrive();
|
||||
}
|
||||
// Need to wait V, otherwise compute-sanitizer synccheck will fail.
|
||||
int ready2 = cbr_v.peek();
|
||||
if (!ready2)
|
||||
{
|
||||
cbr_v.wait();
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
// experiments show that here is the best place to load scales of V
|
||||
float scales_v[SAGE_BLOCKS_PER_STEP_V];
|
||||
|
||||
@ -17,6 +17,8 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "fmha/hopper/arrive_wait.h"
|
||||
|
||||
#include <fmha/softmax.h>
|
||||
#include <fmha/traits.h>
|
||||
#include <fmha/utils.h>
|
||||
@ -104,6 +106,12 @@ struct Softmax_base
|
||||
CHECK_IF_NEG_INF_EXISTS = SLIDING_OR_CHUNKED_ATTENTION || USE_CUSTOM_MASK
|
||||
};
|
||||
|
||||
// There are 2 warpgroups so 0x3 and 0x4 are used
|
||||
enum
|
||||
{
|
||||
SKIP_SOFTMAX_BARRIER = Kernel_traits::SKIP_SOFTMAX_BARRIER_ID
|
||||
};
|
||||
|
||||
// Ctor.
|
||||
template <typename Params>
|
||||
inline __device__ Softmax_base(Params params, int tidx)
|
||||
@ -114,6 +122,11 @@ struct Softmax_base
|
||||
, log2_chunked_attention_size_(params.log2_chunked_attention_size)
|
||||
, packed_mask_ptr_{reinterpret_cast<uint32_t*>(params.packed_mask_ptr)}
|
||||
, params_packed_mask_stride_in_bytes_{params.packed_mask_stride_in_bytes}
|
||||
#ifdef SKIP_SOFTMAX_STAT
|
||||
, total_blocks(0)
|
||||
, skipped_blocks(0)
|
||||
#endif
|
||||
, skip_softmax_threshold(0)
|
||||
{
|
||||
|
||||
int warp = tidx / 32;
|
||||
@ -330,24 +343,22 @@ struct Softmax_base
|
||||
}
|
||||
|
||||
// Calculate max/sum, and update flash-attention scales.
|
||||
// Returns false if skipped due to skip-softmax attention feature.
|
||||
template <bool IS_FIRST_COL>
|
||||
inline __device__ void compute_and_update_scale(
|
||||
float (&global_max)[Mma_tile_p::CORES_M], float (&global_sum)[Mma_tile_p::CORES_M])
|
||||
inline __device__ bool compute_and_update_scale(
|
||||
float (&global_max)[Mma_tile_p::CORES_M], float (&global_sum)[Mma_tile_p::CORES_M], uint32_t* skip_softmax_vote)
|
||||
{
|
||||
float const scale = reinterpret_cast<float const&>(scale_bmm1_);
|
||||
|
||||
// whether this warpgroup skips the softmax
|
||||
constexpr bool may_skip = Kernel_traits::ENABLE_SKIP_SOFTMAX && !IS_FIRST_COL;
|
||||
bool skip = may_skip;
|
||||
|
||||
// Row-wise max of current tile.
|
||||
#pragma unroll
|
||||
for (int mi = 0; mi < Mma_tile_p::CORES_M; mi++)
|
||||
{
|
||||
if (IS_FIRST_COL)
|
||||
{
|
||||
local_max_[mi] = elt_[mi][0];
|
||||
}
|
||||
else
|
||||
{
|
||||
local_max_[mi] = fmaxf(global_max[mi], elt_[mi][0]);
|
||||
}
|
||||
local_max_[mi] = elt_[mi][0];
|
||||
#pragma unroll
|
||||
for (int ni = 1; ni < Mma_tile_p::CORES_N * 2; ni++)
|
||||
{
|
||||
@ -355,6 +366,56 @@ struct Softmax_base
|
||||
}
|
||||
local_max_[mi] = fmaxf(__shfl_xor_sync(uint32_t(-1), local_max_[mi], 1), local_max_[mi]);
|
||||
local_max_[mi] = fmaxf(__shfl_xor_sync(uint32_t(-1), local_max_[mi], 2), local_max_[mi]);
|
||||
|
||||
if constexpr (may_skip)
|
||||
{
|
||||
// AND(&) the CORES_M results, then `skip` means whether to skip
|
||||
// the CORES_M(=2) rows
|
||||
if constexpr (!EXP2F_OPTIMIZATION)
|
||||
{
|
||||
skip &= expf(local_max_[mi] - global_max[mi]) < skip_softmax_threshold;
|
||||
}
|
||||
else
|
||||
{
|
||||
skip &= exp2f((local_max_[mi] - global_max[mi]) * scale) < skip_softmax_threshold;
|
||||
}
|
||||
}
|
||||
|
||||
if (!IS_FIRST_COL)
|
||||
{
|
||||
local_max_[mi] = fmaxf(local_max_[mi], global_max[mi]);
|
||||
}
|
||||
}
|
||||
|
||||
if constexpr (Kernel_traits::ENABLE_SKIP_SOFTMAX)
|
||||
{
|
||||
#ifdef SKIP_SOFTMAX_STAT
|
||||
total_blocks++;
|
||||
#endif
|
||||
if constexpr (may_skip)
|
||||
{
|
||||
|
||||
// AND(&) the results together in a warp, then `skip` means whether to skip
|
||||
// all the 16 rows managed by this warp.
|
||||
// each 4 threads (e.g. T0~T3) have the same `skip`, only 0x11111111 is needed
|
||||
// instead of 0xffffffff. But the perf is the same.
|
||||
skip = __all_sync(0xffffffff, skip);
|
||||
if (threadIdx.x % 32 == 0)
|
||||
{
|
||||
// The leader of each warp votes.
|
||||
atomicAnd(skip_softmax_vote, uint32_t(skip));
|
||||
}
|
||||
// WG0 uses 0x3 barrier, WG1 uses 0x4 barrier
|
||||
named_barrier_wait(SKIP_SOFTMAX_BARRIER + threadIdx.x / 128, 128);
|
||||
skip = *((uint32_t volatile*) skip_softmax_vote);
|
||||
if (skip)
|
||||
{
|
||||
#ifdef SKIP_SOFTMAX_STAT
|
||||
skipped_blocks++;
|
||||
#endif
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Softmax Exp.
|
||||
@ -436,6 +497,7 @@ struct Softmax_base
|
||||
global_max[mi] = max_new;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
// Update flash attention scales and pack elements for BMM2.
|
||||
@ -513,6 +575,13 @@ struct Softmax_base
|
||||
float correction_[Mma_tile_p::CORES_M];
|
||||
// The packed mask.
|
||||
uint4 packed_mask_;
|
||||
// Skip softmax when exp(local_max - global_max) < skip_softmax_threshold.
|
||||
float skip_softmax_threshold;
|
||||
#ifdef SKIP_SOFTMAX_STAT
|
||||
// Statistics of skip-softmax
|
||||
uint32_t total_blocks;
|
||||
uint32_t skipped_blocks;
|
||||
#endif
|
||||
};
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
@ -868,9 +937,10 @@ struct Softmax<Hopper_qgmma_e4m3_fp32_traits, Kernel_traits>
|
||||
}
|
||||
|
||||
// Calculate max/sum, and update flash-attention scales.
|
||||
// Returns false if skipped due to skip-softmax attention feature.
|
||||
template <bool IS_FIRST_COL>
|
||||
inline __device__ void compute_and_update_scale(
|
||||
float (&global_max)[Mma_tile_p::CORES_M], float (&global_sum)[Mma_tile_p::CORES_M])
|
||||
inline __device__ bool compute_and_update_scale(
|
||||
float (&global_max)[Mma_tile_p::CORES_M], float (&global_sum)[Mma_tile_p::CORES_M], uint32_t* skip_softmax_vote)
|
||||
{
|
||||
float const scale = reinterpret_cast<float const&>(this->scale_bmm1_);
|
||||
float(&local_max_)[Mma_tile_p::CORES_M] = this->local_max_;
|
||||
@ -878,18 +948,15 @@ struct Softmax<Hopper_qgmma_e4m3_fp32_traits, Kernel_traits>
|
||||
float(&correction_)[Mma_tile_p::CORES_M] = this->correction_;
|
||||
float(&elt_)[Mma_tile_p::CORES_M][Mma_tile_p::CORES_N * 2] = this->elt_;
|
||||
|
||||
// whether this warpgroup skips the softmax
|
||||
constexpr bool may_skip = Kernel_traits::ENABLE_SKIP_SOFTMAX && !IS_FIRST_COL;
|
||||
bool skip = may_skip;
|
||||
|
||||
// Row-wise max of current tile.
|
||||
#pragma unroll
|
||||
for (int mi = 0; mi < Mma_tile_p::CORES_M; mi++)
|
||||
{
|
||||
if (IS_FIRST_COL)
|
||||
{
|
||||
local_max_[mi] = elt_[mi][0];
|
||||
}
|
||||
else
|
||||
{
|
||||
local_max_[mi] = fmaxf(global_max[mi], elt_[mi][0]);
|
||||
}
|
||||
local_max_[mi] = elt_[mi][0];
|
||||
#pragma unroll
|
||||
for (int ni = 1; ni < Mma_tile_p::CORES_N * 2; ni++)
|
||||
{
|
||||
@ -897,6 +964,56 @@ struct Softmax<Hopper_qgmma_e4m3_fp32_traits, Kernel_traits>
|
||||
}
|
||||
local_max_[mi] = fmaxf(__shfl_xor_sync(uint32_t(-1), local_max_[mi], 1), local_max_[mi]);
|
||||
local_max_[mi] = fmaxf(__shfl_xor_sync(uint32_t(-1), local_max_[mi], 2), local_max_[mi]);
|
||||
// AND(&) the CORES_M results, then `skip` means whether to skip
|
||||
// the CORES_M(=2) rows
|
||||
if constexpr (may_skip)
|
||||
{
|
||||
// AND(&) the CORES_M results, then `skip` means whether to skip
|
||||
// the CORES_M(=2) rows
|
||||
if constexpr (!EXP2F_OPTIMIZATION)
|
||||
{
|
||||
skip &= expf(local_max_[mi] - global_max[mi]) < this->skip_softmax_threshold;
|
||||
}
|
||||
else
|
||||
{
|
||||
skip &= exp2f((local_max_[mi] - global_max[mi]) * scale) < this->skip_softmax_threshold;
|
||||
}
|
||||
}
|
||||
if (!IS_FIRST_COL)
|
||||
{
|
||||
local_max_[mi] = fmaxf(local_max_[mi], global_max[mi]);
|
||||
}
|
||||
}
|
||||
|
||||
if constexpr (Kernel_traits::ENABLE_SKIP_SOFTMAX)
|
||||
{
|
||||
#ifdef SKIP_SOFTMAX_STAT
|
||||
this->total_blocks++;
|
||||
#endif
|
||||
|
||||
if constexpr (may_skip)
|
||||
{
|
||||
// AND(&) the results together in a warp, then `skip` means whether to skip
|
||||
// all the 16 rows managed by this warp.
|
||||
// each 4 threads (e.g. T0~T3) have the same `skip`, only 0x11111111 is needed
|
||||
// instead of 0xffffffff. But the perf is the same.
|
||||
skip = __all_sync(0xffffffff, skip);
|
||||
if (threadIdx.x % 32 == 0)
|
||||
{
|
||||
// The leader of each warp votes.
|
||||
atomicAnd(skip_softmax_vote, uint32_t(skip));
|
||||
}
|
||||
// WG0 uses 0x3 barrier, WG1 uses 0x4 barrier
|
||||
named_barrier_wait(Base::SKIP_SOFTMAX_BARRIER + threadIdx.x / 128, 128);
|
||||
skip = *((uint32_t volatile*) skip_softmax_vote);
|
||||
if (skip)
|
||||
{
|
||||
#ifdef SKIP_SOFTMAX_STAT
|
||||
this->skipped_blocks++;
|
||||
#endif
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Softmax Exp.
|
||||
@ -987,6 +1104,7 @@ struct Softmax<Hopper_qgmma_e4m3_fp32_traits, Kernel_traits>
|
||||
global_max[mi] = max_new;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
// Update flash attention scales and pack elements for BMM2.
|
||||
|
||||
@ -71,6 +71,8 @@ template <
|
||||
bool ENABLE_BMM1_SOFTCAPPING_SCALE_ = false,
|
||||
// Save softmax stats ?
|
||||
bool RETURN_SOFTMAX_STATS_ = false,
|
||||
// Enable skip softmax attention feature
|
||||
bool ENABLE_SKIP_SOFTMAX_ = false,
|
||||
// The output type (only used by fp8 kernels).
|
||||
typename OutputType = typename Instruction_traits<STEP_Q_, STEP_KV_, 0, false, false>::A_type,
|
||||
// The sage attention block size for Q, K and V
|
||||
@ -290,6 +292,12 @@ struct Kernel_traits
|
||||
USE_CUSTOM_MASK = ATTENTION_MASK_TYPE_ == 3
|
||||
};
|
||||
|
||||
// Are we enabling skip softmax attention feature?
|
||||
enum
|
||||
{
|
||||
ENABLE_SKIP_SOFTMAX = ENABLE_SKIP_SOFTMAX_
|
||||
};
|
||||
|
||||
static_assert(!USE_CUSTOM_MASK || STEP_KV == 64 || STEP_KV == 128 || STEP_KV == 256, "Not implemented!");
|
||||
|
||||
// Apply the exp2f optimization (fuse bmm1_scale and -max into FMAs).
|
||||
@ -384,6 +392,8 @@ struct Kernel_traits
|
||||
// Named barrier ids
|
||||
static constexpr int DMA_SYNC_BARRIER_ID = 0x1;
|
||||
static constexpr int MMA_SYNC_BARRIER_ID = 0x2;
|
||||
// There are 2 warpgroups so 0x3 and 0x4 are used for skip-softmax
|
||||
static constexpr int SKIP_SOFTMAX_BARRIER_ID = 0x3;
|
||||
|
||||
// How many threads get involved in the dma group.
|
||||
enum
|
||||
@ -518,6 +528,10 @@ struct Kernel_traits
|
||||
// Mutex
|
||||
OrderedMutex compute_mutex;
|
||||
|
||||
// 4 warps in a warpgroup vote to an atomic variable in shared memory
|
||||
// to decide whether to skip this STEP_KV. Double-buffered to avoid races between consecutive KV_STEPS.
|
||||
uint32_t skip_softmax_votes[2][NUM_COMPUTE_GROUPS];
|
||||
|
||||
inline __device__ void init(int tid0)
|
||||
{
|
||||
|
||||
@ -580,6 +594,8 @@ template < // The step size in query sequence dimension (M of BMM1 and BMM2).
|
||||
bool ENABLE_BMM1_SOFTCAPPING_SCALE_ = false,
|
||||
// Save softmax stats ?
|
||||
bool RETURN_SOFTMAX_STATS_ = false,
|
||||
// Enable skip softmax attention feature
|
||||
bool ENABLE_SKIP_SOFTMAX_ = false,
|
||||
// The output type (only used by fp8 kernels).
|
||||
typename OutputType = e4m3_t,
|
||||
// The sage attention block size for Q, K and V
|
||||
@ -588,14 +604,15 @@ struct Kernel_traits_Hopper_qgmma_e4m3_fp32
|
||||
: public Kernel_traits<Hopper_qgmma_e4m3_fp32_traits, STEP_Q_, STEP_KV_, D_, DV_, Q_BUFFERS_, KV_BUFFERS_,
|
||||
NUM_COMPUTE_GROUPS_, DMA2COMPUTE_DEPTH_, ATTENTION_MASK_TYPE_, HEADS_INTERLEAVED_, APPLY_ALIBI_,
|
||||
ENABLE_MUTEX_, SCHEDULING_MODE_, INPUT_LAYOUT_, USE_TMA_STORE_, ENABLE_BMM1_SOFTCAPPING_SCALE_,
|
||||
RETURN_SOFTMAX_STATS_, OutputType, SAGE_BLOCK_SIZE_Q_, SAGE_BLOCK_SIZE_K_, SAGE_BLOCK_SIZE_V_>
|
||||
RETURN_SOFTMAX_STATS_, ENABLE_SKIP_SOFTMAX_, OutputType, SAGE_BLOCK_SIZE_Q_, SAGE_BLOCK_SIZE_K_,
|
||||
SAGE_BLOCK_SIZE_V_>
|
||||
{
|
||||
|
||||
// Base class.
|
||||
using Base = Kernel_traits<Hopper_qgmma_e4m3_fp32_traits, STEP_Q_, STEP_KV_, D_, DV_, Q_BUFFERS_, KV_BUFFERS_,
|
||||
NUM_COMPUTE_GROUPS_, DMA2COMPUTE_DEPTH_, ATTENTION_MASK_TYPE_, HEADS_INTERLEAVED_, APPLY_ALIBI_, ENABLE_MUTEX_,
|
||||
SCHEDULING_MODE_, INPUT_LAYOUT_, USE_TMA_STORE_, ENABLE_BMM1_SOFTCAPPING_SCALE_, RETURN_SOFTMAX_STATS_,
|
||||
OutputType, SAGE_BLOCK_SIZE_Q_, SAGE_BLOCK_SIZE_K_, SAGE_BLOCK_SIZE_V_>;
|
||||
ENABLE_SKIP_SOFTMAX_, OutputType, SAGE_BLOCK_SIZE_Q_, SAGE_BLOCK_SIZE_K_, SAGE_BLOCK_SIZE_V_>;
|
||||
|
||||
enum
|
||||
{
|
||||
@ -693,6 +710,10 @@ struct Kernel_traits_Hopper_qgmma_e4m3_fp32
|
||||
// Mutex
|
||||
OrderedMutex compute_mutex;
|
||||
|
||||
// 4 warps in a warpgroup vote to an atomic variable in shared memory
|
||||
// to decide whether to skip this STEP_KV. Double-buffered to avoid races between consecutive STEP_KVs.
|
||||
uint32_t skip_softmax_votes[2][Base::NUM_COMPUTE_GROUPS];
|
||||
|
||||
inline __device__ void init(int tid0)
|
||||
{
|
||||
|
||||
|
||||
@ -276,7 +276,8 @@ static inline void set_params(bert::Fused_multihead_attention_params_v2& params,
|
||||
// scale factors
|
||||
float const scale_bmm1, float const scale_softmax, float const scale_bmm2, float const softcapping_scale_bmm1,
|
||||
// flags
|
||||
bool const use_int8_scale_max, bool const interleaved, bool const is_s_padded, bool const has_alibi)
|
||||
bool const use_int8_scale_max, bool const interleaved, bool const is_s_padded, bool const has_alibi,
|
||||
float const skip_softmax_threshold_scale_factor)
|
||||
{
|
||||
|
||||
memset(¶ms, 0, sizeof(params));
|
||||
@ -421,6 +422,9 @@ static inline void set_params(bert::Fused_multihead_attention_params_v2& params,
|
||||
params.enable_i2f_trick
|
||||
= -double(1 << 22) * double(scale_bmm2) <= -128.f && double(1 << 22) * double(scale_bmm2) >= 127.f;
|
||||
}
|
||||
|
||||
// Skip-softmax attention
|
||||
params.skip_softmax_threshold_scale_factor = skip_softmax_threshold_scale_factor;
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
@ -429,7 +433,7 @@ static inline void determine_launch_params(Launch_params& launch_params, Data_ty
|
||||
const size_t d, const Attention_mask_type attention_mask_type, const Attention_input_layout input_layout,
|
||||
bool const interleaved, bool const ignore_b1opt, bool const force_unroll, bool const use_tma,
|
||||
bool const force_non_flash_attention, bool const force_non_warp_specialization,
|
||||
bool const force_non_granular_tiling, bool const force_fp32_acc,
|
||||
bool const force_non_granular_tiling, bool const force_fp32_acc, float const skip_softmax_threshold_scale_factor,
|
||||
// device props
|
||||
const cudaDeviceProp props)
|
||||
{
|
||||
@ -470,6 +474,9 @@ static inline void determine_launch_params(Launch_params& launch_params, Data_ty
|
||||
"are not supported on Ada currently.\n");
|
||||
launch_params.use_granular_tiling = false;
|
||||
}
|
||||
|
||||
// Enable skip softmax attention or not.
|
||||
launch_params.enable_skip_softmax = skip_softmax_threshold_scale_factor > 0.f;
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
@ -589,6 +596,9 @@ int main(int argc, char** argv)
|
||||
// Use attention sinks (added to the denominator of softmax)
|
||||
bool use_attention_sinks = false;
|
||||
|
||||
// Skip-softmax attention
|
||||
float skip_softmax_threshold_scale_factor = 0;
|
||||
|
||||
// Read the parameters from the command-line.
|
||||
for (int ii = 1; ii < argc; ++ii)
|
||||
{
|
||||
@ -885,6 +895,10 @@ int main(int argc, char** argv)
|
||||
{
|
||||
use_attention_sinks = true;
|
||||
}
|
||||
else if (!strcmp(argv[ii], "-skip-softmax-threshold-scale-factor") && ++ii < argc)
|
||||
{
|
||||
skip_softmax_threshold_scale_factor = strtof(argv[ii], nullptr);
|
||||
}
|
||||
else
|
||||
{
|
||||
fprintf(stderr, "Unrecognized option: %s. Aborting!\n", argv[ii]);
|
||||
@ -1057,7 +1071,7 @@ int main(int argc, char** argv)
|
||||
Launch_params launch_params;
|
||||
determine_launch_params(launch_params, data_type, sm, s, d, attention_mask_type, input_layout, interleaved,
|
||||
ignore_b1opt, force_unroll, use_tma, force_non_flash_attention, force_non_warp_specialization,
|
||||
force_non_granular_tiling, force_fp32_acc, props);
|
||||
force_non_granular_tiling, force_fp32_acc, skip_softmax_threshold_scale_factor, props);
|
||||
|
||||
// The Q, K and V matrices are packed into one big matrix of size S x B x H x 3 x D.
|
||||
const size_t qkv_size = s * b * h * (2 * d + dv);
|
||||
@ -1713,7 +1727,13 @@ int main(int argc, char** argv)
|
||||
tokens_per_block, qkv_d_view, q_d, k_d, v_d, contiguous_kv_d, kv_cache_pool_ptr, kv_cache_block_offsets_d,
|
||||
packed_mask_d, cu_mask_rows_d, attention_sinks_d, cu_seqlens_d, cu_q_seqlens_d, o_d_view, p_d, s_d,
|
||||
softmax_stats_ptr, scale_bmm2_d, scale_bmm1, scale_softmax, scale_bmm2, softcapping_scale_bmm1,
|
||||
use_int8_scale_max, interleaved, is_s_padded, has_alibi);
|
||||
use_int8_scale_max, interleaved, is_s_padded, has_alibi, skip_softmax_threshold_scale_factor);
|
||||
#ifdef SKIP_SOFTMAX_STAT
|
||||
FMHA_CHECK_CUDA(cudaMalloc(¶ms_v2.skip_softmax_total_blocks, sizeof(uint32_t)));
|
||||
FMHA_CHECK_CUDA(cudaMalloc(¶ms_v2.skip_softmax_skipped_blocks, sizeof(uint32_t)));
|
||||
FMHA_CHECK_CUDA(cudaMemset(params_v2.skip_softmax_total_blocks, 0, sizeof(uint32_t)));
|
||||
FMHA_CHECK_CUDA(cudaMemset(params_v2.skip_softmax_skipped_blocks, 0, sizeof(uint32_t)));
|
||||
#endif
|
||||
|
||||
// total number of tokens is needed to set TMA desc on the host.
|
||||
launch_params.total_q_seqlen = q_seqlens[b];
|
||||
@ -2101,6 +2121,18 @@ int main(int argc, char** argv)
|
||||
non_fused_elapsed / fused_elapsed, total_flops / (fused_elapsed / float(runs) / 1e-9),
|
||||
total_bytes / (fused_elapsed / float(runs) / 1e-6));
|
||||
}
|
||||
#ifdef SKIP_SOFTMAX_STAT
|
||||
if (skip_softmax_threshold_scale_factor > 0)
|
||||
{
|
||||
uint32_t total_blocks, skipped_blocks;
|
||||
FMHA_CHECK_CUDA(
|
||||
cudaMemcpy(&total_blocks, params_v2.skip_softmax_total_blocks, sizeof(uint32_t), cudaMemcpyDeviceToHost));
|
||||
FMHA_CHECK_CUDA(cudaMemcpy(
|
||||
&skipped_blocks, params_v2.skip_softmax_skipped_blocks, sizeof(uint32_t), cudaMemcpyDeviceToHost));
|
||||
printf("Skip-Softmax .: %u / %u = %.2f%%\n", skipped_blocks, total_blocks,
|
||||
total_blocks ? 100.f * skipped_blocks / total_blocks : 0.f);
|
||||
}
|
||||
#endif
|
||||
#if defined(DEBUG_HAS_PRINT_BUFFER)
|
||||
FMHA_CHECK_CUDA(cuda_memcpy_d2h(print_buffer.data(), params.print_ptr, print_buffer.size(), DATA_TYPE_FP32));
|
||||
|
||||
@ -2141,6 +2173,11 @@ int main(int argc, char** argv)
|
||||
FMHA_CHECK_CUDA(cudaFree(kv_cache_block_offsets_d));
|
||||
FMHA_CHECK_CUDA(cudaFree(contiguous_kv_d));
|
||||
FMHA_CHECK_CUDA(cudaFree(softmax_stats_d));
|
||||
FMHA_CHECK_CUDA(cudaFree(attention_sinks_d));
|
||||
#ifdef SKIP_SOFTMAX_STAT
|
||||
FMHA_CHECK_CUDA(cudaFree(params_v2.skip_softmax_total_blocks));
|
||||
FMHA_CHECK_CUDA(cudaFree(params_v2.skip_softmax_skipped_blocks));
|
||||
#endif
|
||||
|
||||
free(qkv_h);
|
||||
free(mask_h);
|
||||
|
||||
@ -283,6 +283,16 @@ struct Fused_multihead_attention_params_v2 : Fused_multihead_attention_params_ba
|
||||
float* scales;
|
||||
} q, k, v;
|
||||
} sage;
|
||||
|
||||
// Skip softmax when exp(local_max - global_max) < skip_softmax_threshold_scale_factor / seqlen.
|
||||
// A positive value means skip-softmax is enabled.
|
||||
float skip_softmax_threshold_scale_factor = 0;
|
||||
|
||||
#ifdef SKIP_SOFTMAX_STAT
|
||||
// Statistics of skip-softmax, pointers of device memory for output
|
||||
uint32_t* skip_softmax_total_blocks;
|
||||
uint32_t* skip_softmax_skipped_blocks;
|
||||
#endif
|
||||
};
|
||||
|
||||
#endif
|
||||
@ -322,6 +332,8 @@ struct Fused_multihead_attention_launch_params
|
||||
// harward properties to determine how to launch blocks
|
||||
int multi_processor_count = 0;
|
||||
int device_l2_cache_size = 0;
|
||||
// skip softmax attention
|
||||
bool enable_skip_softmax = false;
|
||||
};
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
@ -177,4 +177,13 @@ struct Fused_multihead_attention_params_v2
|
||||
float* scales;
|
||||
} q, k, v;
|
||||
} sage;
|
||||
|
||||
// Skip softmax when exp(local_max - global_max) < skip_softmax_threshold_scale_factor / seqlen.
|
||||
// A positive value means skip-softmax is enabled.
|
||||
float skip_softmax_threshold_scale_factor = 0;
|
||||
#ifdef SKIP_SOFTMAX_STAT
|
||||
// Statistics of skip-softmax, pointers of device memory for output
|
||||
uint32_t* skip_softmax_total_blocks;
|
||||
uint32_t* skip_softmax_skipped_blocks;
|
||||
#endif
|
||||
};
|
||||
|
||||
@ -296,7 +296,8 @@ bool AttentionOp::convertMMHAParamsToXQAParams(tensorrt_llm::kernels::XQAParams&
|
||||
// Parameters for sparse attention
|
||||
xqaParams.sparse_params = mRuntimeSparseAttentionParams;
|
||||
xqaParams.use_sparse_attention = useTllmGenSparseAttention();
|
||||
|
||||
// Skip softmax threshold.
|
||||
xqaParams.skip_softmax_threshold_scale_factor = mSkipSoftmaxThresholdScaleFactorDecode;
|
||||
// Cross attention parameters.
|
||||
xqaParams.encoder_input_lengths = generationsParams.encoder_input_lengths;
|
||||
|
||||
@ -1313,6 +1314,8 @@ int AttentionOp::mlaGeneration(
|
||||
fmhaParams.sparse_params = mRuntimeSparseAttentionParams;
|
||||
}
|
||||
|
||||
// MLA does not support skip-softmax attention right now
|
||||
|
||||
// Run the fmha kernel
|
||||
mDecoderFMHARunner->run(fmhaParams);
|
||||
}
|
||||
@ -1885,6 +1888,18 @@ int AttentionOp::enqueueContext(EnqueueContextParams<T> const& params, cudaStrea
|
||||
fmhaParams.sparse_params = mRuntimeSparseAttentionParams;
|
||||
}
|
||||
|
||||
// Skip-softmax attention parameters
|
||||
fmhaParams.skipSoftmaxThresholdScaleFactor = mSkipSoftmaxThresholdScaleFactorPrefill;
|
||||
#ifdef SKIP_SOFTMAX_STAT
|
||||
fmhaParams.skipSoftmaxTotalBlocks = mSkipSoftmaxTotalBlocks;
|
||||
fmhaParams.skipSoftmaxSkippedBlocks = mSkipSoftmaxSkippedBlocks;
|
||||
#else
|
||||
if (tensorrt_llm::common::getEnvPrintSkipSoftmaxStat())
|
||||
{
|
||||
TLLM_THROW("To print skip softmax stat, please run build_wheel.py with -DSKIP_SOFTMAX_STAT");
|
||||
}
|
||||
#endif
|
||||
|
||||
if (mAttentionChunkSize)
|
||||
{
|
||||
fmhaParams.chunkedAttentionSize = *mAttentionChunkSize;
|
||||
|
||||
@ -494,6 +494,14 @@ public:
|
||||
// See [Chunked Attention] in _torch/modules/attention.py
|
||||
std::optional<int64_t> mAttentionChunkSize = std::nullopt;
|
||||
|
||||
// Skip softmax threshold scale factor.
|
||||
float mSkipSoftmaxThresholdScaleFactorPrefill = 0;
|
||||
float mSkipSoftmaxThresholdScaleFactorDecode = 0;
|
||||
#ifdef SKIP_SOFTMAX_STAT
|
||||
uint32_t* mSkipSoftmaxTotalBlocks;
|
||||
uint32_t* mSkipSoftmaxSkippedBlocks;
|
||||
#endif
|
||||
|
||||
[[nodiscard]] auto data() const
|
||||
{
|
||||
return std::make_tuple(mLayerIdx, mNumHeads, mVisionStart, mVisionLength, mNumKVHeads, mHeadSize,
|
||||
@ -510,7 +518,8 @@ public:
|
||||
mMLAParams.data(), mCpSize, mCpRank, mCpGroup, mNumAttnHeads, mNumAttnKVHeads, mNumKVHeadsOrigin,
|
||||
mAttnTpSize, mAttnTpRank, mAttnCpSize, mAttnCpRank, mUlyssesMQABroadcast, mEnableContextFMHA,
|
||||
mFMHAForceFP32Acc, mMultiBlockMode, mEnableXQA, mUseKVCache, mSkipAttn, mFuseFp4Quant,
|
||||
mNbMultiBlockSemaphores, mAttentionChunkSize.value_or(-1));
|
||||
mNbMultiBlockSemaphores, mAttentionChunkSize.value_or(-1), mSkipSoftmaxThresholdScaleFactorPrefill,
|
||||
mSkipSoftmaxThresholdScaleFactorDecode);
|
||||
};
|
||||
|
||||
private:
|
||||
|
||||
@ -554,6 +554,11 @@ bool getEnvEplbForceGdrcopy()
|
||||
return getBoolEnv("TRTLLM_EPLB_FORCE_GDRCOPY");
|
||||
}
|
||||
|
||||
bool getEnvPrintSkipSoftmaxStat()
|
||||
{
|
||||
return getBoolEnv("TRTLLM_PRINT_SKIP_SOFTMAX_STAT");
|
||||
}
|
||||
|
||||
} // namespace common
|
||||
|
||||
TRTLLM_NAMESPACE_END
|
||||
|
||||
@ -161,6 +161,8 @@ bool getEnvKVCacheTransferAllBlocksForWindow();
|
||||
|
||||
bool getEnvEplbForceGdrcopy();
|
||||
|
||||
bool getEnvPrintSkipSoftmaxStat();
|
||||
|
||||
} // namespace common
|
||||
|
||||
TRTLLM_NAMESPACE_END
|
||||
|
||||
@ -1,3 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:f6509dd36fb92554c6078595951a8de698d7bdaa07b9b817bfcdd255d4303bca
|
||||
size 687070
|
||||
oid sha256:4f1f3679968b8f6dea77f53534af9eb1348b6f476d4c3880833b41dd4cc9c803
|
||||
size 687860
|
||||
|
||||
@ -1,3 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:b22d606e19b52047ae67319d61f138562f2b81df08ccde3f8fa04f040d408d7a
|
||||
size 669688
|
||||
oid sha256:a0d7061b400ab387309af00ae12f7a840b5abb91757183f415ca18329bbdb358
|
||||
size 670478
|
||||
|
||||
@ -1,3 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:2a70e335677a1b0f9d98267fe7701735e42f105720403489276d48a4247ea1b5
|
||||
size 423835
|
||||
oid sha256:4a91ff0238b0c8f1d40f8441f22a60a2c64d344b8550de68737292ff449d1d7e
|
||||
size 426203
|
||||
|
||||
@ -1,3 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:8289200bf78517033295966e9dbdf5c647da9aa7089669ff473ba436fef6a798
|
||||
size 1230152
|
||||
oid sha256:4d094c39dbdd372166facb297a4a91be80fb231bf3cca89afa97e61cc725f67e
|
||||
size 1228572
|
||||
|
||||
@ -1,3 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:97cc5f8d42d92332a92fa216847bbacccc7ef9f9d5208bd26585cd702d03fe57
|
||||
size 1725040
|
||||
oid sha256:1fe830d32459fd9a25d54e1d00a98720afd938d9e9042e2b5903f969e991d72d
|
||||
size 1721882
|
||||
|
||||
@ -1,3 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:1264927817c08da144e387a7258f6c6fe424c0ff159f3ab0d6ffa3c4e3947598
|
||||
size 375671
|
||||
oid sha256:09af1ef9197c628c4a31cc58276ee6dcfad03f751069a78b5242594f93ea8c97
|
||||
size 378039
|
||||
|
||||
@ -1,3 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:950fb45e94ffc8e2ec9f5a4b682075be55cb85d6415b3eeb172ce2cf7d53220d
|
||||
size 1140954
|
||||
oid sha256:9e93bb514c30bc5a4cda8f402a386ab85d079f9b97aeff04788cf3c8a8cc87a6
|
||||
size 1137008
|
||||
|
||||
@ -1,3 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:ba97e1bf342788eaf74a78f542f870d3967214aed98b98600fae772aad5bad5f
|
||||
size 653960
|
||||
oid sha256:0dc47824dfc41004c5b243ce9f40eefeee15c69b88474e33ec13137ef56604e8
|
||||
size 651592
|
||||
|
||||
@ -1,3 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:337cc83d1880b1496e2f054285472b693c181e081819f425ddf2ea45a5dfe9f4
|
||||
size 1130682
|
||||
oid sha256:c0f042eabb29ee9db7ddf9791840337a7544653b295e4b2a5068b7f80bcd8251
|
||||
size 1128314
|
||||
|
||||
@ -1,3 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:859ffffa18f1c9c8068a1cfedec487c2e0eab84af2c3720eaa7bb2a044ea16f6
|
||||
size 1534006
|
||||
oid sha256:7a9d887dd0acea6d82a25e0dda908f4c5421eaa1ddbfeeb49d382c079156d67e
|
||||
size 1535586
|
||||
|
||||
@ -1,3 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:02bc55faacb50d0501c590ed11b40d802b374618cbde58db725cc67495762064
|
||||
size 698136
|
||||
oid sha256:22a7eaab8e44194acd83621e5546f164ad9cbeda8b67867f864a235036a03931
|
||||
size 690242
|
||||
|
||||
@ -1,3 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:510d6c9942dea4bef976c2307fc63f1d7341d78ad8b41cca3bf80bae0a377575
|
||||
size 380847
|
||||
oid sha256:e22fe2dde7f5542975db7517b37cdce0eaa656fed2bc58378b37a872c54a43ef
|
||||
size 374533
|
||||
|
||||
@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:d0e0d34e15f533f756ac4ad6ef8889e5ed7556d859b6263509f608f2e7194e0a
|
||||
size 964134
|
||||
@ -1,3 +0,0 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:6fd7941b92a10c3116b3d93b50ce94d90627ed020e1aa4263b2c46926db60250
|
||||
size 1008328
|
||||
@ -1,3 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:04439f4bdd5bf15dce0d59e455545236ed5b98c963a9b491c40d473eb766a04f
|
||||
size 988580
|
||||
oid sha256:ec624d7dceea5234b9dd4e43125f271e46ed4f2a4118837a23e00eb89571dcb2
|
||||
size 985422
|
||||
|
||||
@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:46413d67059a15237e0f7f26b4d75c1965d135c4b28a1effe3b6f40a51dbe543
|
||||
size 606983
|
||||
@ -1,3 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:0c526229c1eea9eec08dd2c4a6d9f2052e54d6ece9f4fdf0b9a73f371e72ae36
|
||||
size 614063
|
||||
oid sha256:d33f3798292038d22d4e61732da397b3466a8983892fcc14448f63e5705d2dd0
|
||||
size 629062
|
||||
|
||||
@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:2d07d4142403fc5d3004e6831b12f1cf3236c397e61448cbe49e7c7e47a5aef4
|
||||
size 2482034
|
||||
@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:26232545325ecf363f12b49db62c92a1294dc155ea22cb6e6593fc920b734aec
|
||||
size 1862432
|
||||
@ -1,3 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:ba18343579abe3aee6e75545c8ec25a244d24864ff69c23146ee2a13b5eccdd4
|
||||
size 1916872
|
||||
oid sha256:41df1bdb745c0efd7961c44dbcd30a1bad202103d301ca785b5b7cdef3cd0ce9
|
||||
size 1882140
|
||||
|
||||
@ -1,3 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:0e6ba601df471fff8f5beb1bdb9af6b8f41f57903ee762bb042b023917882d95
|
||||
size 2608304
|
||||
oid sha256:053ddc81e3885a583adb9bfbfea6a263f023638a2162430dc62faeba1b101d37
|
||||
size 2527002
|
||||
|
||||
@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:25f59e66bbafb18273bf7fc34eade730ef12e805e59abb6ef345b6d8574b8eb8
|
||||
size 565135
|
||||
@ -1,3 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:91906c0234a97455f5733ec6e359d61e9b9a0481aa72fd5eec72ae5cc04b8d22
|
||||
size 571425
|
||||
oid sha256:2194a3863b3dd880c0788260e6368d8453050e7c02e7055eb8d9b87f4ce32429
|
||||
size 588001
|
||||
|
||||
@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:19a154061aa471d1ef60d6a6e6cc32afe7d5fc9e0856059c22220d333f855318
|
||||
size 2291002
|
||||
@ -1,3 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:6233042269b80360ec9b76dff1204188338c85d4c647c234df21405138e8e584
|
||||
size 704076
|
||||
oid sha256:3fbf61a84913ea7865981c9d2df49a2c4db4aff6959e0864ba619878af8894dd
|
||||
size 641720
|
||||
|
||||
@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:73c371164cb89be22699cfc3973d6b3bc03a892fed504f74e89f17b7130deb12
|
||||
size 1765330
|
||||
@ -1,3 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
The remainder of this diff consists of Git LFS pointer updates for binary artifacts tracked in the repository (most likely the regenerated fmha cubins). Each affected pointer keeps its header line, version https://git-lfs.github.com/spec/v1, and replaces its oid sha256: and size lines with the values of the regenerated binary. All of the per-file hunks follow the same pattern; a representative one is:

@ -1,3 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:1c69925c289bbda6bb7579eb6c84d1432612a96485ee97bdc04dcbba035c93da
size 2342284
oid sha256:f2ffd14c273aeb544cf055e6b984f25b03116eb30d067c98bf00af306ec55962
size 2335970

Several dozen pointer files are updated in the same way. The file names are not preserved in this rendering, and the viewer truncates the listing because too many files changed in this diff.
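As an aside (not part of this commit), the oid and size fields shown above are all that a pointer records about its binary, so a fetched artifact can be checked against them directly. Below is a minimal Python sketch of such a check; the function names and the some_kernel.cubin path are hypothetical placeholders, not anything defined by this repository.

# Minimal sketch: verify a materialized file against the "oid sha256:" and
# "size" fields of a Git LFS pointer. Paths and names are hypothetical.
import hashlib
import os
import re

def parse_lfs_pointer(pointer_text: str) -> tuple[str, int]:
    """Extract the sha256 oid and byte size from an LFS pointer file."""
    oid = re.search(r"^oid sha256:([0-9a-f]{64})$", pointer_text, re.M).group(1)
    size = int(re.search(r"^size (\d+)$", pointer_text, re.M).group(1))
    return oid, size

def matches_pointer(binary_path: str, pointer_text: str) -> bool:
    """Return True if the file's size and sha256 digest match the pointer."""
    oid, size = parse_lfs_pointer(pointer_text)
    if os.path.getsize(binary_path) != size:
        return False
    digest = hashlib.sha256()
    with open(binary_path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):
            digest.update(chunk)
    return digest.hexdigest() == oid

if __name__ == "__main__":
    pointer = (
        "version https://git-lfs.github.com/spec/v1\n"
        "oid sha256:f2ffd14c273aeb544cf055e6b984f25b03116eb30d067c98bf00af306ec55962\n"
        "size 2335970\n"
    )
    print(matches_pointer("some_kernel.cubin", pointer))  # hypothetical path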