mirror of
https://github.com/NVIDIA/TensorRT-LLM.git
synced 2026-01-14 06:27:45 +08:00
[TRTLLM-6674][feat] (Breaking Change) Hopper SWA non-cyclic kernels + KV reuse + Spec Dec (#6379)
Signed-off-by: Haohang Huang <31998628+symphonylyh@users.noreply.github.com> Signed-off-by: symphonylyh <31998628+symphonylyh@users.noreply.github.com>
This commit is contained in:
parent
4d040b50b7
commit
c9eebcb454
@ -796,7 +796,6 @@ struct Gmem_tile_contiguous_kv
|
||||
template <typename Smem_tile>
|
||||
inline __device__ void load(Smem_tile& smem_tile)
|
||||
{
|
||||
// TODO(perkzz): add remap_kv_row for sliding window attention.
|
||||
uint32_t preds[LDGS];
|
||||
#pragma unroll
|
||||
for (int ii = 0; ii < LDGS; ++ii)
|
||||
@ -1091,42 +1090,6 @@ struct Gmem_tile_paged_kv
|
||||
}
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Remap the row to the one in cyclic kv cache.
|
||||
inline __device__ void remap_kv_row(int& row)
|
||||
{
|
||||
// Sliding window attention + chunked context needs special handling.
|
||||
if constexpr (SLIDING_WINDOW_ATTENTION)
|
||||
{
|
||||
// For chunked context (i.e. separate q and kv layout), the kv cache might be overwritten
|
||||
// after last chunk is processed.
|
||||
// To deal with this issue, the new tokens' kv will be appended to the kv cache first, and
|
||||
// overwrite the kv cache after FMHA is done.
|
||||
// The kv input layout is like: [cyclic kv cache] + [new tokens' kv].
|
||||
// There are two possible cases:
|
||||
// 1. The kv cache hasn't been overwritten while processing previous chunks, so we can take
|
||||
// it normally, where we have full kv cache.
|
||||
// 2. The kv cache has been overwritten while processing previous chunks. We need to mask
|
||||
// out the tokens in the kv cache based on the sliding window size. It needs to track the
|
||||
// last kv cache token's position in a circular way.
|
||||
|
||||
// Remap the kv row when kv cache has been overwritten in a circular way.
|
||||
if (past_seqlen_ > sliding_window_size_)
|
||||
{
|
||||
// Map the kv row to the new tokens' kv.
|
||||
if (row >= past_seqlen_)
|
||||
{
|
||||
row = sliding_window_size_ + (row - past_seqlen_);
|
||||
}
|
||||
else
|
||||
{
|
||||
// Map the kv row to the cyclic kv cache.
|
||||
row = row % sliding_window_size_;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Load data from memory.
|
||||
template <typename Smem_tile>
|
||||
inline __device__ void load(Smem_tile& smem_tile)
|
||||
@ -1144,13 +1107,6 @@ struct Gmem_tile_paged_kv
|
||||
for (int ii = 0; ii < LDGS; ++ii)
|
||||
{
|
||||
int row_idx = row_ + ii * (int) ROWS_PER_LDG;
|
||||
|
||||
// Remap row_idx if sliding window attention is used.
|
||||
// This will be removed later as the remapping will be handled by the kvCacheManager in TRTLLM.
|
||||
#ifdef GENERATE_CUBIN
|
||||
remap_kv_row(row_idx);
|
||||
#endif
|
||||
|
||||
int paged_kv_block_idx = (row_idx >> paged_kv_log2_block_size_);
|
||||
char const* local_kv_ptr = reinterpret_cast<char*>(paged_kv_block_pool_ptr_
|
||||
+ params_kv_block_size_in_bytes_ * paged_kv_global_block_offsets_[paged_kv_block_idx]);
|
||||
|
||||
@ -478,7 +478,7 @@ struct Mask<Traits, Cta_tile, 4> : public Mask<Traits, Cta_tile, 3>
|
||||
inline __device__ bool is_valid(int row, int col) const
|
||||
{
|
||||
// Is it a valid position in the sequence, i.e. are we in the lower triangle?
|
||||
return (row >= col) && (col >= max(0, row - sliding_window_size_));
|
||||
return (row >= col) && (col >= max(0, row + 1 - sliding_window_size_));
|
||||
}
|
||||
|
||||
// The sliding window size.
|
||||
@ -946,7 +946,7 @@ struct Mask_hopper<Traits, Cta_tile, 4> : public Mask_hopper<Traits, Cta_tile, 3
|
||||
inline __device__ bool is_valid(int row, int col) const
|
||||
{
|
||||
// Is it a valid position in the sequence?
|
||||
return col <= row && col >= max(0, row - sliding_window_size_);
|
||||
return col <= row && col >= max(0, row + 1 - sliding_window_size_);
|
||||
}
|
||||
|
||||
// The sliding window size for attention.
|
||||
|
||||
@ -288,7 +288,7 @@ struct Compute
|
||||
// The kv_left_mask_end is the start of the chunk.
|
||||
kv_left_mask_end = div_up(is_chunked_attention
|
||||
? ((tile_offset_end >> params.log2_chunked_attention_size) << params.log2_chunked_attention_size)
|
||||
: (tile_offset_end - params.sliding_window_size),
|
||||
: (tile_offset_end + 1 - params.sliding_window_size),
|
||||
STEP_KV);
|
||||
}
|
||||
|
||||
|
||||
@ -199,7 +199,7 @@ struct DMA
|
||||
// The kv_offset_start.
|
||||
int kv_offset_start = is_chunked_attention
|
||||
? ((q_step_offset >> params.log2_chunked_attention_size) << params.log2_chunked_attention_size)
|
||||
: max(0, q_step_offset - params.sliding_window_size);
|
||||
: max(0, q_step_offset + 1 - params.sliding_window_size);
|
||||
kv_idx_start = kv_offset_start / STEP_KV;
|
||||
}
|
||||
|
||||
@ -388,51 +388,6 @@ struct DMA
|
||||
elect_one_, {-1, -1, -1, -1, -1, -1, -1, -1});
|
||||
}
|
||||
|
||||
// Calculate the start tile idx.
|
||||
inline __device__ int remap_kv_tile_idx(
|
||||
int kv_tile_idx, int num_kv_cache_tiles, int past_kv_length, int sliding_window_size)
|
||||
{
|
||||
|
||||
// The remapped kv tile idx.
|
||||
int remapped_kv_tile_idx = kv_tile_idx;
|
||||
// This will be removed later as the remapping will be handled by the kvCacheManager in TRTLLM.
|
||||
#ifdef GENERATE_CUBIN
|
||||
// Sliding window attention + chunked context needs special handling.
|
||||
if constexpr (SLIDING_OR_CHUNKED_ATTENTION)
|
||||
{
|
||||
// For chunked context (i.e. separate q and kv layout), the kv cache might be
|
||||
// overwritten after last chunk is processed.
|
||||
// To deal with this issue, the new tokens' kv will be appended to the kv cache first,
|
||||
// and overwrite the kv cache after FMHA is done.
|
||||
// The kv input layout is like: [cyclic kv cache] + [new tokens' kv].
|
||||
// There are two possible cases:
|
||||
// 1. The kv cache hasn't been overwritten while processing previous chunks, so we can
|
||||
// take it normally, where we have full kv cache.
|
||||
// 2. The kv cache has been overwritten while processing previous chunks. We need to
|
||||
// mask out the tokens in the kv cache based on the sliding window size. It needs
|
||||
// to track the last kv cache token's position in a circular way.
|
||||
|
||||
// Remap the kv tile index when kv cache has been overwritten in a circular way.
|
||||
if (past_kv_length > sliding_window_size)
|
||||
{
|
||||
// Map the kv tile index to the new tokens' kv.
|
||||
if (kv_tile_idx * STEP_KV >= past_kv_length)
|
||||
{
|
||||
remapped_kv_tile_idx
|
||||
= num_kv_cache_tiles + int((kv_tile_idx * STEP_KV - past_kv_length) / STEP_KV);
|
||||
}
|
||||
else
|
||||
{
|
||||
// Map the kv tile index to the cyclic kv cache.
|
||||
remapped_kv_tile_idx = kv_tile_idx % num_kv_cache_tiles;
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
// Return the remapped kv tile idx.
|
||||
return remapped_kv_tile_idx;
|
||||
}
|
||||
|
||||
// Support contiguous Q + contiguous/paged KV separate cache.
|
||||
inline __device__ void run_separate_q_and_kv(
|
||||
bert::Fused_multihead_attention_params_v2 const& params, Shared* shared)
|
||||
@ -560,24 +515,20 @@ struct DMA
|
||||
// Iterate over the kv tiles for this q step.
|
||||
for (int kv_step_idx = kv_idx_start; kv_step_idx < kv_idx_end; kv_step_idx++)
|
||||
{
|
||||
// Remap the kv tile idx if sliding window attention is enabled.
|
||||
// Sliding_window_size should be multiple of STEP_KV.
|
||||
int remapped_kv_step_idx = remap_kv_tile_idx(kv_step_idx, params.sliding_window_size / STEP_KV,
|
||||
past_kv_length, params.sliding_window_size);
|
||||
// The barrier id.
|
||||
int bar_id;
|
||||
// Load paged kv input.
|
||||
if constexpr (PAGED_KV_INPUT)
|
||||
{
|
||||
bar_id = load_paged_kv(bidh_kv, remapped_kv_step_idx * STEP_KV, num_valid_kv_blocks,
|
||||
bar_id = load_paged_kv(bidh_kv, kv_step_idx * STEP_KV, num_valid_kv_blocks,
|
||||
params.paged_kv_cache.mTokensPerBlockLog2, params.blocks_per_tma_load,
|
||||
params.blocks_per_tma_load_log2, params.paged_kv_cache.mMaxBlocksPerSeq,
|
||||
paged_block_offsets, desc_k, desc_v, shared, cbw_k, cbw_v, cbw_v_scratch);
|
||||
}
|
||||
else
|
||||
{
|
||||
bar_id = load_kv(bidh_kv, remapped_kv_step_idx * STEP_KV, desc_k, desc_v, shared, cbw_k,
|
||||
cbw_v, cbw_v_scratch);
|
||||
bar_id = load_kv(
|
||||
bidh_kv, kv_step_idx * STEP_KV, desc_k, desc_v, shared, cbw_k, cbw_v, cbw_v_scratch);
|
||||
}
|
||||
|
||||
// Opportunistically hide headinfo in the shadow of UTMALDGs of the QKV tensor
|
||||
|
||||
@ -134,7 +134,7 @@ struct Softmax_base
|
||||
else
|
||||
{
|
||||
// The sliding window start is the max of 0 and row - sliding_window_size.
|
||||
return max(0, row - sliding_window_size_);
|
||||
return max(0, row + 1 - sliding_window_size_);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@ -1578,7 +1578,7 @@ int main(int argc, char** argv)
|
||||
}
|
||||
else
|
||||
{
|
||||
valid = valid && (si >= std::max(int(so - sliding_window_size), 0));
|
||||
valid = valid && (si >= std::max(int(so + 1 - sliding_window_size), 0));
|
||||
}
|
||||
}
|
||||
if (is_mtp)
|
||||
|
||||
@ -175,10 +175,10 @@ inline __device__ void device_flash_attention_nl(Params const& params)
|
||||
|
||||
int const kv_loop_end = ((valid_seqlen + Cta_tile_p::N - 1) / Cta_tile_p::N) * Cta_tile_p::N;
|
||||
int const kv_loop_start = mask_sliding_window
|
||||
? (max(0, q_sequence_start - params.sliding_window_size) / Cta_tile_p::N) * Cta_tile_p::N
|
||||
? (max(0, q_sequence_start + 1 - params.sliding_window_size) / Cta_tile_p::N) * Cta_tile_p::N
|
||||
: 0;
|
||||
int const sliding_window_mask_end = mask_sliding_window
|
||||
? (max(0, q_sequence_start + Cta_tile_p::M - 1 - params.sliding_window_size) / Cta_tile_p::N) * Cta_tile_p::N
|
||||
? (max(0, q_sequence_start + Cta_tile_p::M - params.sliding_window_size) / Cta_tile_p::N) * Cta_tile_p::N
|
||||
: 0;
|
||||
|
||||
static_assert(Cta_tile_p::M >= Cta_tile_p::N, "");
|
||||
|
||||
@ -176,10 +176,10 @@ inline __device__ void device_flash_attention_nl_tiled(Params const& params)
|
||||
|
||||
int const kv_loop_end = ((valid_seqlen + Cta_tile_p::N - 1) / Cta_tile_p::N) * Cta_tile_p::N;
|
||||
int const kv_loop_start = mask_sliding_window
|
||||
? (max(0, q_sequence_start - params.sliding_window_size) / Cta_tile_p::N) * Cta_tile_p::N
|
||||
? (max(0, q_sequence_start + 1 - params.sliding_window_size) / Cta_tile_p::N) * Cta_tile_p::N
|
||||
: 0;
|
||||
int const sliding_window_mask_end = mask_sliding_window
|
||||
? (max(0, q_sequence_start + Cta_tile_p::M - 1 - params.sliding_window_size) / Cta_tile_p::N) * Cta_tile_p::N
|
||||
? (max(0, q_sequence_start + Cta_tile_p::M - params.sliding_window_size) / Cta_tile_p::N) * Cta_tile_p::N
|
||||
: 0;
|
||||
|
||||
// Move K and V tiles.
|
||||
|
||||
@ -162,6 +162,10 @@ static_assert(CACHE_ELEM_ENUM != 0);
|
||||
#define OPTIMIZE_FOR_LATENCY 1
|
||||
#endif
|
||||
|
||||
#ifndef IS_SPEC_DEC_TREE
|
||||
#define IS_SPEC_DEC_TREE 1 // by default SPEC_DEC expect tree-based draft token structure
|
||||
#endif
|
||||
|
||||
#define DBG_BATCH_SIZE 2
|
||||
#define DBG_SEQ_LEN 256 * 4 + 3
|
||||
#define DBG_NB_CTAS_PER_SEQ 8
|
||||
|
||||
@ -1592,7 +1592,6 @@ CUBIN_EXPORT __global__
|
||||
#endif
|
||||
|
||||
uint32_t const cacheSeqLen = getCacheSeqLen<usePagedKVCache>(cacheList, idxReq);
|
||||
static_assert(!(allowSlidingWindow && useSpecDec), "Sliding window is not yet supported in spec-dec mode");
|
||||
#if SLIDING_WINDOW
|
||||
bool const rtIsReallySliding = (cacheSeqLen > slidingWinSize);
|
||||
uint32_t const nbTotalSkipTokens = rtIsReallySliding ? cacheSeqLen - slidingWinSize : 0;
|
||||
|
||||
@ -62,7 +62,7 @@ constexpr uint32_t gemm0NbThrds = gmmaWarpGrpSize * gemm0NbGmmaGrps;
|
||||
constexpr uint32_t gemm0NbWarps = gmmaWarpsPerGrp * gemm0NbGmmaGrps;
|
||||
#if SPEC_DEC && !SWAP_AB
|
||||
inline constexpr uint32_t ctaNbQHeads = Q_HEADS_PER_CTA;
|
||||
inline constexpr uint32_t inputTokensPerCta = exactDiv(ctaNbQHeads, headGrpSize);
|
||||
inline constexpr uint32_t inputTokensPerCta = ctaNbQHeads / headGrpSize;
|
||||
constexpr uint32_t ctaNbValidQHeads = ctaNbQHeads;
|
||||
#elif SPEC_DEC && SWAP_AB
|
||||
inline constexpr uint32_t inputTokensPerCta = specDecQLen;
|
||||
@ -347,21 +347,19 @@ __device__ inline uint32_t getInputTokOffset(SpecDecParams const& params, uint32
|
||||
return (params.qCuSeqLens == nullptr) ? params.qSeqLen * idxReq : params.qCuSeqLens[idxReq];
|
||||
}
|
||||
|
||||
static_assert(!allowSlidingWindow, "SpecDec is not supported for sliding window");
|
||||
|
||||
struct SpecDec
|
||||
{
|
||||
static inline constexpr uint32_t tileSize = gemm0CtaTileNbTokens;
|
||||
static inline constexpr uint32_t ctaMaxQSeqLen = exactDiv(ctaNbQHeads, headGrpSize);
|
||||
static inline constexpr uint32_t ctaMaxQSeqLen = (ctaNbQHeads / headGrpSize);
|
||||
using TileMaskRow = Vec<uint32_t, exactDiv(tileSize, 32)>;
|
||||
|
||||
__device__ inline SpecDec(SpecDecParams const& params, uint32_t idxReq, uint32_t ctaIdxY, uint32_t seqLen)
|
||||
__device__ inline SpecDec(SpecDecParams const& params, uint32_t idxReq, uint32_t idxInputSubSeq, uint32_t seqLen)
|
||||
: params(params)
|
||||
, ctaIdxY(ctaIdxY)
|
||||
, idxInputSubSeq(idxInputSubSeq)
|
||||
, seqLen(seqLen)
|
||||
{
|
||||
inputSeqLen = getInputSeqLen(params, idxReq);
|
||||
baseOffset = divUp(params.qSeqLen, 32U) * (getInputTokOffset(params, idxReq) + ctaMaxQSeqLen * ctaIdxY);
|
||||
baseOffset = divUp(params.qSeqLen, 32U) * (getInputTokOffset(params, idxReq) + ctaMaxQSeqLen * idxInputSubSeq);
|
||||
}
|
||||
|
||||
__device__ inline uint32_t unmaskedSeqLen() const
|
||||
@ -371,8 +369,8 @@ struct SpecDec
|
||||
|
||||
__device__ inline bool needMask(uint32_t idxTile, uint32_t idxQTokInCta) const
|
||||
{
|
||||
return tileSize * (idxTile + 1) > unmaskedSeqLen() && ctaMaxQSeqLen * ctaIdxY + idxQTokInCta < inputSeqLen
|
||||
&& params.mask != nullptr;
|
||||
return tileSize * (idxTile + 1) > unmaskedSeqLen()
|
||||
&& ctaMaxQSeqLen * idxInputSubSeq + idxQTokInCta < inputSeqLen && params.mask != nullptr;
|
||||
}
|
||||
|
||||
__device__ inline int32_t maskColBeg(uint32_t idxTile) const
|
||||
@ -408,14 +406,17 @@ struct SpecDec
|
||||
}
|
||||
|
||||
SpecDecParams const& params;
|
||||
uint32_t const ctaIdxY;
|
||||
uint32_t const idxInputSubSeq;
|
||||
uint32_t const seqLen;
|
||||
uint32_t inputSeqLen;
|
||||
uint32_t baseOffset;
|
||||
};
|
||||
|
||||
__device__ void warpGrpApplyMask(
|
||||
Gemm0Acc& acc, SpecDec const& specDec, uint32_t cacheSeqLen, uint32_t idxTile, uint32_t warpRank);
|
||||
__device__ void warpGrpApplyMask(Gemm0Acc& acc, SpecDec const& specDec,
|
||||
#if SLIDING_WINDOW && !IS_SPEC_DEC_TREE
|
||||
int32_t tok0WinBeg,
|
||||
#endif
|
||||
uint32_t cacheSeqLen, uint32_t idxTile, uint32_t warpRank);
|
||||
#endif
|
||||
|
||||
#if SWAP_AB
|
||||
@ -684,9 +685,31 @@ CUBIN_EXPORT __global__
|
||||
uint32_t const cacheSeqLen = getCacheSeqLen<usePagedKVCache>(cacheList, idxReq);
|
||||
static_assert(gemm0CtaTileNbTokens == gemm1CtaTileNbTokens);
|
||||
constexpr uint32_t tileSize = gemm0CtaTileNbTokens;
|
||||
static_assert(!(allowSlidingWindow && useSpecDec), "Sliding window is not yet supported in spec-dec mode");
|
||||
#if SLIDING_WINDOW
|
||||
#if SPEC_DEC
|
||||
uint32_t const idxInputSubSeq = blockIdx.x;
|
||||
uint32_t const inputSeqLen = reqInputTokEnd - reqInputTokBeg;
|
||||
uint32_t const ctaTokOffset = inputTokensPerCta * idxInputSubSeq;
|
||||
uint32_t const ctaNbValidTokens = mha::min(uint32_t{inputTokensPerCta}, inputSeqLen - ctaTokOffset);
|
||||
|
||||
if (ctaTokOffset >= inputSeqLen)
|
||||
{
|
||||
return;
|
||||
}
|
||||
#else
|
||||
uint32_t const idxInputSubSeq = 0;
|
||||
uint32_t const inputSeqLen = 1;
|
||||
uint32_t const ctaTokOffset = 0;
|
||||
uint32_t const ctaNbValidTokens = 1;
|
||||
#endif
|
||||
#if SLIDING_WINDOW && SPEC_DEC && !IS_SPEC_DEC_TREE
|
||||
// get the actual start position depending on ctaTokOffset, which is the draft token position per CTA
|
||||
uint32_t const tok0SeqLen = cacheSeqLen - inputSeqLen + 1 + ctaTokOffset;
|
||||
int32_t const tok0WinBeg = int32_t(tok0SeqLen) - int32_t(slidingWinSize);
|
||||
uint32_t const nbTotalSkipTokens = mha::max(0, tok0WinBeg);
|
||||
#elif SLIDING_WINDOW
|
||||
bool const rtIsReallySliding = (cacheSeqLen > slidingWinSize);
|
||||
// if SPEC_DEC && SLIDING_WINDOW && IS_SPEC_DEC_TREE, it should not do sliding
|
||||
assert(!SPEC_DEC || !rtIsReallySliding);
|
||||
uint32_t const nbTotalSkipTokens = rtIsReallySliding ? cacheSeqLen - slidingWinSize : 0;
|
||||
#else
|
||||
constexpr bool rtIsReallySliding = false;
|
||||
@ -720,21 +743,6 @@ CUBIN_EXPORT __global__
|
||||
{
|
||||
return;
|
||||
}
|
||||
#if SPEC_DEC
|
||||
uint32_t const idxInputSubSeq = blockIdx.x;
|
||||
uint32_t const inputSeqLen = reqInputTokEnd - reqInputTokBeg;
|
||||
uint32_t const ctaTokOffset = inputTokensPerCta * idxInputSubSeq;
|
||||
uint32_t const ctaNbValidTokens = mha::min(uint32_t{inputTokensPerCta}, inputSeqLen - ctaTokOffset);
|
||||
if (ctaTokOffset >= inputSeqLen)
|
||||
{
|
||||
return;
|
||||
}
|
||||
#else
|
||||
uint32_t const idxInputSubSeq = 0;
|
||||
uint32_t const inputSeqLen = 1;
|
||||
uint32_t const ctaTokOffset = 0;
|
||||
uint32_t const ctaNbValidTokens = 1;
|
||||
#endif
|
||||
uint32_t const ctaInputTokBeg = reqInputTokBeg + ctaTokOffset;
|
||||
auto const warpIdx = getWarpIdx(uint3{128, 1, 3});
|
||||
auto const wid = warpIdx.z * 4 + warpIdx.x;
|
||||
@ -886,10 +894,13 @@ CUBIN_EXPORT __global__
|
||||
#endif
|
||||
// apply qkScale
|
||||
acc = acc * qkScale;
|
||||
|
||||
// apply mask
|
||||
#if SPEC_DEC
|
||||
warpGrpApplyMask(acc, specDec, cacheSeqLen, idxKTile, warpRank);
|
||||
warpGrpApplyMask(acc, specDec,
|
||||
#if SLIDING_WINDOW && !IS_SPEC_DEC_TREE
|
||||
tok0WinBeg,
|
||||
#endif
|
||||
cacheSeqLen, idxKTile, warpRank);
|
||||
#else
|
||||
bool const isFirstTile = (idxKTile == nbSkipLeadingTiles);
|
||||
bool const needMaskLeading = (rtIsReallySliding && isFirstTile && tile0NbSkipTokens > 0);
|
||||
@ -1342,7 +1353,6 @@ CUBIN_EXPORT __global__
|
||||
kTilePartLoader.loadPages(idxKTile);
|
||||
#if USE_INPUT_KV || ENABLE_PDL == 2
|
||||
#if SPEC_DEC
|
||||
static_assert(SLIDING_WINDOW == 0);
|
||||
bool const anyNewTokens = (gemm0CtaTileNbTokens * (idxKTile + 1) > cacheSeqLen - inputSeqLen);
|
||||
#else
|
||||
bool const anyNewTokens = (gemm0CtaTileNbTokens * (idxKTile + 1) >= cacheSeqLen);
|
||||
@ -1411,7 +1421,6 @@ CUBIN_EXPORT __global__
|
||||
vTileLoader.loadPages(idxVTile);
|
||||
#if USE_INPUT_KV || ENABLE_PDL == 2
|
||||
#if SPEC_DEC
|
||||
static_assert(SLIDING_WINDOW == 0);
|
||||
bool const anyNewTokens = (gemm0CtaTileNbTokens * (idxVTile + 1) > cacheSeqLen - inputSeqLen);
|
||||
#else
|
||||
bool const anyNewTokens = (gemm0CtaTileNbTokens * (idxVTile + 1) >= cacheSeqLen);
|
||||
@ -1838,8 +1847,11 @@ __device__ inline GMemKVCacheHead& KVTilePartLoader::getHead(uint32_t pos)
|
||||
|
||||
#if SWAP_AB
|
||||
#if SPEC_DEC
|
||||
__device__ inline void warpGrpApplyMask(
|
||||
Gemm0Acc& acc, SpecDec const& specDec, uint32_t cacheSeqLen, uint32_t idxTile, uint32_t warpRank)
|
||||
__device__ inline void warpGrpApplyMask(Gemm0Acc& acc, SpecDec const& specDec,
|
||||
#if SLIDING_WINDOW && !IS_SPEC_DEC_TREE
|
||||
int32_t tok0WinBeg,
|
||||
#endif
|
||||
uint32_t cacheSeqLen, uint32_t idxTile, uint32_t warpRank)
|
||||
{
|
||||
constexpr uint32_t tileSize = gemm0CtaTileNbTokens;
|
||||
static_assert(SPEC_Q_SEQ_LEN <= sizeof(MaskType) * 8, "not implemented");
|
||||
@ -2215,22 +2227,40 @@ __device__ inline RegRowWiseVec computeWarpGrpRowMax_sync(
|
||||
}
|
||||
|
||||
#if SPEC_DEC
|
||||
__device__ inline void warpGrpApplyMask(
|
||||
Gemm0Acc& acc, SpecDec const& specDec, uint32_t cacheSeqLen, uint32_t idxTile, uint32_t warpRank)
|
||||
__device__ inline void warpGrpApplyMask(Gemm0Acc& acc, SpecDec const& specDec,
|
||||
#if SLIDING_WINDOW && !IS_SPEC_DEC_TREE
|
||||
int32_t tok0WinBeg,
|
||||
#endif
|
||||
uint32_t cacheSeqLen, uint32_t idxTile, uint32_t warpRank)
|
||||
{
|
||||
static_assert(!SLIDING_WINDOW, "SpecDec is not supported for sliding window");
|
||||
constexpr uint32_t tileSize = gemm0CtaTileNbTokens;
|
||||
auto const inputSeqLen = specDec.inputSeqLen;
|
||||
auto const idxInputSubSeq = specDec.idxInputSubSeq;
|
||||
constexpr uint64_t fullMask = ~uint64_t{0};
|
||||
static_assert(tileSize == sizeof(fullMask) * 8);
|
||||
#if SLIDING_WINDOW && !IS_SPEC_DEC_TREE
|
||||
uint32_t const ctaTokOffset = inputTokensPerCta * idxInputSubSeq;
|
||||
Range const tileRange = {tileSize * idxTile, tileSize * idxTile + tileSize};
|
||||
Range const maxMaskOutRange = {0, mha::max(0, tok0WinBeg) + (inputTokensPerCta - 1)};
|
||||
bool const ctaNeedBegMask = tileRange.beg < maxMaskOutRange.end;
|
||||
assert(ctaNeedBegMask == overlap(tileRange, maxMaskOutRange));
|
||||
int32_t const tok0NbMaskOut = int32_t(tok0WinBeg) - int32_t(tileSize * idxTile);
|
||||
#else
|
||||
constexpr bool ctaNeedBegMask = false;
|
||||
uint64_t const begMask = fullMask;
|
||||
int32_t const tok0NbMaskOut = -2147483648;
|
||||
#endif
|
||||
uint32_t const offset = tileSize * idxTile;
|
||||
uint32_t const nbValidCols = mha::min(offset < cacheSeqLen ? cacheSeqLen - offset : 0U, tileSize);
|
||||
bool const ctaNeedEndMask = (nbValidCols < tileSize);
|
||||
bool const ctaNeedSpecDecMask = specDec.needMask(idxTile, 0);
|
||||
bool const needMask = ctaNeedEndMask || ctaNeedSpecDecMask;
|
||||
bool const needMask = ctaNeedBegMask || ctaNeedEndMask || ctaNeedSpecDecMask;
|
||||
if (!needMask)
|
||||
{
|
||||
return;
|
||||
}
|
||||
static_assert(tileSize == 64, "not implemented");
|
||||
auto const endMask = (~uint64_t{0} >> (tileSize - nbValidCols));
|
||||
auto const endMask = fullMask >> (tileSize - nbValidCols);
|
||||
|
||||
uint32_t const idxInQuad = laneId() % 4;
|
||||
uint32_t const idxQuad = laneId() / 4;
|
||||
@ -2241,10 +2271,19 @@ __device__ inline void warpGrpApplyMask(
|
||||
for (uint32_t i = 0; i < GmmaAccCoreMat::rows; i++)
|
||||
{
|
||||
uint32_t const row = gmma::instM * m + gmma::instM / 4 * warpRank + 8 * i + idxQuad;
|
||||
auto const specDecMask = specDec.needMask(idxTile, row / headGrpSize)
|
||||
? specDec.loadTileMaskRow(idxTile, row / headGrpSize)
|
||||
uint32_t const idxQTokInCta = row / headGrpSize;
|
||||
bool const isQTokValid
|
||||
= (headGrpSize * inputTokensPerCta == ctaNbQHeads) || (idxQTokInCta < inputTokensPerCta);
|
||||
auto const specDecMask = (isQTokValid && specDec.needMask(idxTile, idxQTokInCta))
|
||||
? specDec.loadTileMaskRow(idxTile, idxQTokInCta)
|
||||
: SpecDec::TileMaskRow{~0U, ~0U};
|
||||
auto const mask = endMask & reinterpret_cast<uint64_t const&>(specDecMask);
|
||||
#if SLIDING_WINDOW && !IS_SPEC_DEC_TREE
|
||||
int32_t const begNbMaskOut = tok0NbMaskOut + int32_t(idxQTokInCta);
|
||||
uint64_t const begMask = (begNbMaskOut > 0 ? fullMask << begNbMaskOut : fullMask);
|
||||
#else
|
||||
uint64_t const begMask = fullMask;
|
||||
#endif
|
||||
auto const mask = begMask & endMask & reinterpret_cast<uint64_t const&>(specDecMask);
|
||||
if (mask == ~uint64_t{0})
|
||||
{
|
||||
continue;
|
||||
|
||||
@ -155,7 +155,16 @@ Eigen::Matrix<float, headGrpSize, validElemsPerHead, Eigen::RowMajor> refAttenti
|
||||
{
|
||||
qF32[i] = toF32Head(q[i]);
|
||||
}
|
||||
#if SPEC_DEC && SLIDING_WINDOW
|
||||
// In Spec-dec + SLIDING WINDOW mode, only allow linear tree or !rtIsReallySliding.
|
||||
// the token starting position is seqLen - qSeqLen + 1
|
||||
assert(!IS_SPEC_DEC_TREE || seqLen - qSeqLen + 1 < slidingWinSize);
|
||||
uint32_t const tok0SeqLen = seqLen - qSeqLen + 1 + q_len;
|
||||
uint32_t const seqBeg
|
||||
= (int32_t(tok0SeqLen) < int32_t(slidingWinSize) ? 0 : int32_t(tok0SeqLen) - int32_t(slidingWinSize));
|
||||
#else
|
||||
uint32_t const seqBeg = (seqLen < slidingWinSize ? 0 : seqLen - slidingWinSize);
|
||||
#endif
|
||||
gemm0Acc.leftCols(seqBeg).fill(-INFINITY);
|
||||
for (uint32_t j = seqBeg; j < seqLen; j++)
|
||||
{
|
||||
|
||||
@ -130,7 +130,7 @@ template <uint32_t nbKHeads>
|
||||
#endif
|
||||
#endif
|
||||
void runTest(uint32_t batchSize, uint32_t seqLen, bool testPerf, bool refCheck, bool verbose = false,
|
||||
bool saveData = false, uint32_t ctxLen = ~0U, uint32_t slidingWinSize = std::numeric_limits<uint32_t>::max())
|
||||
bool saveData = false, uint32_t ctxLen = ~0U, uint32_t slidingWinSize = 1U << 30)
|
||||
{
|
||||
#if IS_MLA
|
||||
if (nbKHeads != 1)
|
||||
@ -363,6 +363,8 @@ void runTest(uint32_t batchSize, uint32_t seqLen, bool testPerf, bool refCheck,
|
||||
{
|
||||
#if IS_MLA || SPEC_Q_SEQ_LEN
|
||||
hostMask[tokenIdx * qSeqLen + kvPosIdx] = (tokenIdx >= kvPosIdx);
|
||||
#elif !IS_SPEC_DEC_TREE
|
||||
hostMask[tokenIdx * qSeqLen + kvPosIdx] = tokenIdx >= kvPosIdx;
|
||||
#else
|
||||
hostMask[tokenIdx * qSeqLen + kvPosIdx] = maskDist(rng);
|
||||
#endif
|
||||
@ -1038,6 +1040,14 @@ TEST(RefCheck, llama_V2_70b_3)
|
||||
runTest<8, HEAD_GROUP_SIZE, Q_SEQ_LEN>(8, 1028, runPerfTest, runCheckTest);
|
||||
runTest<8, HEAD_GROUP_SIZE, Q_SEQ_LEN>(8, 2048, runPerfTest, runCheckTest);
|
||||
runTest<8, HEAD_GROUP_SIZE, Q_SEQ_LEN>(8, 4096, runPerfTest, runCheckTest);
|
||||
runTest<8, HEAD_GROUP_SIZE, Q_SEQ_LEN>(8, 2048, runPerfTest, runCheckTest);
|
||||
|
||||
#if SLIDING_WINDOW && !IS_SPEC_DEC_TREE
|
||||
runTest<4, HEAD_GROUP_SIZE, Q_SEQ_LEN>(4, 2039, false, runCheckTest, true, false, ~0U, 1024);
|
||||
runTest<8, HEAD_GROUP_SIZE, Q_SEQ_LEN>(8, 63, false, runCheckTest, true, false, ~0U, 61);
|
||||
runTest<1, HEAD_GROUP_SIZE, Q_SEQ_LEN>(8, 2, false, true, true, false, ~0U, 1);
|
||||
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
@ -1056,3 +1056,14 @@ public:
|
||||
private:
|
||||
uint32_t mTic;
|
||||
};
|
||||
|
||||
// [beg, end)
|
||||
struct Range
|
||||
{
|
||||
uint32_t beg, end;
|
||||
};
|
||||
|
||||
constexpr bool overlap(Range a, Range b)
|
||||
{
|
||||
return a.beg < b.end && b.beg < a.end;
|
||||
}
|
||||
|
||||
@ -197,6 +197,7 @@ bool AttentionOp::convertMMHAParamsToXQAParams(tensorrt_llm::kernels::XQAParams&
|
||||
xqaParams.multi_block_mode = common::getEnvForceDeterministicAttention() ? false : mMultiBlockMode;
|
||||
// Medusa mode will have multiple query tokens.
|
||||
xqaParams.multi_query_tokens = mIsSpecDecodingEnabled && mUseSpecDecoding;
|
||||
xqaParams.is_spec_dec_tree = mIsSpecDecTree;
|
||||
|
||||
if (mKVCacheQuantMode.hasInt8KvCache())
|
||||
{
|
||||
@ -1723,10 +1724,6 @@ int AttentionOp::enqueueContext(EnqueueContextParams<T> const& params, cudaStrea
|
||||
// Run the fmha kernel.
|
||||
mFmhaDispatcher->run(fmhaParams);
|
||||
sync_check_cuda_error(stream);
|
||||
// The kv cache might need to be updated after FMHA (only when sliding window attention + chunked context is
|
||||
// used together). Reuse the preprocessingParams.
|
||||
invokeKvCachePostprocessing(preprocessingParams, stream);
|
||||
sync_check_cuda_error(stream);
|
||||
|
||||
if (mCpSize > 1 && mAttnTpSize > 1 && mAttnCpSize == 1)
|
||||
{
|
||||
|
||||
@ -391,6 +391,7 @@ public:
|
||||
bool mHasFullAttentionMask = false;
|
||||
bool mIsSpecDecodingEnabled = false;
|
||||
bool mUseSpecDecoding = false;
|
||||
bool mIsSpecDecTree = true;
|
||||
bool mSpecDecodingIsGenerationLengthVariable = false;
|
||||
int32_t mSpecDecodingMaxGenerationLength = 1;
|
||||
bool mIsMLAEnabled = false;
|
||||
@ -440,7 +441,7 @@ public:
|
||||
mBlockSparseParams.data(), mPagedKVCache, mTokensPerBlock, mKVCacheQuantMode.value(), mTpSize, mTpRank,
|
||||
mUnfuseQkvGemm, (int32_t) mType, mMaxContextLength, mQKVBiasEnabled, mCrossAttention, mMaxDistance,
|
||||
mPosShiftEnabled, mPagedContextFMHA, mFP8ContextFMHA, mDenseContextFMHA, mHasFullAttentionMask,
|
||||
mIsSpecDecodingEnabled, mUseSpecDecoding, mSpecDecodingIsGenerationLengthVariable,
|
||||
mIsSpecDecodingEnabled, mUseSpecDecoding, mIsSpecDecTree, mSpecDecodingIsGenerationLengthVariable,
|
||||
mSpecDecodingMaxGenerationLength, mIsMLAEnabled, mIsGenerationMLA, mUseGenFlashMLA, mMLAParams.data(),
|
||||
mCpSize, mCpRank, mCpGroup, mNumAttnHeads, mNumAttnKVHeads, mNumKVHeadsOrigin, mAttnTpSize, mAttnTpRank,
|
||||
mAttnCpSize, mAttnCpRank, mUlyssesMQABroadcast, mEnableContextFMHA, mFMHAForceFP32Acc, mMultiBlockMode,
|
||||
|
||||
@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:0a0671e7cbbed9f51dc0c47e4b970e2f72067d629ff6562c9d65f9cd55c68578
|
||||
size 361861
|
||||
@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:5ec9817bebb07483ce29d8d91c45d35c2c05f0101bfa70146fba5a6576a6b825
|
||||
size 1091614
|
||||
@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:0540cdb398818ec54a60c34b462c158e169347db73d244d633669d74211696ba
|
||||
size 1467312
|
||||
@ -1,3 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:3fff0dfc8b05bdfd41b9f00d65567ff8a96f36e56a75b31e5c48835b7d9c90f6
|
||||
size 693780
|
||||
oid sha256:69bdfba64f1faff30ed8389a28b7b9ef37c0d180b1df643722b280011c8f74e8
|
||||
size 692990
|
||||
|
||||
@ -1,3 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:9fa28c23d82290a782267b18eaa36a545213045d493a72513e3a65305c0fb080
|
||||
size 672452
|
||||
oid sha256:c8173308813999ab64ba8236016b23fbfd3f3f1501f61290bf71ea027ead2920
|
||||
size 642456
|
||||
|
||||
@ -1,3 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:70b101d8936e175391d8051967ff5733a144118ff8793b29b612eac92abc581e
|
||||
size 423439
|
||||
oid sha256:f41ae066b01b2a9c3b5165535f743461a9a1d559f6fcd0a00a04c554f8a50962
|
||||
size 414757
|
||||
|
||||
@ -1,3 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:26ae7817cbed824212d92c0eb8b25d0f6b9d6281e4d4b6e95e9b6d6d2f5f0faf
|
||||
size 1236860
|
||||
oid sha256:ab0be8e667d459e13135f96469613f1c095e47187b24e5d40c7c57583351a076
|
||||
size 1194236
|
||||
|
||||
@ -1,3 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:97dcf2a904ca8ce22f2282644a53986b03f7c0d7948803d2b2b401d6a6dfb5a9
|
||||
size 1719120
|
||||
oid sha256:03d86280f76994e2e01d43747cb5c811496b8340d031ebb0c3bdd46437422994
|
||||
size 1654394
|
||||
|
||||
@ -1,3 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:d8a9578f22279c7f83f0126eada9fb14a959e3e841efd641b780be06d5e7ebde
|
||||
size 375277
|
||||
oid sha256:35c5715bcb1a16c343f3a28be105fb6fee1bbca24cf832f71a7d0f20cf9a0b3e
|
||||
size 365015
|
||||
|
||||
@ -1,3 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:e8f883e1814759b4e4e643edb51465f132f27dd77392e9403908cd954eccb19e
|
||||
size 1137402
|
||||
oid sha256:a3335a8d4b2c0ca63f006c3f957d57aa3f808ef06d4adda322c311a333286d84
|
||||
size 1126352
|
||||
|
||||
@ -1,3 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:eb96a6fdcae7f8e19516c4bc4064ccd759906a8b0052e5148fd01e59c37e2f4f
|
||||
size 652776
|
||||
oid sha256:fdc0bf099862d352b3b765e117437240a82e4749d3efd104881647dd4ea14562
|
||||
size 644092
|
||||
|
||||
@ -1,3 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:93fb97424b5abb3f807b300bc67bc37f14355831d0ff1ffa2d5d9c0fd872731d
|
||||
size 1137390
|
||||
oid sha256:ccd938df8f78af4eae306c6e9e669599c2baf6f095f956318470063c560fbd3c
|
||||
size 1091610
|
||||
|
||||
@ -1,3 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:a6803c454338b0a0c548204701ba4411ab55602b42cd2122140b5db09cd19660
|
||||
size 1537558
|
||||
oid sha256:ce4d35ab4c7b65476f0dcec635db1791fcb718afd6b3531338712f5b2bc9aa84
|
||||
size 1460204
|
||||
|
||||
@ -1,3 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:8396a30929e67e906ac438e011acdd1eac5e2bd2fa887c2f6ae8aa0f5b6ccda8
|
||||
size 514281
|
||||
oid sha256:d088ce37b21d335ba1f92034cf97f78fc968d7fecaa0c4f9ec83a0d5165f1d99
|
||||
size 482709
|
||||
|
||||
@ -1,3 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:2c51433d1240dc1d8ab205f89b8cb7f83d93e0224850433610fd95555ecf6222
|
||||
size 665822
|
||||
oid sha256:40653ec672098e2cb1f94c473fa67852efcf6b49a6e8109e4fcf39422281acb4
|
||||
size 657930
|
||||
|
||||
@ -1,3 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:60f4a4656af5bbeb2c8552bf9f9c7cd779586a4cb5cc9f6cbb1e38d8b279226d
|
||||
size 684322
|
||||
oid sha256:96348957990518db6f51af7c681a71e625dede568cc8f8303dd2de8ad09bfc28
|
||||
size 677218
|
||||
|
||||
@ -1,3 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:61dcb9e691d97658eb41885a1801dc84a2818b7b9939163864c60b2f2f698d01
|
||||
size 370981
|
||||
oid sha256:4687df80ac2fa9454b0564b0a80d78cfaedc2c7796c8f3a1010dd7ebbf722c83
|
||||
size 369401
|
||||
|
||||
@ -1,3 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:d188489645839f22b23f7ab60024a38784246dd3cdebb2860afba4b17e555987
|
||||
size 981870
|
||||
oid sha256:d8b9985065f5f2c62b74c05f8eed02b1909c96656b26fbd7779cc57a2146b037
|
||||
size 947140
|
||||
|
||||
@ -1,3 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:5bc5c98f5bb68ce8457192a8deb66fd33bd4e18181f6543a80ffee90f9fa889c
|
||||
size 610511
|
||||
oid sha256:23599e63b07ad966df921daf3cb97a9ed5cde27eeda0fd96ba5abd835b48f89a
|
||||
size 590779
|
||||
|
||||
@ -1,3 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:38facf3787477a775cb81819dd32adc2b14302a6e245ea1bd39a7c79a27f6be1
|
||||
size 1922792
|
||||
oid sha256:cd1c452565583b20913d835de9b14c2f19c0cc431bc926ea6c92295362a85bca
|
||||
size 1813864
|
||||
|
||||
@ -1,3 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:49d610072be65cb35753c025a6e34d297cb8b00763e31f032f8068fd49e82746
|
||||
size 2606330
|
||||
oid sha256:b20de2c6bb3081564ddfbf7ece80fb2c17e66f4e7ff0e0969da4e4655e90d1ec
|
||||
size 2407418
|
||||
|
||||
@ -1,3 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:78b4569d41bffce532654f3b0641599049004acba634be1965685863f4485949
|
||||
size 570241
|
||||
oid sha256:33a0e8bb2391128e688e5c6356f09a5ed189ce5c1bcdeef4efc0ce0415dc2849
|
||||
size 555245
|
||||
|
||||
@ -1,3 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:12660d6342b533a1023650fe1c40ed8df1e303878035422e4995697de1abce6b
|
||||
size 692632
|
||||
oid sha256:4b014f41b1cfdf6ed2729778841213a36440191eb3c087346a02c21510bd3f0e
|
||||
size 665794
|
||||
|
||||
@ -1,3 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:ff17dcd50d76036338dc9f3d009b6b10f5d2b8a338342fef9018dd73a79f1b7a
|
||||
size 1804378
|
||||
oid sha256:bd77afeb7dcd1ff8d6be80788b20e92e4fbc8c3026ba12d1d522c99316754a7c
|
||||
size 1740442
|
||||
|
||||
@ -1,3 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:760cc23fd160128f4be3fd1dd6f6ef4bf18551106404b146b7f374af3fb81c4d
|
||||
size 2338732
|
||||
oid sha256:b674707d02aac297b66d523de8b11618ca1598c49eeaf7ce9b1c9d516ce95c4b
|
||||
size 2247958
|
||||
|
||||
@ -1,3 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:de60062494c933226d989901d7fc15d886fd5a84c124f1c01fe583cb45281801
|
||||
size 601899
|
||||
oid sha256:7556f88488e05ee669e763b839afa1b7690060cfa9d8482d419c0ca336df9352
|
||||
size 595585
|
||||
|
||||
@ -1,3 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:367458885389381731b08889460600b9a4e9542cc979a38ad05d6ca3992744b3
|
||||
size 912898
|
||||
oid sha256:ac9d879aa0c70967bb3a79cd7034998baf43a544c0dd4444ebddeb76e78df5ae
|
||||
size 908162
|
||||
|
||||
@ -1,3 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:87b40dfd9d1ab2258d7de80a89820e686e87243ab43f7dd20990c871d4202841
|
||||
size 1408612
|
||||
oid sha256:4e781c0278fc46142f578ae51bfeb38767e89d9c25b92023215948f99dd1d3ed
|
||||
size 1371512
|
||||
|
||||
@ -1,3 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:ea80c0c776d59d68b5a47ed7ba0fc8e37ea38ab189419519795ca57dd7589304
|
||||
size 1475704
|
||||
oid sha256:d608e9e3ec460d2a38f43067a7d7a2dd408e068db690806bbafb11007e175336
|
||||
size 1419662
|
||||
|
||||
@ -1,3 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:b3c7887870f3defa8c2595868c2c8b40afb2ca0b090dc241ad8a34c754857ab4
|
||||
size 1475704
|
||||
oid sha256:9c1e1d300866c6425c2495e550230051debdca0a7eb85874ae33c0c2de8a81cb
|
||||
size 1419662
|
||||
|
||||
@ -1,3 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:b797da09627dbf7661ccad3e8b7fd741330f008b3f8e033b7a3c7787a7233e1d
|
||||
size 2003768
|
||||
oid sha256:132d83639e34af1b431abdcb3f09542d0389030b85752e18a3ae221ead7d24a3
|
||||
size 1965880
|
||||
|
||||
@ -1,3 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:c55e36802f8679e988ed6fac295314367dd9914c5ff457b7c4c5437ab8b53a41
|
||||
size 1391232
|
||||
oid sha256:4a96710f6c691580c2363c187a75fd436f5e6be732810a1a45182ce72dc52d1e
|
||||
size 1380182
|
||||
|
||||
@ -1,3 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:7d9a65aa870c5057349809ae2cc7e03837e37ac3ef2e5633d19e69c444358c96
|
||||
size 1409386
|
||||
oid sha256:a6339f008f451d030aa36a6b3fac7179e7534f7f2474d641fa0ebfbf487074e7
|
||||
size 1401494
|
||||
|
||||
@ -1,3 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:76cbfb5a29797bbeb2adad93c0c1e0fd4c1c544a6c12faa2a825cdb4eff1dff2
|
||||
size 1409386
|
||||
oid sha256:57ebcae2b70fc28881f2b3969868d64c203ef4a9cbc9588a9e28051c5f5b6849
|
||||
size 1401494
|
||||
|
||||
@ -1,3 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:61c16947041287198b160091a89f1677ebe7babed9c9da6f6625436f7b526a6f
|
||||
size 1946134
|
||||
oid sha256:5e2a4ce1b944feb2b3ed535943089a2d5968bf523b149885df78f7fa4bd7e835
|
||||
size 1935872
|
||||
|
||||
@ -1,3 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:f1114bbd784a3ea000d86f00e35086435d50c430ed695448a306cfc4bd54f60c
|
||||
size 309055
|
||||
oid sha256:f5d456b30f89ad05ba5b852fabcffb3f8269913d83ef8c0e4e319f2243dee54d
|
||||
size 305897
|
||||
|
||||
@ -1,3 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:3c8905ae4aafc41cce6557456bdf08d7ae6eb5a93286ccbf5d0b745fb33cd298
|
||||
size 293267
|
||||
oid sha256:85593d3c2fecb6842a72952c6dcbde19a70e6b26245829d279ca50bb391eb636
|
||||
size 290109
|
||||
|
||||
@ -1,3 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:e373ec7eb583a0803821145ec16f2ecf1a173c70f0796207750e51b97c72d604
|
||||
size 528501
|
||||
oid sha256:69cd61bd8334d2109067ef0460a91b8dba4c2cb07392eb636d72d025ccb15bf9
|
||||
size 498507
|
||||
|
||||
@ -1,3 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:2805c97b33142d036c8fc510d603e5c0d6d74174ae1f15b04feeedf44f0b5ab6
|
||||
size 702156
|
||||
oid sha256:0427b7729ce3cfa652a4595d04f936a947febec8f2c96ce33eed7cbaaa05613e
|
||||
size 668214
|
||||
|
||||
@ -1,3 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:111f7cebf93583b831e5714ab597ef6cf9afe9a215a5a9bb1cedf04176f4129b
|
||||
size 761356
|
||||
oid sha256:321bcd81b8965c8dfc08682f775508ae18e3ff711490ee8dff5fe56c20f74843
|
||||
size 711628
|
||||
|
||||
@ -1,3 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:9b44d7f8e5db9b0fd8ccdd905124faf5a703c89c6de326367ba200697fb518fa
|
||||
size 806372
|
||||
oid sha256:aa77d3789c0ca314689125ec303a8af76554120a708a4b63395c69b7aad07f04
|
||||
size 752698
|
||||
|
||||
@ -1,3 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:664ed6e91ccd091fb4733b55a2799d4562df876ef4e3be8ca79e6d0b55bace4a
|
||||
size 803980
|
||||
oid sha256:aa35aa70d0fa304c776c076a1a189d32a054d3f696dac5d99018085d1108c73b
|
||||
size 748726
|
||||
|
||||
@ -1,3 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:98431cb031d4d41035fd7a5a253fbf4b23214ba9e8689749ad23de925d97b0eb
|
||||
size 999734
|
||||
oid sha256:d1a702d456b5acf279487dd810e3e33efdd1c7bd82530ceb5a32ad30ec30396c
|
||||
size 946060
|
||||
|
||||
@ -1,3 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:48ab14dd4c3e988db85530381833b1753fc8579a8716df1a81799d122ecc19cd
|
||||
size 520607
|
||||
oid sha256:558aa7d42de329c49361c94c4baef16738304b21b6adbe675d77c7819ef37660
|
||||
size 489823
|
||||
|
||||
@ -1,3 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:a4aa5c1c533f5ce60a50110a6bbfa2af6cd7a0488776cb1fd491ce594b0f94f4
|
||||
size 520607
|
||||
oid sha256:7b5baa6048e6c33e74c6d343eb7c76252ff2e534fe467b3189af12b5d64af37c
|
||||
size 489823
|
||||
|
||||
@ -1,3 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:b0dae8957de096f310cfe6bb977babbe745e7542072920a454a60b9ad05c4318
|
||||
size 530867
|
||||
oid sha256:e17cb191ad092e6db255ea503e49ea883ed56322fc58ed8d68710f6687376c1f
|
||||
size 500083
|
||||
|
||||
@ -1,3 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:849c37d9f772de883d6fa358161f977216d48932ef8a27cec2cfe931c9880e06
|
||||
size 500861
|
||||
oid sha256:bfca5660a931e08941347f7a0aefa82c214940e8eaa6b6d89cfded621f34a490
|
||||
size 496125
|
||||
|
||||
@ -1,3 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:189df2e89d79e1969521dcb124bcd71f274493e369b2809fc5ed552e8be1977b
|
||||
size 184391
|
||||
oid sha256:fffd2cd799953808034d7e7b89a57d4fede24db124bfb0d3938188177acbdfeb
|
||||
size 182023
|
||||
|
||||
@ -1,3 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:43ae547cc799f0c688c19daee4bf357d6d2fe2c06d894bcded7ac40e699caced
|
||||
size 184391
|
||||
oid sha256:19ada3a5d449542f103077db8d193bc2293a8f48ccee201e366473964287314c
|
||||
size 182023
|
||||
|
||||
@ -1,3 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:39c941a13e14d0cbfcd19e1d11f75047227aaf992d60b56e45f063f92ff80cc8
|
||||
size 667412
|
||||
oid sha256:b9c32124cd708aab7da30637d85437da0af9bf2157d163c19c6fe14498698cda
|
||||
size 661096
|
||||
|
||||
@ -1,3 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:868ce05564bbf9e23a3f6562bd75d537d1c5e901eeb0bbecb24261bcc7d23370
|
||||
size 676094
|
||||
oid sha256:7f248fd42759509c61d20f912ae74dc3a85448a9c8386370ea92492ed9031e80
|
||||
size 672936
|
||||
|
||||
@ -1,3 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:66d791187f871dc70a6b90cd9d60dc3db06d60c2beaefb3d75c2ff1f949d5458
|
||||
size 726636
|
||||
oid sha256:190fd946ddc7e1b5e9ca2172ec1de39c6288829773d9ce29fe98374256eff566
|
||||
size 721900
|
||||
|
||||
@ -1,3 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:6a065d8c65f022875bb49bdc9aa853061149ff2cdfcaf1f8cdf8a3efe456e8a5
|
||||
size 723454
|
||||
oid sha256:b7cd5976c836bcd75c0cadfe968050ac60bf89b93df021ad6c1681e159c497c5
|
||||
size 717928
|
||||
|
||||
@ -1,3 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:212ffad34a9b3002c1ab7e590bbadf1c94cb9847acbb479c311e9057c4e4c44b
|
||||
size 932628
|
||||
oid sha256:7c536d725e1d9ebd2cb836dfe3993edcc81101534db6b7f1943c8a9443838bf4
|
||||
size 927892
|
||||
|
||||
@ -1,3 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:e70aa7f7c6f8e41c5f142fd268a88fd0390f59ac9aad56b8be062a05f8f49ff8
|
||||
size 638994
|
||||
oid sha256:b5907da5a2f68c010d44bbbd0d780e097f9625be15b2f85e8dd1f00dd4c31ff9
|
||||
size 631890
|
||||
|
||||
@ -1,3 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:d0cc18b1e3835a7cc42648d1bd0b63507020427299027667f9dd4faef37450ab
|
||||
size 169391
|
||||
oid sha256:9cf14c71134a89ed6ffc83c0b7db06ed10e22b55294dc15ddf7f016427f01033
|
||||
size 159919
|
||||
|
||||
@ -1,3 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:90e97d06799b33f0f4ed6c68aa43616f4f2e013680909ca56d2e514a4481f0cf
|
||||
size 169391
|
||||
oid sha256:f2b83c70dbc8ab0b3695dab3f4d2069b7ee7119e9140d7860b8c19f59a498589
|
||||
size 159919
|
||||
|
||||
@ -1,3 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:c48f3c39368e774c4f3c281b7422e0b90e08321fa29591882c7071a635e1c3c6
|
||||
size 489019
|
||||
oid sha256:fc8369f5701dceea91d429a713ddcbb4ecb0ad08d3c9042688557ead5f00e9da
|
||||
size 483493
|
||||
|
||||
@ -1,3 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:b5edbd9d472583367857e998d65097561a9b36bc68ba1ae94f3b79940c7cb6f3
|
||||
size 501649
|
||||
oid sha256:4e9fffff2d13d49613e5f9334a010ca9bcde43b3bb55a792fd97fe2c867760dc
|
||||
size 496123
|
||||
|
||||
@ -1,3 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:9eeb56a178049dbe0869030e20eeb608423fd5e34e3720230e5ed4373717b91a
|
||||
size 238849
|
||||
oid sha256:dd3041ba5a52263f7f02d64f1911c50e346151bf529e865c1abf22583abd3e21
|
||||
size 443285
|
||||
|
||||
@ -1,3 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:00c69c0bfcb04dcd381677913781984ffafa3980922807faa94f125c01d7b901
|
||||
size 238035
|
||||
oid sha256:12482099b086249163085e6e3421a61f6e304f865aaf56dd15382614be5e48e7
|
||||
size 441683
|
||||
|
||||
@ -1,3 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:cade6eee7a6be594da0a65e270954a11af436082b02bdd036aeddf9486812996
|
||||
size 298837
|
||||
oid sha256:bfea1ea1627eaef7b614db08bad00bda8b611c8e466c858e050c0ce2aee2eafb
|
||||
size 298049
|
||||
|
||||
@ -1,3 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:470b274928968dc99c7cc1299cb906a9c38c2e5ddb556591047677e8b968b2c9
|
||||
size 298025
|
||||
oid sha256:f828600699faa3a0474085cbbe88d2e0ac7c8e056c976b81a882c3a72682e527
|
||||
size 296445
|
||||
|
||||
@ -1,3 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:6d9c45c07e5f4513fa4666178709a7051042e1fa791d0ddfe9540802ddf36194
|
||||
size 231731
|
||||
oid sha256:2d4b297922065ecb79b4a1278d048b253b57601d011fc5833a32f9fc1b78e58e
|
||||
size 427485
|
||||
|
||||
@ -1,3 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:682a0bc5821e74d56736641ecd8a7ccb1a7d7352183eda62a56edaa280d99004
|
||||
size 230917
|
||||
oid sha256:3fd5305445c9856fbd5d9dfaffdd7f87b9014638f33fb63fb2cb4fce9893b20b
|
||||
size 425883
|
||||
|
||||
@ -1,3 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:2dbba9a30ed262e3096c4e7d7c3e4fdadd3e073e41894e8258de9274e08979d7
|
||||
size 1615406
|
||||
oid sha256:2b7fee97097f799830df2bcb1c782c7ea9018243cbd5cd0e0f47ec299b49db79
|
||||
size 1524634
|
||||
|
||||
@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:8ac2f9270988bc02329ce11ef3413395b2b8cdc55fcf4911d170536c6e618317
|
||||
size 403697
|
||||
@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:1234cf31a3a6b84ed25fa0ad6c4df9b53f673f6bac2f639a66086ba50f8717ba
|
||||
size 1120818
|
||||
@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:0fff300932a16d30844e317ace515a178f159c483e436f6955983b96c5c424c6
|
||||
size 1549402
|
||||
@ -1,3 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:dbd51135c48812f21f53811b57057cabbef6c7a8a7833c411d8f8c47a2285c65
|
||||
size 724564
|
||||
oid sha256:ed10767ec913d314936fc5dbd1fd70c5381a622bf3fcf1590f837da6d3285bca
|
||||
size 723774
|
||||
|
||||
@ -1,3 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:c9ca2010bc714808c4e62ad7a66ae070e18bd40f678f46663b5f46d964283e6c
|
||||
size 704814
|
||||
oid sha256:7e7a7a9653a9c4e4e9b0514fc1d70abbb4521c7edbede52568d17d0779d62ffb
|
||||
size 671662
|
||||
|
||||
@ -1,3 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:aff65d92093547c644da83b9800c8d8393f1a9d530f809b6bb35138afbe669c8
|
||||
size 454223
|
||||
oid sha256:1e18db0cd4de65e76e30f219d24ec00095fb16005882c43322182c5fa3f59032
|
||||
size 445541
|
||||
|
||||
@ -1,3 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:3242c721b07ab2f56698b11c16f2766b61f1a27c8c30e9458e5179a71340cf76
|
||||
size 1377818
|
||||
oid sha256:9aceb502c1a95f58f1eab515cf2aeac92be6d255ef405008a4fd871fd54e9ba6
|
||||
size 1242842
|
||||
|
||||
@ -1,3 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:cd323cec032400ab6c820d02d9e1c6da22ad0b627a0bf6bf51de0c0ab4aad99c
|
||||
size 1260540
|
||||
oid sha256:ec96248452f638bb9ca50d3630dd67caf71322c01b17aff301c4a98eb7e27974
|
||||
size 1215548
|
||||
|
||||
@ -1,3 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:3adf59ee5801afeed6c1a51c6ca6bf504e534c3c277dd58c91d1818e13c726be
|
||||
size 1790160
|
||||
oid sha256:dabc44860e81532e9b7ecb35773d0ad409d45361e20c9510d24387039999a7c3
|
||||
size 1720698
|
||||
|
||||
@ -1,3 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:e17333a518382c1d0980c8c8c4500df358846c602db5f7f2c413f135f3ff263e
|
||||
size 416321
|
||||
oid sha256:0d9c8d1fe282f46c12898ed4851a2640cb33ba5d75c5fe9da8a988f818a0e733
|
||||
size 407639
|
||||
|
||||
@ -1,3 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:5654ec576d9e76bec93bbc11dfc7142bf4e57d1bc718e8c76e1b8a9c9dced0dc
|
||||
size 1108986
|
||||
oid sha256:849a280994b3fa1f18ca6c3866a16a68a9b02831f134f8dfcf0d34502c1d6772
|
||||
size 1102672
|
||||
|
||||
@ -1,3 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:09f3e9c7de20a1fd78f68d32b4be0301a8426ea8b61c90a361968e143a409dee
|
||||
size 633042
|
||||
oid sha256:4e209b01409585433406f8392c77a7398270ee1b58446b728cf74faa6fe1bf9a
|
||||
size 629884
|
||||
|
||||
@ -1,3 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:22a85bd4725e2ca09a3f45519b9abd3d353f5de8cb5994f40213f5dca233e0ad
|
||||
size 1162650
|
||||
oid sha256:0a22bb0202916831eced0a44acbab769d5647937155e0a2b5e6d0d0cb83c726f
|
||||
size 1122394
|
||||
|
||||
@ -1,3 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:c373d9294f2adc0601433f57e1369eef8ec03a6fc0c0a514b5338ed313e6a6e2
|
||||
size 1620438
|
||||
oid sha256:582d17d48c7a751a345f74cc8c74f9b8c05278ddfc185da4906310a4973a9bdb
|
||||
size 1547030
|
||||
|
||||
@ -1,3 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:c70a136dfd55771b4218b60536d034f6dbcf285353ce8ea75c8fc93d33d09450
|
||||
size 609335
|
||||
oid sha256:70f02b7329eef7ceeb73dd43c3bf8f6ea6132c593bba6dbbed720d8b8ff0c287
|
||||
size 603809
|
||||
|
||||
@ -1,3 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:0af8defec56bebfe634eafe3825626e91301937a1beafd5e2cb61d28e18e86dd
|
||||
size 333093
|
||||
oid sha256:f67d4e70c39bf379ed0f3ef73a3690ac64efaee1e7134c793a760924c270f046
|
||||
size 329935
|
||||
|
||||
@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:c2c284c6cb66207bd204bd1b6abe45aa8bf2e0c92631681861df237b8f849a46
|
||||
size 363451
|
||||
@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:d3bede327d80be420e7bf011ee1a4156365afff7020bbf5a8434da18cb19fb23
|
||||
size 1093202
|
||||
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user