TensorRT-LLMs/cpp/tensorrt_llm/kernels/contextFusedMultiHeadAttention/pagedKVCubin
Kaiyu Xie 035b99e0d0
Update TensorRT-LLM (#1427)
* Update TensorRT-LLM

---------

Co-authored-by: meghagarwal <16129366+megha95@users.noreply.github.com>
2024-04-09 17:03:34 +08:00
..
fmha_cubin.h Update TensorRT-LLM (#1427) 2024-04-09 17:03:34 +08:00
fmha_v2_flash_attention_bf16_64_16_S_160_pagedKV_sm80.cubin.cpp Update TensorRT-LLM (#1019) 2024-01-31 21:55:32 +08:00
fmha_v2_flash_attention_bf16_64_16_S_160_pagedKV_sm86.cubin.cpp Update TensorRT-LLM (#1019) 2024-01-31 21:55:32 +08:00
fmha_v2_flash_attention_bf16_64_16_S_160_pagedKV_sm89.cubin.cpp Update TensorRT-LLM (#1019) 2024-01-31 21:55:32 +08:00
fmha_v2_flash_attention_bf16_64_16_S_160_pagedKV_sm90.cubin.cpp Update TensorRT-LLM (#1019) 2024-01-31 21:55:32 +08:00
fmha_v2_flash_attention_bf16_64_16_S_192_pagedKV_sm80.cubin.cpp Update TensorRT-LLM (#1427) 2024-04-09 17:03:34 +08:00
fmha_v2_flash_attention_bf16_64_16_S_192_pagedKV_sm86.cubin.cpp Update TensorRT-LLM (#1427) 2024-04-09 17:03:34 +08:00
fmha_v2_flash_attention_bf16_64_16_S_192_pagedKV_sm89.cubin.cpp Update TensorRT-LLM (#1427) 2024-04-09 17:03:34 +08:00
fmha_v2_flash_attention_bf16_64_16_S_192_pagedKV_sm90.cubin.cpp Update TensorRT-LLM (#1427) 2024-04-09 17:03:34 +08:00
fmha_v2_flash_attention_bf16_64_16_S_256_pagedKV_sm80.cubin.cpp Update TensorRT-LLM (#1019) 2024-01-31 21:55:32 +08:00
fmha_v2_flash_attention_bf16_64_16_S_256_pagedKV_sm86.cubin.cpp Update TensorRT-LLM (#1019) 2024-01-31 21:55:32 +08:00
fmha_v2_flash_attention_bf16_64_16_S_256_pagedKV_sm89.cubin.cpp Update TensorRT-LLM (#1019) 2024-01-31 21:55:32 +08:00
fmha_v2_flash_attention_bf16_64_16_S_256_pagedKV_sm90.cubin.cpp Update TensorRT-LLM (#1019) 2024-01-31 21:55:32 +08:00
fmha_v2_flash_attention_bf16_64_32_S_40_pagedKV_sm80.cubin.cpp Update TensorRT-LLM (#1019) 2024-01-31 21:55:32 +08:00
fmha_v2_flash_attention_bf16_64_32_S_40_pagedKV_sm86.cubin.cpp Update TensorRT-LLM (#1019) 2024-01-31 21:55:32 +08:00
fmha_v2_flash_attention_bf16_64_32_S_40_pagedKV_sm89.cubin.cpp Update TensorRT-LLM (#1019) 2024-01-31 21:55:32 +08:00
fmha_v2_flash_attention_bf16_64_32_S_40_pagedKV_sm90.cubin.cpp Update TensorRT-LLM (#1019) 2024-01-31 21:55:32 +08:00
fmha_v2_flash_attention_bf16_64_32_S_64_pagedKV_sm80.cubin.cpp Update TensorRT-LLM (#1019) 2024-01-31 21:55:32 +08:00
fmha_v2_flash_attention_bf16_64_32_S_64_pagedKV_sm86.cubin.cpp Update TensorRT-LLM (#1019) 2024-01-31 21:55:32 +08:00
fmha_v2_flash_attention_bf16_64_32_S_64_pagedKV_sm89.cubin.cpp Update TensorRT-LLM (#1019) 2024-01-31 21:55:32 +08:00
fmha_v2_flash_attention_bf16_64_32_S_64_pagedKV_sm90.cubin.cpp Update TensorRT-LLM (#1019) 2024-01-31 21:55:32 +08:00
fmha_v2_flash_attention_bf16_64_32_S_80_pagedKV_sm80.cubin.cpp Update TensorRT-LLM (#1019) 2024-01-31 21:55:32 +08:00
fmha_v2_flash_attention_bf16_64_32_S_80_pagedKV_sm86.cubin.cpp Update TensorRT-LLM (#1019) 2024-01-31 21:55:32 +08:00
fmha_v2_flash_attention_bf16_64_32_S_80_pagedKV_sm89.cubin.cpp Update TensorRT-LLM (#1019) 2024-01-31 21:55:32 +08:00
fmha_v2_flash_attention_bf16_64_32_S_80_pagedKV_sm90.cubin.cpp Update TensorRT-LLM (#1019) 2024-01-31 21:55:32 +08:00
fmha_v2_flash_attention_bf16_64_32_S_96_pagedKV_sm80.cubin.cpp Update TensorRT-LLM (#1055) 2024-02-06 18:38:07 +08:00
fmha_v2_flash_attention_bf16_64_32_S_96_pagedKV_sm86.cubin.cpp Update TensorRT-LLM (#1055) 2024-02-06 18:38:07 +08:00
fmha_v2_flash_attention_bf16_64_32_S_96_pagedKV_sm89.cubin.cpp Update TensorRT-LLM (#1055) 2024-02-06 18:38:07 +08:00
fmha_v2_flash_attention_bf16_64_32_S_96_pagedKV_sm90.cubin.cpp Update TensorRT-LLM (#1055) 2024-02-06 18:38:07 +08:00
fmha_v2_flash_attention_bf16_64_32_S_104_pagedKV_sm80.cubin.cpp Update TensorRT-LLM (#1055) 2024-02-06 18:38:07 +08:00
fmha_v2_flash_attention_bf16_64_32_S_104_pagedKV_sm86.cubin.cpp Update TensorRT-LLM (#1055) 2024-02-06 18:38:07 +08:00
fmha_v2_flash_attention_bf16_64_32_S_104_pagedKV_sm89.cubin.cpp Update TensorRT-LLM (#1055) 2024-02-06 18:38:07 +08:00
fmha_v2_flash_attention_bf16_64_32_S_104_pagedKV_sm90.cubin.cpp Update TensorRT-LLM (#1055) 2024-02-06 18:38:07 +08:00
fmha_v2_flash_attention_bf16_64_32_S_128_pagedKV_sm80.cubin.cpp Update TensorRT-LLM (#1019) 2024-01-31 21:55:32 +08:00
fmha_v2_flash_attention_bf16_64_32_S_128_pagedKV_sm86.cubin.cpp Update TensorRT-LLM (#1019) 2024-01-31 21:55:32 +08:00
fmha_v2_flash_attention_bf16_64_32_S_128_pagedKV_sm89.cubin.cpp Update TensorRT-LLM (#1019) 2024-01-31 21:55:32 +08:00
fmha_v2_flash_attention_bf16_64_32_S_128_pagedKV_sm90.cubin.cpp Update TensorRT-LLM (#1019) 2024-01-31 21:55:32 +08:00
fmha_v2_flash_attention_bf16_64_64_S_16_pagedKV_sm80.cubin.cpp Update TensorRT-LLM (#1019) 2024-01-31 21:55:32 +08:00
fmha_v2_flash_attention_bf16_64_64_S_16_pagedKV_sm86.cubin.cpp Update TensorRT-LLM (#1019) 2024-01-31 21:55:32 +08:00
fmha_v2_flash_attention_bf16_64_64_S_16_pagedKV_sm89.cubin.cpp Update TensorRT-LLM (#1019) 2024-01-31 21:55:32 +08:00
fmha_v2_flash_attention_bf16_64_64_S_16_pagedKV_sm90.cubin.cpp Update TensorRT-LLM (#1019) 2024-01-31 21:55:32 +08:00
fmha_v2_flash_attention_bf16_64_64_S_32_pagedKV_sm80.cubin.cpp Update TensorRT-LLM (#1019) 2024-01-31 21:55:32 +08:00
fmha_v2_flash_attention_bf16_64_64_S_32_pagedKV_sm86.cubin.cpp Update TensorRT-LLM (#1019) 2024-01-31 21:55:32 +08:00
fmha_v2_flash_attention_bf16_64_64_S_32_pagedKV_sm89.cubin.cpp Update TensorRT-LLM (#1019) 2024-01-31 21:55:32 +08:00
fmha_v2_flash_attention_bf16_64_64_S_32_pagedKV_sm90.cubin.cpp Update TensorRT-LLM (#1019) 2024-01-31 21:55:32 +08:00
fmha_v2_flash_attention_bf16_64_64_S_160_pagedKV_alibi_tma_ws_sm90.cubin.cpp Update TensorRT-LLM (#1055) 2024-02-06 18:38:07 +08:00
fmha_v2_flash_attention_bf16_64_64_S_160_pagedKV_tma_ws_sm90.cubin.cpp Update TensorRT-LLM (#1055) 2024-02-06 18:38:07 +08:00
fmha_v2_flash_attention_bf16_64_64_S_192_pagedKV_alibi_tma_ws_sm90.cubin.cpp Update TensorRT-LLM (#1427) 2024-04-09 17:03:34 +08:00
fmha_v2_flash_attention_bf16_64_64_S_192_pagedKV_tma_ws_sm90.cubin.cpp Update TensorRT-LLM (#1427) 2024-04-09 17:03:34 +08:00
fmha_v2_flash_attention_bf16_64_64_S_256_pagedKV_alibi_tma_ws_sm90.cubin.cpp Update TensorRT-LLM (#1055) 2024-02-06 18:38:07 +08:00
fmha_v2_flash_attention_bf16_64_64_S_256_pagedKV_tma_ws_sm90.cubin.cpp Update TensorRT-LLM (#1055) 2024-02-06 18:38:07 +08:00
fmha_v2_flash_attention_bf16_64_128_S_32_pagedKV_alibi_tma_ws_sm90.cubin.cpp Update TensorRT-LLM (#1055) 2024-02-06 18:38:07 +08:00
fmha_v2_flash_attention_bf16_64_128_S_32_pagedKV_tma_ws_sm90.cubin.cpp Update TensorRT-LLM (#1055) 2024-02-06 18:38:07 +08:00
fmha_v2_flash_attention_bf16_64_128_S_40_pagedKV_alibi_tma_ws_sm90.cubin.cpp Update TensorRT-LLM (#1055) 2024-02-06 18:38:07 +08:00
fmha_v2_flash_attention_bf16_64_128_S_40_pagedKV_tma_ws_sm90.cubin.cpp Update TensorRT-LLM (#1055) 2024-02-06 18:38:07 +08:00
fmha_v2_flash_attention_bf16_64_128_S_64_pagedKV_alibi_tma_ws_sm90.cubin.cpp Update TensorRT-LLM (#1055) 2024-02-06 18:38:07 +08:00
fmha_v2_flash_attention_bf16_64_128_S_64_pagedKV_tma_ws_sm90.cubin.cpp Update TensorRT-LLM (#1055) 2024-02-06 18:38:07 +08:00
fmha_v2_flash_attention_bf16_64_128_S_80_pagedKV_alibi_tma_ws_sm90.cubin.cpp Update TensorRT-LLM (#1055) 2024-02-06 18:38:07 +08:00
fmha_v2_flash_attention_bf16_64_128_S_80_pagedKV_sm80.cubin.cpp Update TensorRT-LLM (#1019) 2024-01-31 21:55:32 +08:00
fmha_v2_flash_attention_bf16_64_128_S_80_pagedKV_sm86.cubin.cpp Update TensorRT-LLM (#1019) 2024-01-31 21:55:32 +08:00
fmha_v2_flash_attention_bf16_64_128_S_80_pagedKV_sm89.cubin.cpp Update TensorRT-LLM (#1019) 2024-01-31 21:55:32 +08:00
fmha_v2_flash_attention_bf16_64_128_S_80_pagedKV_sm90.cubin.cpp Update TensorRT-LLM (#1019) 2024-01-31 21:55:32 +08:00
fmha_v2_flash_attention_bf16_64_128_S_80_pagedKV_tma_ws_sm90.cubin.cpp Update TensorRT-LLM (#1055) 2024-02-06 18:38:07 +08:00
fmha_v2_flash_attention_bf16_64_128_S_96_pagedKV_alibi_tma_ws_sm90.cubin.cpp Update TensorRT-LLM (#1055) 2024-02-06 18:38:07 +08:00
fmha_v2_flash_attention_bf16_64_128_S_96_pagedKV_sm80.cubin.cpp Update TensorRT-LLM (#1055) 2024-02-06 18:38:07 +08:00
fmha_v2_flash_attention_bf16_64_128_S_96_pagedKV_sm86.cubin.cpp Update TensorRT-LLM (#1055) 2024-02-06 18:38:07 +08:00
fmha_v2_flash_attention_bf16_64_128_S_96_pagedKV_sm89.cubin.cpp Update TensorRT-LLM (#1055) 2024-02-06 18:38:07 +08:00
fmha_v2_flash_attention_bf16_64_128_S_96_pagedKV_sm90.cubin.cpp Update TensorRT-LLM (#1055) 2024-02-06 18:38:07 +08:00
fmha_v2_flash_attention_bf16_64_128_S_96_pagedKV_tma_ws_sm90.cubin.cpp Update TensorRT-LLM (#1055) 2024-02-06 18:38:07 +08:00
fmha_v2_flash_attention_bf16_64_128_S_104_pagedKV_alibi_tma_ws_sm90.cubin.cpp Update TensorRT-LLM (#1055) 2024-02-06 18:38:07 +08:00
fmha_v2_flash_attention_bf16_64_128_S_104_pagedKV_sm80.cubin.cpp Update TensorRT-LLM (#1055) 2024-02-06 18:38:07 +08:00
fmha_v2_flash_attention_bf16_64_128_S_104_pagedKV_sm86.cubin.cpp Update TensorRT-LLM (#1055) 2024-02-06 18:38:07 +08:00
fmha_v2_flash_attention_bf16_64_128_S_104_pagedKV_sm89.cubin.cpp Update TensorRT-LLM (#1055) 2024-02-06 18:38:07 +08:00
fmha_v2_flash_attention_bf16_64_128_S_104_pagedKV_sm90.cubin.cpp Update TensorRT-LLM (#1055) 2024-02-06 18:38:07 +08:00
fmha_v2_flash_attention_bf16_64_128_S_104_pagedKV_tma_ws_sm90.cubin.cpp Update TensorRT-LLM (#1055) 2024-02-06 18:38:07 +08:00
fmha_v2_flash_attention_bf16_64_128_S_128_pagedKV_alibi_tma_ws_sm90.cubin.cpp Update TensorRT-LLM (#1055) 2024-02-06 18:38:07 +08:00
fmha_v2_flash_attention_bf16_64_128_S_128_pagedKV_sm80.cubin.cpp Update TensorRT-LLM (#1019) 2024-01-31 21:55:32 +08:00
fmha_v2_flash_attention_bf16_64_128_S_128_pagedKV_sm86.cubin.cpp Update TensorRT-LLM (#1019) 2024-01-31 21:55:32 +08:00
fmha_v2_flash_attention_bf16_64_128_S_128_pagedKV_sm89.cubin.cpp Update TensorRT-LLM (#1019) 2024-01-31 21:55:32 +08:00
fmha_v2_flash_attention_bf16_64_128_S_128_pagedKV_sm90.cubin.cpp Update TensorRT-LLM (#1019) 2024-01-31 21:55:32 +08:00
fmha_v2_flash_attention_bf16_64_128_S_128_pagedKV_tma_ws_sm90.cubin.cpp Update TensorRT-LLM (#1055) 2024-02-06 18:38:07 +08:00
fmha_v2_flash_attention_bf16_64_128_S_160_pagedKV_sm80.cubin.cpp Update TensorRT-LLM (#1019) 2024-01-31 21:55:32 +08:00
fmha_v2_flash_attention_bf16_64_128_S_160_pagedKV_sm86.cubin.cpp Update TensorRT-LLM (#1019) 2024-01-31 21:55:32 +08:00
fmha_v2_flash_attention_bf16_64_128_S_160_pagedKV_sm89.cubin.cpp Update TensorRT-LLM (#1019) 2024-01-31 21:55:32 +08:00
fmha_v2_flash_attention_bf16_64_128_S_160_pagedKV_sm90.cubin.cpp Update TensorRT-LLM (#1019) 2024-01-31 21:55:32 +08:00
fmha_v2_flash_attention_bf16_64_128_S_192_pagedKV_sm80.cubin.cpp Update TensorRT-LLM (#1427) 2024-04-09 17:03:34 +08:00
fmha_v2_flash_attention_bf16_64_128_S_192_pagedKV_sm86.cubin.cpp Update TensorRT-LLM (#1427) 2024-04-09 17:03:34 +08:00
fmha_v2_flash_attention_bf16_64_128_S_192_pagedKV_sm89.cubin.cpp Update TensorRT-LLM (#1427) 2024-04-09 17:03:34 +08:00
fmha_v2_flash_attention_bf16_64_128_S_192_pagedKV_sm90.cubin.cpp Update TensorRT-LLM (#1427) 2024-04-09 17:03:34 +08:00
fmha_v2_flash_attention_bf16_64_128_S_256_pagedKV_sm80.cubin.cpp Update TensorRT-LLM (#1019) 2024-01-31 21:55:32 +08:00
fmha_v2_flash_attention_bf16_64_128_S_256_pagedKV_sm86.cubin.cpp Update TensorRT-LLM (#1019) 2024-01-31 21:55:32 +08:00
fmha_v2_flash_attention_bf16_64_128_S_256_pagedKV_sm89.cubin.cpp Update TensorRT-LLM (#1019) 2024-01-31 21:55:32 +08:00
fmha_v2_flash_attention_bf16_64_128_S_256_pagedKV_sm90.cubin.cpp Update TensorRT-LLM (#1019) 2024-01-31 21:55:32 +08:00
fmha_v2_flash_attention_bf16_64_256_S_32_pagedKV_alibi_tma_ws_sm90.cubin.cpp Update TensorRT-LLM (#1055) 2024-02-06 18:38:07 +08:00
fmha_v2_flash_attention_bf16_64_256_S_32_pagedKV_tma_ws_sm90.cubin.cpp Update TensorRT-LLM (#1055) 2024-02-06 18:38:07 +08:00
fmha_v2_flash_attention_bf16_64_256_S_40_pagedKV_alibi_tma_ws_sm90.cubin.cpp Update TensorRT-LLM (#1055) 2024-02-06 18:38:07 +08:00
fmha_v2_flash_attention_bf16_64_256_S_40_pagedKV_tma_ws_sm90.cubin.cpp Update TensorRT-LLM (#1055) 2024-02-06 18:38:07 +08:00
fmha_v2_flash_attention_bf16_64_256_S_64_pagedKV_alibi_tma_ws_sm90.cubin.cpp Update TensorRT-LLM (#1055) 2024-02-06 18:38:07 +08:00
fmha_v2_flash_attention_bf16_64_256_S_64_pagedKV_tma_ws_sm90.cubin.cpp Update TensorRT-LLM (#1055) 2024-02-06 18:38:07 +08:00
fmha_v2_flash_attention_bf16_128_128_S_16_pagedKV_sm80.cubin.cpp Update TensorRT-LLM (#1019) 2024-01-31 21:55:32 +08:00
fmha_v2_flash_attention_bf16_128_128_S_16_pagedKV_sm86.cubin.cpp Update TensorRT-LLM (#1019) 2024-01-31 21:55:32 +08:00
fmha_v2_flash_attention_bf16_128_128_S_16_pagedKV_sm89.cubin.cpp Update TensorRT-LLM (#1019) 2024-01-31 21:55:32 +08:00
fmha_v2_flash_attention_bf16_128_128_S_16_pagedKV_sm90.cubin.cpp Update TensorRT-LLM (#1019) 2024-01-31 21:55:32 +08:00
fmha_v2_flash_attention_bf16_128_128_S_32_pagedKV_sm80.cubin.cpp Update TensorRT-LLM (#1019) 2024-01-31 21:55:32 +08:00
fmha_v2_flash_attention_bf16_128_128_S_32_pagedKV_sm86.cubin.cpp Update TensorRT-LLM (#1019) 2024-01-31 21:55:32 +08:00
fmha_v2_flash_attention_bf16_128_128_S_32_pagedKV_sm89.cubin.cpp Update TensorRT-LLM (#1019) 2024-01-31 21:55:32 +08:00
fmha_v2_flash_attention_bf16_128_128_S_32_pagedKV_sm90.cubin.cpp Update TensorRT-LLM (#1019) 2024-01-31 21:55:32 +08:00
fmha_v2_flash_attention_bf16_128_128_S_40_pagedKV_sm80.cubin.cpp Update TensorRT-LLM (#1019) 2024-01-31 21:55:32 +08:00
fmha_v2_flash_attention_bf16_128_128_S_40_pagedKV_sm86.cubin.cpp Update TensorRT-LLM (#1019) 2024-01-31 21:55:32 +08:00
fmha_v2_flash_attention_bf16_128_128_S_40_pagedKV_sm89.cubin.cpp Update TensorRT-LLM (#1019) 2024-01-31 21:55:32 +08:00
fmha_v2_flash_attention_bf16_128_128_S_40_pagedKV_sm90.cubin.cpp Update TensorRT-LLM (#1019) 2024-01-31 21:55:32 +08:00
fmha_v2_flash_attention_bf16_128_128_S_64_pagedKV_sm80.cubin.cpp Update TensorRT-LLM (#1019) 2024-01-31 21:55:32 +08:00
fmha_v2_flash_attention_bf16_128_128_S_64_pagedKV_sm86.cubin.cpp Update TensorRT-LLM (#1019) 2024-01-31 21:55:32 +08:00
fmha_v2_flash_attention_bf16_128_128_S_64_pagedKV_sm89.cubin.cpp Update TensorRT-LLM (#1019) 2024-01-31 21:55:32 +08:00
fmha_v2_flash_attention_bf16_128_128_S_64_pagedKV_sm90.cubin.cpp Update TensorRT-LLM (#1019) 2024-01-31 21:55:32 +08:00
fmha_v2_flash_attention_fp16_64_16_S_160_pagedKV_sm80.cubin.cpp Update TensorRT-LLM (#1019) 2024-01-31 21:55:32 +08:00
fmha_v2_flash_attention_fp16_64_16_S_160_pagedKV_sm86.cubin.cpp Update TensorRT-LLM (#1019) 2024-01-31 21:55:32 +08:00
fmha_v2_flash_attention_fp16_64_16_S_160_pagedKV_sm89.cubin.cpp Update TensorRT-LLM (#1019) 2024-01-31 21:55:32 +08:00
fmha_v2_flash_attention_fp16_64_16_S_160_pagedKV_sm90.cubin.cpp Update TensorRT-LLM (#1019) 2024-01-31 21:55:32 +08:00
fmha_v2_flash_attention_fp16_64_16_S_192_pagedKV_sm80.cubin.cpp Update TensorRT-LLM (#1427) 2024-04-09 17:03:34 +08:00
fmha_v2_flash_attention_fp16_64_16_S_192_pagedKV_sm86.cubin.cpp Update TensorRT-LLM (#1427) 2024-04-09 17:03:34 +08:00
fmha_v2_flash_attention_fp16_64_16_S_192_pagedKV_sm89.cubin.cpp Update TensorRT-LLM (#1427) 2024-04-09 17:03:34 +08:00
fmha_v2_flash_attention_fp16_64_16_S_192_pagedKV_sm90.cubin.cpp Update TensorRT-LLM (#1427) 2024-04-09 17:03:34 +08:00
fmha_v2_flash_attention_fp16_64_16_S_256_pagedKV_sm80.cubin.cpp Update TensorRT-LLM (#1019) 2024-01-31 21:55:32 +08:00
fmha_v2_flash_attention_fp16_64_16_S_256_pagedKV_sm86.cubin.cpp Update TensorRT-LLM (#1019) 2024-01-31 21:55:32 +08:00
fmha_v2_flash_attention_fp16_64_16_S_256_pagedKV_sm89.cubin.cpp Update TensorRT-LLM (#1019) 2024-01-31 21:55:32 +08:00
fmha_v2_flash_attention_fp16_64_16_S_256_pagedKV_sm90.cubin.cpp Update TensorRT-LLM (#1019) 2024-01-31 21:55:32 +08:00
fmha_v2_flash_attention_fp16_64_32_S_40_pagedKV_sm80.cubin.cpp Update TensorRT-LLM (#1019) 2024-01-31 21:55:32 +08:00
fmha_v2_flash_attention_fp16_64_32_S_40_pagedKV_sm86.cubin.cpp Update TensorRT-LLM (#1019) 2024-01-31 21:55:32 +08:00
fmha_v2_flash_attention_fp16_64_32_S_40_pagedKV_sm89.cubin.cpp Update TensorRT-LLM (#1019) 2024-01-31 21:55:32 +08:00
fmha_v2_flash_attention_fp16_64_32_S_40_pagedKV_sm90.cubin.cpp Update TensorRT-LLM (#1019) 2024-01-31 21:55:32 +08:00
fmha_v2_flash_attention_fp16_64_32_S_64_pagedKV_sm80.cubin.cpp Update TensorRT-LLM (#1019) 2024-01-31 21:55:32 +08:00
fmha_v2_flash_attention_fp16_64_32_S_64_pagedKV_sm86.cubin.cpp Update TensorRT-LLM (#1019) 2024-01-31 21:55:32 +08:00
fmha_v2_flash_attention_fp16_64_32_S_64_pagedKV_sm89.cubin.cpp Update TensorRT-LLM (#1019) 2024-01-31 21:55:32 +08:00
fmha_v2_flash_attention_fp16_64_32_S_64_pagedKV_sm90.cubin.cpp Update TensorRT-LLM (#1019) 2024-01-31 21:55:32 +08:00
fmha_v2_flash_attention_fp16_64_32_S_80_pagedKV_sm80.cubin.cpp Update TensorRT-LLM (#1019) 2024-01-31 21:55:32 +08:00
fmha_v2_flash_attention_fp16_64_32_S_80_pagedKV_sm86.cubin.cpp Update TensorRT-LLM (#1019) 2024-01-31 21:55:32 +08:00
fmha_v2_flash_attention_fp16_64_32_S_80_pagedKV_sm89.cubin.cpp Update TensorRT-LLM (#1019) 2024-01-31 21:55:32 +08:00
fmha_v2_flash_attention_fp16_64_32_S_80_pagedKV_sm90.cubin.cpp Update TensorRT-LLM (#1019) 2024-01-31 21:55:32 +08:00
fmha_v2_flash_attention_fp16_64_32_S_96_pagedKV_sm80.cubin.cpp Update TensorRT-LLM (#1055) 2024-02-06 18:38:07 +08:00
fmha_v2_flash_attention_fp16_64_32_S_96_pagedKV_sm86.cubin.cpp Update TensorRT-LLM (#1055) 2024-02-06 18:38:07 +08:00
fmha_v2_flash_attention_fp16_64_32_S_96_pagedKV_sm89.cubin.cpp Update TensorRT-LLM (#1055) 2024-02-06 18:38:07 +08:00
fmha_v2_flash_attention_fp16_64_32_S_96_pagedKV_sm90.cubin.cpp Update TensorRT-LLM (#1055) 2024-02-06 18:38:07 +08:00
fmha_v2_flash_attention_fp16_64_32_S_104_pagedKV_sm80.cubin.cpp Update TensorRT-LLM (#1055) 2024-02-06 18:38:07 +08:00
fmha_v2_flash_attention_fp16_64_32_S_104_pagedKV_sm86.cubin.cpp Update TensorRT-LLM (#1055) 2024-02-06 18:38:07 +08:00
fmha_v2_flash_attention_fp16_64_32_S_104_pagedKV_sm89.cubin.cpp Update TensorRT-LLM (#1055) 2024-02-06 18:38:07 +08:00
fmha_v2_flash_attention_fp16_64_32_S_104_pagedKV_sm90.cubin.cpp Update TensorRT-LLM (#1055) 2024-02-06 18:38:07 +08:00
fmha_v2_flash_attention_fp16_64_32_S_128_pagedKV_sm80.cubin.cpp Update TensorRT-LLM (#1019) 2024-01-31 21:55:32 +08:00
fmha_v2_flash_attention_fp16_64_32_S_128_pagedKV_sm86.cubin.cpp Update TensorRT-LLM (#1019) 2024-01-31 21:55:32 +08:00
fmha_v2_flash_attention_fp16_64_32_S_128_pagedKV_sm89.cubin.cpp Update TensorRT-LLM (#1019) 2024-01-31 21:55:32 +08:00
fmha_v2_flash_attention_fp16_64_32_S_128_pagedKV_sm90.cubin.cpp Update TensorRT-LLM (#1019) 2024-01-31 21:55:32 +08:00
fmha_v2_flash_attention_fp16_64_64_S_16_pagedKV_sm80.cubin.cpp Update TensorRT-LLM (#1019) 2024-01-31 21:55:32 +08:00
fmha_v2_flash_attention_fp16_64_64_S_16_pagedKV_sm86.cubin.cpp Update TensorRT-LLM (#1019) 2024-01-31 21:55:32 +08:00
fmha_v2_flash_attention_fp16_64_64_S_16_pagedKV_sm89.cubin.cpp Update TensorRT-LLM (#1019) 2024-01-31 21:55:32 +08:00
fmha_v2_flash_attention_fp16_64_64_S_16_pagedKV_sm90.cubin.cpp Update TensorRT-LLM (#1019) 2024-01-31 21:55:32 +08:00
fmha_v2_flash_attention_fp16_64_64_S_32_pagedKV_sm80.cubin.cpp Update TensorRT-LLM (#1019) 2024-01-31 21:55:32 +08:00
fmha_v2_flash_attention_fp16_64_64_S_32_pagedKV_sm86.cubin.cpp Update TensorRT-LLM (#1019) 2024-01-31 21:55:32 +08:00
fmha_v2_flash_attention_fp16_64_64_S_32_pagedKV_sm89.cubin.cpp Update TensorRT-LLM (#1019) 2024-01-31 21:55:32 +08:00
fmha_v2_flash_attention_fp16_64_64_S_32_pagedKV_sm90.cubin.cpp Update TensorRT-LLM (#1019) 2024-01-31 21:55:32 +08:00
fmha_v2_flash_attention_fp16_64_64_S_160_pagedKV_alibi_tma_ws_sm90.cubin.cpp Update TensorRT-LLM (#1055) 2024-02-06 18:38:07 +08:00
fmha_v2_flash_attention_fp16_64_64_S_160_pagedKV_tma_ws_sm90.cubin.cpp Update TensorRT-LLM (#1055) 2024-02-06 18:38:07 +08:00
fmha_v2_flash_attention_fp16_64_64_S_192_pagedKV_alibi_tma_ws_sm90.cubin.cpp Update TensorRT-LLM (#1427) 2024-04-09 17:03:34 +08:00
fmha_v2_flash_attention_fp16_64_64_S_192_pagedKV_tma_ws_sm90.cubin.cpp Update TensorRT-LLM (#1427) 2024-04-09 17:03:34 +08:00
fmha_v2_flash_attention_fp16_64_64_S_256_pagedKV_alibi_tma_ws_sm90.cubin.cpp Update TensorRT-LLM (#1055) 2024-02-06 18:38:07 +08:00
fmha_v2_flash_attention_fp16_64_64_S_256_pagedKV_tma_ws_sm90.cubin.cpp Update TensorRT-LLM (#1055) 2024-02-06 18:38:07 +08:00
fmha_v2_flash_attention_fp16_64_128_S_32_pagedKV_alibi_tma_ws_sm90.cubin.cpp Update TensorRT-LLM (#1055) 2024-02-06 18:38:07 +08:00
fmha_v2_flash_attention_fp16_64_128_S_32_pagedKV_tma_ws_sm90.cubin.cpp Update TensorRT-LLM (#1055) 2024-02-06 18:38:07 +08:00
fmha_v2_flash_attention_fp16_64_128_S_40_pagedKV_alibi_tma_ws_sm90.cubin.cpp Update TensorRT-LLM (#1055) 2024-02-06 18:38:07 +08:00
fmha_v2_flash_attention_fp16_64_128_S_40_pagedKV_tma_ws_sm90.cubin.cpp Update TensorRT-LLM (#1055) 2024-02-06 18:38:07 +08:00
fmha_v2_flash_attention_fp16_64_128_S_64_pagedKV_alibi_tma_ws_sm90.cubin.cpp Update TensorRT-LLM (#1055) 2024-02-06 18:38:07 +08:00
fmha_v2_flash_attention_fp16_64_128_S_64_pagedKV_tma_ws_sm90.cubin.cpp Update TensorRT-LLM (#1055) 2024-02-06 18:38:07 +08:00
fmha_v2_flash_attention_fp16_64_128_S_80_pagedKV_alibi_tma_ws_sm90.cubin.cpp Update TensorRT-LLM (#1055) 2024-02-06 18:38:07 +08:00
fmha_v2_flash_attention_fp16_64_128_S_80_pagedKV_sm80.cubin.cpp Update TensorRT-LLM (#1019) 2024-01-31 21:55:32 +08:00
fmha_v2_flash_attention_fp16_64_128_S_80_pagedKV_sm86.cubin.cpp Update TensorRT-LLM (#1019) 2024-01-31 21:55:32 +08:00
fmha_v2_flash_attention_fp16_64_128_S_80_pagedKV_sm89.cubin.cpp Update TensorRT-LLM (#1019) 2024-01-31 21:55:32 +08:00
fmha_v2_flash_attention_fp16_64_128_S_80_pagedKV_sm90.cubin.cpp Update TensorRT-LLM (#1019) 2024-01-31 21:55:32 +08:00
fmha_v2_flash_attention_fp16_64_128_S_80_pagedKV_tma_ws_sm90.cubin.cpp Update TensorRT-LLM (#1055) 2024-02-06 18:38:07 +08:00
fmha_v2_flash_attention_fp16_64_128_S_96_pagedKV_alibi_tma_ws_sm90.cubin.cpp Update TensorRT-LLM (#1055) 2024-02-06 18:38:07 +08:00
fmha_v2_flash_attention_fp16_64_128_S_96_pagedKV_sm80.cubin.cpp Update TensorRT-LLM (#1055) 2024-02-06 18:38:07 +08:00
fmha_v2_flash_attention_fp16_64_128_S_96_pagedKV_sm86.cubin.cpp Update TensorRT-LLM (#1055) 2024-02-06 18:38:07 +08:00
fmha_v2_flash_attention_fp16_64_128_S_96_pagedKV_sm89.cubin.cpp Update TensorRT-LLM (#1055) 2024-02-06 18:38:07 +08:00
fmha_v2_flash_attention_fp16_64_128_S_96_pagedKV_sm90.cubin.cpp Update TensorRT-LLM (#1055) 2024-02-06 18:38:07 +08:00
fmha_v2_flash_attention_fp16_64_128_S_96_pagedKV_tma_ws_sm90.cubin.cpp Update TensorRT-LLM (#1055) 2024-02-06 18:38:07 +08:00
fmha_v2_flash_attention_fp16_64_128_S_104_pagedKV_alibi_tma_ws_sm90.cubin.cpp Update TensorRT-LLM (#1055) 2024-02-06 18:38:07 +08:00
fmha_v2_flash_attention_fp16_64_128_S_104_pagedKV_sm80.cubin.cpp Update TensorRT-LLM (#1055) 2024-02-06 18:38:07 +08:00
fmha_v2_flash_attention_fp16_64_128_S_104_pagedKV_sm86.cubin.cpp Update TensorRT-LLM (#1055) 2024-02-06 18:38:07 +08:00
fmha_v2_flash_attention_fp16_64_128_S_104_pagedKV_sm89.cubin.cpp Update TensorRT-LLM (#1055) 2024-02-06 18:38:07 +08:00
fmha_v2_flash_attention_fp16_64_128_S_104_pagedKV_sm90.cubin.cpp Update TensorRT-LLM (#1055) 2024-02-06 18:38:07 +08:00
fmha_v2_flash_attention_fp16_64_128_S_104_pagedKV_tma_ws_sm90.cubin.cpp Update TensorRT-LLM (#1055) 2024-02-06 18:38:07 +08:00
fmha_v2_flash_attention_fp16_64_128_S_128_pagedKV_alibi_tma_ws_sm90.cubin.cpp Update TensorRT-LLM (#1055) 2024-02-06 18:38:07 +08:00
fmha_v2_flash_attention_fp16_64_128_S_128_pagedKV_sm80.cubin.cpp Update TensorRT-LLM (#1019) 2024-01-31 21:55:32 +08:00
fmha_v2_flash_attention_fp16_64_128_S_128_pagedKV_sm86.cubin.cpp Update TensorRT-LLM (#1019) 2024-01-31 21:55:32 +08:00
fmha_v2_flash_attention_fp16_64_128_S_128_pagedKV_sm89.cubin.cpp Update TensorRT-LLM (#1019) 2024-01-31 21:55:32 +08:00
fmha_v2_flash_attention_fp16_64_128_S_128_pagedKV_sm90.cubin.cpp Update TensorRT-LLM (#1019) 2024-01-31 21:55:32 +08:00
fmha_v2_flash_attention_fp16_64_128_S_128_pagedKV_tma_ws_sm90.cubin.cpp Update TensorRT-LLM (#1055) 2024-02-06 18:38:07 +08:00
fmha_v2_flash_attention_fp16_64_128_S_160_pagedKV_sm80.cubin.cpp Update TensorRT-LLM (#1019) 2024-01-31 21:55:32 +08:00
fmha_v2_flash_attention_fp16_64_128_S_160_pagedKV_sm86.cubin.cpp Update TensorRT-LLM (#1019) 2024-01-31 21:55:32 +08:00
fmha_v2_flash_attention_fp16_64_128_S_160_pagedKV_sm89.cubin.cpp Update TensorRT-LLM (#1019) 2024-01-31 21:55:32 +08:00
fmha_v2_flash_attention_fp16_64_128_S_160_pagedKV_sm90.cubin.cpp Update TensorRT-LLM (#1019) 2024-01-31 21:55:32 +08:00
fmha_v2_flash_attention_fp16_64_128_S_192_pagedKV_sm80.cubin.cpp Update TensorRT-LLM (#1427) 2024-04-09 17:03:34 +08:00
fmha_v2_flash_attention_fp16_64_128_S_192_pagedKV_sm86.cubin.cpp Update TensorRT-LLM (#1427) 2024-04-09 17:03:34 +08:00
fmha_v2_flash_attention_fp16_64_128_S_192_pagedKV_sm89.cubin.cpp Update TensorRT-LLM (#1427) 2024-04-09 17:03:34 +08:00
fmha_v2_flash_attention_fp16_64_128_S_192_pagedKV_sm90.cubin.cpp Update TensorRT-LLM (#1427) 2024-04-09 17:03:34 +08:00
fmha_v2_flash_attention_fp16_64_128_S_256_pagedKV_sm80.cubin.cpp Update TensorRT-LLM (#1019) 2024-01-31 21:55:32 +08:00
fmha_v2_flash_attention_fp16_64_128_S_256_pagedKV_sm86.cubin.cpp Update TensorRT-LLM (#1019) 2024-01-31 21:55:32 +08:00
fmha_v2_flash_attention_fp16_64_128_S_256_pagedKV_sm89.cubin.cpp Update TensorRT-LLM (#1019) 2024-01-31 21:55:32 +08:00
fmha_v2_flash_attention_fp16_64_128_S_256_pagedKV_sm90.cubin.cpp Update TensorRT-LLM (#1019) 2024-01-31 21:55:32 +08:00
fmha_v2_flash_attention_fp16_64_256_S_32_pagedKV_alibi_tma_ws_sm90.cubin.cpp Update TensorRT-LLM (#1019) 2024-01-31 21:55:32 +08:00
fmha_v2_flash_attention_fp16_64_256_S_32_pagedKV_tma_ws_sm90.cubin.cpp Update TensorRT-LLM (#1019) 2024-01-31 21:55:32 +08:00
fmha_v2_flash_attention_fp16_64_256_S_40_pagedKV_alibi_tma_ws_sm90.cubin.cpp Update TensorRT-LLM (#1055) 2024-02-06 18:38:07 +08:00
fmha_v2_flash_attention_fp16_64_256_S_40_pagedKV_tma_ws_sm90.cubin.cpp Update TensorRT-LLM (#1055) 2024-02-06 18:38:07 +08:00
fmha_v2_flash_attention_fp16_64_256_S_64_pagedKV_alibi_tma_ws_sm90.cubin.cpp Update TensorRT-LLM (#1019) 2024-01-31 21:55:32 +08:00
fmha_v2_flash_attention_fp16_64_256_S_64_pagedKV_tma_ws_sm90.cubin.cpp Update TensorRT-LLM (#1019) 2024-01-31 21:55:32 +08:00
fmha_v2_flash_attention_fp16_128_128_S_16_pagedKV_sm80.cubin.cpp Update TensorRT-LLM (#1019) 2024-01-31 21:55:32 +08:00
fmha_v2_flash_attention_fp16_128_128_S_16_pagedKV_sm86.cubin.cpp Update TensorRT-LLM (#1019) 2024-01-31 21:55:32 +08:00
fmha_v2_flash_attention_fp16_128_128_S_16_pagedKV_sm89.cubin.cpp Update TensorRT-LLM (#1019) 2024-01-31 21:55:32 +08:00
fmha_v2_flash_attention_fp16_128_128_S_16_pagedKV_sm90.cubin.cpp Update TensorRT-LLM (#1019) 2024-01-31 21:55:32 +08:00
fmha_v2_flash_attention_fp16_128_128_S_32_pagedKV_sm80.cubin.cpp Update TensorRT-LLM (#1019) 2024-01-31 21:55:32 +08:00
fmha_v2_flash_attention_fp16_128_128_S_32_pagedKV_sm86.cubin.cpp Update TensorRT-LLM (#1019) 2024-01-31 21:55:32 +08:00
fmha_v2_flash_attention_fp16_128_128_S_32_pagedKV_sm89.cubin.cpp Update TensorRT-LLM (#1019) 2024-01-31 21:55:32 +08:00
fmha_v2_flash_attention_fp16_128_128_S_32_pagedKV_sm90.cubin.cpp Update TensorRT-LLM (#1019) 2024-01-31 21:55:32 +08:00
fmha_v2_flash_attention_fp16_128_128_S_40_pagedKV_sm80.cubin.cpp Update TensorRT-LLM (#1019) 2024-01-31 21:55:32 +08:00
fmha_v2_flash_attention_fp16_128_128_S_40_pagedKV_sm86.cubin.cpp Update TensorRT-LLM (#1019) 2024-01-31 21:55:32 +08:00
fmha_v2_flash_attention_fp16_128_128_S_40_pagedKV_sm89.cubin.cpp Update TensorRT-LLM (#1019) 2024-01-31 21:55:32 +08:00
fmha_v2_flash_attention_fp16_128_128_S_40_pagedKV_sm90.cubin.cpp Update TensorRT-LLM (#1019) 2024-01-31 21:55:32 +08:00
fmha_v2_flash_attention_fp16_128_128_S_64_pagedKV_sm80.cubin.cpp Update TensorRT-LLM (#1019) 2024-01-31 21:55:32 +08:00
fmha_v2_flash_attention_fp16_128_128_S_64_pagedKV_sm86.cubin.cpp Update TensorRT-LLM (#1019) 2024-01-31 21:55:32 +08:00
fmha_v2_flash_attention_fp16_128_128_S_64_pagedKV_sm89.cubin.cpp Update TensorRT-LLM (#1019) 2024-01-31 21:55:32 +08:00
fmha_v2_flash_attention_fp16_128_128_S_64_pagedKV_sm90.cubin.cpp Update TensorRT-LLM (#1019) 2024-01-31 21:55:32 +08:00
fmha_v2_flash_attention_fp16_fp32_64_16_S_160_pagedKV_sm80.cubin.cpp Update TensorRT-LLM (#1019) 2024-01-31 21:55:32 +08:00
fmha_v2_flash_attention_fp16_fp32_64_16_S_160_pagedKV_sm86.cubin.cpp Update TensorRT-LLM (#1019) 2024-01-31 21:55:32 +08:00
fmha_v2_flash_attention_fp16_fp32_64_16_S_160_pagedKV_sm89.cubin.cpp Update TensorRT-LLM (#1019) 2024-01-31 21:55:32 +08:00
fmha_v2_flash_attention_fp16_fp32_64_16_S_160_pagedKV_sm90.cubin.cpp Update TensorRT-LLM (#1019) 2024-01-31 21:55:32 +08:00
fmha_v2_flash_attention_fp16_fp32_64_16_S_192_pagedKV_sm80.cubin.cpp Update TensorRT-LLM (#1427) 2024-04-09 17:03:34 +08:00
fmha_v2_flash_attention_fp16_fp32_64_16_S_192_pagedKV_sm86.cubin.cpp Update TensorRT-LLM (#1427) 2024-04-09 17:03:34 +08:00
fmha_v2_flash_attention_fp16_fp32_64_16_S_192_pagedKV_sm89.cubin.cpp Update TensorRT-LLM (#1427) 2024-04-09 17:03:34 +08:00
fmha_v2_flash_attention_fp16_fp32_64_16_S_192_pagedKV_sm90.cubin.cpp Update TensorRT-LLM (#1427) 2024-04-09 17:03:34 +08:00
fmha_v2_flash_attention_fp16_fp32_64_16_S_256_pagedKV_sm80.cubin.cpp Update TensorRT-LLM (#1019) 2024-01-31 21:55:32 +08:00
fmha_v2_flash_attention_fp16_fp32_64_16_S_256_pagedKV_sm86.cubin.cpp Update TensorRT-LLM (#1019) 2024-01-31 21:55:32 +08:00
fmha_v2_flash_attention_fp16_fp32_64_16_S_256_pagedKV_sm89.cubin.cpp Update TensorRT-LLM (#1019) 2024-01-31 21:55:32 +08:00
fmha_v2_flash_attention_fp16_fp32_64_16_S_256_pagedKV_sm90.cubin.cpp Update TensorRT-LLM (#1019) 2024-01-31 21:55:32 +08:00
fmha_v2_flash_attention_fp16_fp32_64_32_S_40_pagedKV_sm80.cubin.cpp Update TensorRT-LLM (#1019) 2024-01-31 21:55:32 +08:00
fmha_v2_flash_attention_fp16_fp32_64_32_S_40_pagedKV_sm86.cubin.cpp Update TensorRT-LLM (#1019) 2024-01-31 21:55:32 +08:00
fmha_v2_flash_attention_fp16_fp32_64_32_S_40_pagedKV_sm89.cubin.cpp Update TensorRT-LLM (#1019) 2024-01-31 21:55:32 +08:00
fmha_v2_flash_attention_fp16_fp32_64_32_S_40_pagedKV_sm90.cubin.cpp Update TensorRT-LLM (#1019) 2024-01-31 21:55:32 +08:00
fmha_v2_flash_attention_fp16_fp32_64_32_S_64_pagedKV_sm80.cubin.cpp Update TensorRT-LLM (#1019) 2024-01-31 21:55:32 +08:00
fmha_v2_flash_attention_fp16_fp32_64_32_S_64_pagedKV_sm86.cubin.cpp Update TensorRT-LLM (#1019) 2024-01-31 21:55:32 +08:00
fmha_v2_flash_attention_fp16_fp32_64_32_S_64_pagedKV_sm89.cubin.cpp Update TensorRT-LLM (#1019) 2024-01-31 21:55:32 +08:00
fmha_v2_flash_attention_fp16_fp32_64_32_S_64_pagedKV_sm90.cubin.cpp Update TensorRT-LLM (#1019) 2024-01-31 21:55:32 +08:00
fmha_v2_flash_attention_fp16_fp32_64_32_S_80_pagedKV_sm80.cubin.cpp Update TensorRT-LLM (#1019) 2024-01-31 21:55:32 +08:00
fmha_v2_flash_attention_fp16_fp32_64_32_S_80_pagedKV_sm86.cubin.cpp Update TensorRT-LLM (#1019) 2024-01-31 21:55:32 +08:00
fmha_v2_flash_attention_fp16_fp32_64_32_S_80_pagedKV_sm89.cubin.cpp Update TensorRT-LLM (#1019) 2024-01-31 21:55:32 +08:00
fmha_v2_flash_attention_fp16_fp32_64_32_S_80_pagedKV_sm90.cubin.cpp Update TensorRT-LLM (#1019) 2024-01-31 21:55:32 +08:00
fmha_v2_flash_attention_fp16_fp32_64_32_S_96_pagedKV_sm80.cubin.cpp Update TensorRT-LLM (#1055) 2024-02-06 18:38:07 +08:00
fmha_v2_flash_attention_fp16_fp32_64_32_S_96_pagedKV_sm86.cubin.cpp Update TensorRT-LLM (#1055) 2024-02-06 18:38:07 +08:00
fmha_v2_flash_attention_fp16_fp32_64_32_S_96_pagedKV_sm89.cubin.cpp Update TensorRT-LLM (#1055) 2024-02-06 18:38:07 +08:00
fmha_v2_flash_attention_fp16_fp32_64_32_S_96_pagedKV_sm90.cubin.cpp Update TensorRT-LLM (#1055) 2024-02-06 18:38:07 +08:00
fmha_v2_flash_attention_fp16_fp32_64_32_S_104_pagedKV_sm80.cubin.cpp Update TensorRT-LLM (#1055) 2024-02-06 18:38:07 +08:00
fmha_v2_flash_attention_fp16_fp32_64_32_S_104_pagedKV_sm86.cubin.cpp Update TensorRT-LLM (#1055) 2024-02-06 18:38:07 +08:00
fmha_v2_flash_attention_fp16_fp32_64_32_S_104_pagedKV_sm89.cubin.cpp Update TensorRT-LLM (#1055) 2024-02-06 18:38:07 +08:00
fmha_v2_flash_attention_fp16_fp32_64_32_S_104_pagedKV_sm90.cubin.cpp Update TensorRT-LLM (#1055) 2024-02-06 18:38:07 +08:00
fmha_v2_flash_attention_fp16_fp32_64_32_S_128_pagedKV_sm80.cubin.cpp Update TensorRT-LLM (#1019) 2024-01-31 21:55:32 +08:00
fmha_v2_flash_attention_fp16_fp32_64_32_S_128_pagedKV_sm86.cubin.cpp Update TensorRT-LLM (#1019) 2024-01-31 21:55:32 +08:00
fmha_v2_flash_attention_fp16_fp32_64_32_S_128_pagedKV_sm89.cubin.cpp Update TensorRT-LLM (#1019) 2024-01-31 21:55:32 +08:00
fmha_v2_flash_attention_fp16_fp32_64_32_S_128_pagedKV_sm90.cubin.cpp Update TensorRT-LLM (#1019) 2024-01-31 21:55:32 +08:00
fmha_v2_flash_attention_fp16_fp32_64_64_S_16_pagedKV_sm80.cubin.cpp Update TensorRT-LLM (#1019) 2024-01-31 21:55:32 +08:00
fmha_v2_flash_attention_fp16_fp32_64_64_S_16_pagedKV_sm86.cubin.cpp Update TensorRT-LLM (#1019) 2024-01-31 21:55:32 +08:00
fmha_v2_flash_attention_fp16_fp32_64_64_S_16_pagedKV_sm89.cubin.cpp Update TensorRT-LLM (#1019) 2024-01-31 21:55:32 +08:00
fmha_v2_flash_attention_fp16_fp32_64_64_S_16_pagedKV_sm90.cubin.cpp Update TensorRT-LLM (#1019) 2024-01-31 21:55:32 +08:00
fmha_v2_flash_attention_fp16_fp32_64_64_S_32_pagedKV_sm80.cubin.cpp Update TensorRT-LLM (#1019) 2024-01-31 21:55:32 +08:00
fmha_v2_flash_attention_fp16_fp32_64_64_S_32_pagedKV_sm86.cubin.cpp Update TensorRT-LLM (#1019) 2024-01-31 21:55:32 +08:00
fmha_v2_flash_attention_fp16_fp32_64_64_S_32_pagedKV_sm89.cubin.cpp Update TensorRT-LLM (#1019) 2024-01-31 21:55:32 +08:00
fmha_v2_flash_attention_fp16_fp32_64_64_S_32_pagedKV_sm90.cubin.cpp Update TensorRT-LLM (#1019) 2024-01-31 21:55:32 +08:00
fmha_v2_flash_attention_fp16_fp32_64_64_S_160_pagedKV_alibi_tma_ws_sm90.cubin.cpp Update TensorRT-LLM (#1055) 2024-02-06 18:38:07 +08:00
fmha_v2_flash_attention_fp16_fp32_64_64_S_160_pagedKV_tma_ws_sm90.cubin.cpp Update TensorRT-LLM (#1055) 2024-02-06 18:38:07 +08:00
fmha_v2_flash_attention_fp16_fp32_64_64_S_192_pagedKV_alibi_tma_ws_sm90.cubin.cpp Update TensorRT-LLM (#1427) 2024-04-09 17:03:34 +08:00
fmha_v2_flash_attention_fp16_fp32_64_64_S_192_pagedKV_tma_ws_sm90.cubin.cpp Update TensorRT-LLM (#1427) 2024-04-09 17:03:34 +08:00
fmha_v2_flash_attention_fp16_fp32_64_64_S_256_pagedKV_alibi_tma_ws_sm90.cubin.cpp Update TensorRT-LLM (#1055) 2024-02-06 18:38:07 +08:00
fmha_v2_flash_attention_fp16_fp32_64_64_S_256_pagedKV_tma_ws_sm90.cubin.cpp Update TensorRT-LLM (#1055) 2024-02-06 18:38:07 +08:00
fmha_v2_flash_attention_fp16_fp32_64_128_S_32_pagedKV_alibi_tma_ws_sm90.cubin.cpp Update TensorRT-LLM (#1055) 2024-02-06 18:38:07 +08:00
fmha_v2_flash_attention_fp16_fp32_64_128_S_32_pagedKV_tma_ws_sm90.cubin.cpp Update TensorRT-LLM (#1055) 2024-02-06 18:38:07 +08:00
fmha_v2_flash_attention_fp16_fp32_64_128_S_40_pagedKV_alibi_tma_ws_sm90.cubin.cpp Update TensorRT-LLM (#1055) 2024-02-06 18:38:07 +08:00
fmha_v2_flash_attention_fp16_fp32_64_128_S_40_pagedKV_tma_ws_sm90.cubin.cpp Update TensorRT-LLM (#1055) 2024-02-06 18:38:07 +08:00
fmha_v2_flash_attention_fp16_fp32_64_128_S_64_pagedKV_alibi_tma_ws_sm90.cubin.cpp Update TensorRT-LLM (#1055) 2024-02-06 18:38:07 +08:00
fmha_v2_flash_attention_fp16_fp32_64_128_S_64_pagedKV_tma_ws_sm90.cubin.cpp Update TensorRT-LLM (#1055) 2024-02-06 18:38:07 +08:00
fmha_v2_flash_attention_fp16_fp32_64_128_S_80_pagedKV_alibi_tma_ws_sm90.cubin.cpp Update TensorRT-LLM (#1055) 2024-02-06 18:38:07 +08:00
fmha_v2_flash_attention_fp16_fp32_64_128_S_80_pagedKV_sm80.cubin.cpp Update TensorRT-LLM (#1019) 2024-01-31 21:55:32 +08:00
fmha_v2_flash_attention_fp16_fp32_64_128_S_80_pagedKV_sm86.cubin.cpp Update TensorRT-LLM (#1019) 2024-01-31 21:55:32 +08:00
fmha_v2_flash_attention_fp16_fp32_64_128_S_80_pagedKV_sm89.cubin.cpp Update TensorRT-LLM (#1019) 2024-01-31 21:55:32 +08:00
fmha_v2_flash_attention_fp16_fp32_64_128_S_80_pagedKV_sm90.cubin.cpp Update TensorRT-LLM (#1019) 2024-01-31 21:55:32 +08:00
fmha_v2_flash_attention_fp16_fp32_64_128_S_80_pagedKV_tma_ws_sm90.cubin.cpp Update TensorRT-LLM (#1055) 2024-02-06 18:38:07 +08:00
fmha_v2_flash_attention_fp16_fp32_64_128_S_96_pagedKV_alibi_tma_ws_sm90.cubin.cpp Update TensorRT-LLM (#1055) 2024-02-06 18:38:07 +08:00
fmha_v2_flash_attention_fp16_fp32_64_128_S_96_pagedKV_sm80.cubin.cpp Update TensorRT-LLM (#1055) 2024-02-06 18:38:07 +08:00
fmha_v2_flash_attention_fp16_fp32_64_128_S_96_pagedKV_sm86.cubin.cpp Update TensorRT-LLM (#1055) 2024-02-06 18:38:07 +08:00
fmha_v2_flash_attention_fp16_fp32_64_128_S_96_pagedKV_sm89.cubin.cpp Update TensorRT-LLM (#1055) 2024-02-06 18:38:07 +08:00
fmha_v2_flash_attention_fp16_fp32_64_128_S_96_pagedKV_sm90.cubin.cpp Update TensorRT-LLM (#1055) 2024-02-06 18:38:07 +08:00
fmha_v2_flash_attention_fp16_fp32_64_128_S_96_pagedKV_tma_ws_sm90.cubin.cpp Update TensorRT-LLM (#1055) 2024-02-06 18:38:07 +08:00
fmha_v2_flash_attention_fp16_fp32_64_128_S_104_pagedKV_alibi_tma_ws_sm90.cubin.cpp Update TensorRT-LLM (#1055) 2024-02-06 18:38:07 +08:00
fmha_v2_flash_attention_fp16_fp32_64_128_S_104_pagedKV_sm80.cubin.cpp Update TensorRT-LLM (#1055) 2024-02-06 18:38:07 +08:00
fmha_v2_flash_attention_fp16_fp32_64_128_S_104_pagedKV_sm86.cubin.cpp Update TensorRT-LLM (#1055) 2024-02-06 18:38:07 +08:00
fmha_v2_flash_attention_fp16_fp32_64_128_S_104_pagedKV_sm89.cubin.cpp Update TensorRT-LLM (#1055) 2024-02-06 18:38:07 +08:00
fmha_v2_flash_attention_fp16_fp32_64_128_S_104_pagedKV_sm90.cubin.cpp Update TensorRT-LLM (#1055) 2024-02-06 18:38:07 +08:00
fmha_v2_flash_attention_fp16_fp32_64_128_S_104_pagedKV_tma_ws_sm90.cubin.cpp Update TensorRT-LLM (#1055) 2024-02-06 18:38:07 +08:00
fmha_v2_flash_attention_fp16_fp32_64_128_S_128_pagedKV_alibi_tma_ws_sm90.cubin.cpp Update TensorRT-LLM (#1055) 2024-02-06 18:38:07 +08:00
fmha_v2_flash_attention_fp16_fp32_64_128_S_128_pagedKV_sm80.cubin.cpp Update TensorRT-LLM (#1019) 2024-01-31 21:55:32 +08:00
fmha_v2_flash_attention_fp16_fp32_64_128_S_128_pagedKV_sm86.cubin.cpp Update TensorRT-LLM (#1019) 2024-01-31 21:55:32 +08:00
fmha_v2_flash_attention_fp16_fp32_64_128_S_128_pagedKV_sm89.cubin.cpp Update TensorRT-LLM (#1019) 2024-01-31 21:55:32 +08:00
fmha_v2_flash_attention_fp16_fp32_64_128_S_128_pagedKV_sm90.cubin.cpp Update TensorRT-LLM (#1019) 2024-01-31 21:55:32 +08:00
fmha_v2_flash_attention_fp16_fp32_64_128_S_128_pagedKV_tma_ws_sm90.cubin.cpp Update TensorRT-LLM (#1055) 2024-02-06 18:38:07 +08:00
fmha_v2_flash_attention_fp16_fp32_64_128_S_160_pagedKV_sm80.cubin.cpp Update TensorRT-LLM (#1019) 2024-01-31 21:55:32 +08:00
fmha_v2_flash_attention_fp16_fp32_64_128_S_160_pagedKV_sm86.cubin.cpp Update TensorRT-LLM (#1019) 2024-01-31 21:55:32 +08:00
fmha_v2_flash_attention_fp16_fp32_64_128_S_160_pagedKV_sm89.cubin.cpp Update TensorRT-LLM (#1019) 2024-01-31 21:55:32 +08:00
fmha_v2_flash_attention_fp16_fp32_64_128_S_160_pagedKV_sm90.cubin.cpp Update TensorRT-LLM (#1019) 2024-01-31 21:55:32 +08:00
fmha_v2_flash_attention_fp16_fp32_64_128_S_192_pagedKV_sm80.cubin.cpp Update TensorRT-LLM (#1427) 2024-04-09 17:03:34 +08:00
fmha_v2_flash_attention_fp16_fp32_64_128_S_192_pagedKV_sm86.cubin.cpp Update TensorRT-LLM (#1427) 2024-04-09 17:03:34 +08:00
fmha_v2_flash_attention_fp16_fp32_64_128_S_192_pagedKV_sm89.cubin.cpp Update TensorRT-LLM (#1427) 2024-04-09 17:03:34 +08:00
fmha_v2_flash_attention_fp16_fp32_64_128_S_192_pagedKV_sm90.cubin.cpp Update TensorRT-LLM (#1427) 2024-04-09 17:03:34 +08:00
fmha_v2_flash_attention_fp16_fp32_64_128_S_256_pagedKV_sm80.cubin.cpp Update TensorRT-LLM (#1019) 2024-01-31 21:55:32 +08:00
fmha_v2_flash_attention_fp16_fp32_64_128_S_256_pagedKV_sm86.cubin.cpp Update TensorRT-LLM (#1019) 2024-01-31 21:55:32 +08:00
fmha_v2_flash_attention_fp16_fp32_64_128_S_256_pagedKV_sm89.cubin.cpp Update TensorRT-LLM (#1019) 2024-01-31 21:55:32 +08:00
fmha_v2_flash_attention_fp16_fp32_64_128_S_256_pagedKV_sm90.cubin.cpp Update TensorRT-LLM (#1019) 2024-01-31 21:55:32 +08:00
fmha_v2_flash_attention_fp16_fp32_64_256_S_32_pagedKV_alibi_tma_ws_sm90.cubin.cpp Update TensorRT-LLM (#1055) 2024-02-06 18:38:07 +08:00
fmha_v2_flash_attention_fp16_fp32_64_256_S_32_pagedKV_tma_ws_sm90.cubin.cpp Update TensorRT-LLM (#1055) 2024-02-06 18:38:07 +08:00
fmha_v2_flash_attention_fp16_fp32_64_256_S_40_pagedKV_alibi_tma_ws_sm90.cubin.cpp Update TensorRT-LLM (#1055) 2024-02-06 18:38:07 +08:00
fmha_v2_flash_attention_fp16_fp32_64_256_S_40_pagedKV_tma_ws_sm90.cubin.cpp Update TensorRT-LLM (#1055) 2024-02-06 18:38:07 +08:00
fmha_v2_flash_attention_fp16_fp32_64_256_S_64_pagedKV_alibi_tma_ws_sm90.cubin.cpp Update TensorRT-LLM (#1055) 2024-02-06 18:38:07 +08:00
fmha_v2_flash_attention_fp16_fp32_64_256_S_64_pagedKV_tma_ws_sm90.cubin.cpp Update TensorRT-LLM (#1055) 2024-02-06 18:38:07 +08:00
fmha_v2_flash_attention_fp16_fp32_128_128_S_16_pagedKV_sm80.cubin.cpp Update TensorRT-LLM (#1019) 2024-01-31 21:55:32 +08:00
fmha_v2_flash_attention_fp16_fp32_128_128_S_16_pagedKV_sm86.cubin.cpp Update TensorRT-LLM (#1019) 2024-01-31 21:55:32 +08:00
fmha_v2_flash_attention_fp16_fp32_128_128_S_16_pagedKV_sm89.cubin.cpp Update TensorRT-LLM (#1019) 2024-01-31 21:55:32 +08:00
fmha_v2_flash_attention_fp16_fp32_128_128_S_16_pagedKV_sm90.cubin.cpp Update TensorRT-LLM (#1019) 2024-01-31 21:55:32 +08:00
fmha_v2_flash_attention_fp16_fp32_128_128_S_32_pagedKV_sm80.cubin.cpp Update TensorRT-LLM (#1019) 2024-01-31 21:55:32 +08:00
fmha_v2_flash_attention_fp16_fp32_128_128_S_32_pagedKV_sm86.cubin.cpp Update TensorRT-LLM (#1019) 2024-01-31 21:55:32 +08:00
fmha_v2_flash_attention_fp16_fp32_128_128_S_32_pagedKV_sm89.cubin.cpp Update TensorRT-LLM (#1019) 2024-01-31 21:55:32 +08:00
fmha_v2_flash_attention_fp16_fp32_128_128_S_32_pagedKV_sm90.cubin.cpp Update TensorRT-LLM (#1019) 2024-01-31 21:55:32 +08:00
fmha_v2_flash_attention_fp16_fp32_128_128_S_40_pagedKV_sm80.cubin.cpp Update TensorRT-LLM (#1019) 2024-01-31 21:55:32 +08:00
fmha_v2_flash_attention_fp16_fp32_128_128_S_40_pagedKV_sm86.cubin.cpp Update TensorRT-LLM (#1019) 2024-01-31 21:55:32 +08:00
fmha_v2_flash_attention_fp16_fp32_128_128_S_40_pagedKV_sm89.cubin.cpp Update TensorRT-LLM (#1019) 2024-01-31 21:55:32 +08:00
fmha_v2_flash_attention_fp16_fp32_128_128_S_40_pagedKV_sm90.cubin.cpp Update TensorRT-LLM (#1019) 2024-01-31 21:55:32 +08:00
fmha_v2_flash_attention_fp16_fp32_128_128_S_64_pagedKV_sm80.cubin.cpp Update TensorRT-LLM (#1019) 2024-01-31 21:55:32 +08:00
fmha_v2_flash_attention_fp16_fp32_128_128_S_64_pagedKV_sm86.cubin.cpp Update TensorRT-LLM (#1019) 2024-01-31 21:55:32 +08:00
fmha_v2_flash_attention_fp16_fp32_128_128_S_64_pagedKV_sm89.cubin.cpp Update TensorRT-LLM (#1019) 2024-01-31 21:55:32 +08:00
fmha_v2_flash_attention_fp16_fp32_128_128_S_64_pagedKV_sm90.cubin.cpp Update TensorRT-LLM (#1019) 2024-01-31 21:55:32 +08:00