| .. |
|
fmha_cubin.h
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_bf16_64_32_ldgsts_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_bf16_64_64_ldgsts_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_bf16_128_32_ldgsts_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_bf16_128_64_ldgsts_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_bf16_256_32_ldgsts_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_bf16_256_64_ldgsts_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_bf16_384_32_ldgsts_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_bf16_384_64_ldgsts_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_bf16_512_32_ldgsts_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_bf16_512_64_ldgsts_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_bf16_64_16_S_160_sm80.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_bf16_64_16_S_160_sm86.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_bf16_64_16_S_160_sm89.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_bf16_64_16_S_192_sm80.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_bf16_64_16_S_192_sm86.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_bf16_64_16_S_192_sm89.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_bf16_64_16_S_256_sm80.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_bf16_64_16_S_256_sm86.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_bf16_64_16_S_256_sm89.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_bf16_64_16_S_pagedKV_160_sm80.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_bf16_64_16_S_pagedKV_160_sm86.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_bf16_64_16_S_pagedKV_160_sm89.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_bf16_64_16_S_pagedKV_160_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_bf16_64_16_S_pagedKV_192_sm80.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_bf16_64_16_S_pagedKV_192_sm86.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_bf16_64_16_S_pagedKV_192_sm89.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_bf16_64_16_S_pagedKV_192_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_bf16_64_16_S_pagedKV_256_sm80.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_bf16_64_16_S_pagedKV_256_sm86.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_bf16_64_16_S_pagedKV_256_sm89.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_bf16_64_16_S_pagedKV_256_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_bf16_64_32_S_40_sm80.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_bf16_64_32_S_40_sm86.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_bf16_64_32_S_40_sm89.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_bf16_64_32_S_64_sm80.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_bf16_64_32_S_64_sm86.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_bf16_64_32_S_64_sm89.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_bf16_64_32_S_80_sm80.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_bf16_64_32_S_80_sm86.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_bf16_64_32_S_80_sm89.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_bf16_64_32_S_96_sm80.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_bf16_64_32_S_96_sm86.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_bf16_64_32_S_96_sm89.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_bf16_64_32_S_104_sm80.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_bf16_64_32_S_104_sm86.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_bf16_64_32_S_104_sm89.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_bf16_64_32_S_128_sm80.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_bf16_64_32_S_128_sm86.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_bf16_64_32_S_128_sm89.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_bf16_64_32_S_pagedKV_40_sm80.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_bf16_64_32_S_pagedKV_40_sm86.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_bf16_64_32_S_pagedKV_40_sm89.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_bf16_64_32_S_pagedKV_40_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_bf16_64_32_S_pagedKV_64_sm80.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_bf16_64_32_S_pagedKV_64_sm86.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_bf16_64_32_S_pagedKV_64_sm89.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_bf16_64_32_S_pagedKV_64_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_bf16_64_32_S_pagedKV_80_sm80.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_bf16_64_32_S_pagedKV_80_sm86.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_bf16_64_32_S_pagedKV_80_sm89.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_bf16_64_32_S_pagedKV_80_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_bf16_64_32_S_pagedKV_96_sm80.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_bf16_64_32_S_pagedKV_96_sm86.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_bf16_64_32_S_pagedKV_96_sm89.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_bf16_64_32_S_pagedKV_96_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_bf16_64_32_S_pagedKV_104_sm80.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_bf16_64_32_S_pagedKV_104_sm86.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_bf16_64_32_S_pagedKV_104_sm89.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_bf16_64_32_S_pagedKV_104_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_bf16_64_32_S_pagedKV_128_sm80.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_bf16_64_32_S_pagedKV_128_sm86.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_bf16_64_32_S_pagedKV_128_sm89.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_bf16_64_32_S_pagedKV_128_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_bf16_64_64_S_16_sm80.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_bf16_64_64_S_16_sm86.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_bf16_64_64_S_16_sm89.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_bf16_64_64_S_32_sm80.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_bf16_64_64_S_32_sm86.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_bf16_64_64_S_32_sm89.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_bf16_64_64_S_160_alibi_tma_ws_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_bf16_64_64_S_160_tma_ws_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_bf16_64_64_S_192_alibi_tma_ws_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_bf16_64_64_S_192_tma_ws_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_bf16_64_64_S_256_alibi_tma_ws_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_bf16_64_64_S_256_tma_ws_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_bf16_64_64_S_pagedKV_16_sm80.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_bf16_64_64_S_pagedKV_16_sm86.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_bf16_64_64_S_pagedKV_16_sm89.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_bf16_64_64_S_pagedKV_16_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_bf16_64_64_S_pagedKV_32_sm80.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_bf16_64_64_S_pagedKV_32_sm86.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_bf16_64_64_S_pagedKV_32_sm89.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_bf16_64_64_S_pagedKV_32_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_bf16_64_64_S_pagedKV_160_alibi_tma_ws_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_bf16_64_64_S_pagedKV_160_tma_ws_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_bf16_64_64_S_pagedKV_192_alibi_tma_ws_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_bf16_64_64_S_pagedKV_192_tma_ws_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_bf16_64_64_S_pagedKV_256_alibi_tma_ws_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_bf16_64_64_S_pagedKV_256_tma_ws_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_bf16_64_128_S_80_alibi_tma_ws_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_bf16_64_128_S_80_sm80.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_bf16_64_128_S_80_sm86.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_bf16_64_128_S_80_sm89.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_bf16_64_128_S_80_tma_ws_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_bf16_64_128_S_96_alibi_tma_ws_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_bf16_64_128_S_96_sm80.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_bf16_64_128_S_96_sm86.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_bf16_64_128_S_96_sm89.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_bf16_64_128_S_96_tma_ws_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_bf16_64_128_S_104_alibi_tma_ws_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_bf16_64_128_S_104_sm80.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_bf16_64_128_S_104_sm86.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_bf16_64_128_S_104_sm89.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_bf16_64_128_S_104_tma_ws_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_bf16_64_128_S_128_alibi_tma_ws_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_bf16_64_128_S_128_sm80.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_bf16_64_128_S_128_sm86.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_bf16_64_128_S_128_sm89.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_bf16_64_128_S_128_tma_ws_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_bf16_64_128_S_160_sm80.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_bf16_64_128_S_160_sm86.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_bf16_64_128_S_160_sm89.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_bf16_64_128_S_192_sm80.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_bf16_64_128_S_192_sm86.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_bf16_64_128_S_192_sm89.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_bf16_64_128_S_256_sm80.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_bf16_64_128_S_256_sm86.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_bf16_64_128_S_256_sm89.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_bf16_64_128_S_pagedKV_80_alibi_tma_ws_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_bf16_64_128_S_pagedKV_80_sm80.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_bf16_64_128_S_pagedKV_80_sm86.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_bf16_64_128_S_pagedKV_80_sm89.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_bf16_64_128_S_pagedKV_80_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_bf16_64_128_S_pagedKV_80_tma_ws_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_bf16_64_128_S_pagedKV_96_alibi_tma_ws_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_bf16_64_128_S_pagedKV_96_sm80.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_bf16_64_128_S_pagedKV_96_sm86.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_bf16_64_128_S_pagedKV_96_sm89.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_bf16_64_128_S_pagedKV_96_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_bf16_64_128_S_pagedKV_96_tma_ws_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_bf16_64_128_S_pagedKV_104_alibi_tma_ws_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_bf16_64_128_S_pagedKV_104_sm80.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_bf16_64_128_S_pagedKV_104_sm86.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_bf16_64_128_S_pagedKV_104_sm89.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_bf16_64_128_S_pagedKV_104_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_bf16_64_128_S_pagedKV_104_tma_ws_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_bf16_64_128_S_pagedKV_128_alibi_tma_ws_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_bf16_64_128_S_pagedKV_128_sm80.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_bf16_64_128_S_pagedKV_128_sm86.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_bf16_64_128_S_pagedKV_128_sm89.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_bf16_64_128_S_pagedKV_128_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_bf16_64_128_S_pagedKV_128_tma_ws_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_bf16_64_128_S_pagedKV_160_sm80.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_bf16_64_128_S_pagedKV_160_sm86.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_bf16_64_128_S_pagedKV_160_sm89.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_bf16_64_128_S_pagedKV_160_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_bf16_64_128_S_pagedKV_192_sm80.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_bf16_64_128_S_pagedKV_192_sm86.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_bf16_64_128_S_pagedKV_192_sm89.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_bf16_64_128_S_pagedKV_192_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_bf16_64_128_S_pagedKV_256_sm80.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_bf16_64_128_S_pagedKV_256_sm86.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_bf16_64_128_S_pagedKV_256_sm89.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_bf16_64_128_S_pagedKV_256_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_bf16_64_256_S_32_alibi_tma_ws_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_bf16_64_256_S_32_tma_ws_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_bf16_64_256_S_40_alibi_tma_ws_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_bf16_64_256_S_40_tma_ws_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_bf16_64_256_S_64_alibi_tma_ws_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_bf16_64_256_S_64_tma_ws_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_bf16_64_256_S_pagedKV_32_alibi_tma_ws_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_bf16_64_256_S_pagedKV_32_tma_ws_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_bf16_64_256_S_pagedKV_40_alibi_tma_ws_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_bf16_64_256_S_pagedKV_40_tma_ws_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_bf16_64_256_S_pagedKV_64_alibi_tma_ws_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_bf16_64_256_S_pagedKV_64_tma_ws_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_bf16_128_128_S_16_sm80.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_bf16_128_128_S_16_sm86.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_bf16_128_128_S_16_sm89.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_bf16_128_128_S_32_sm80.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_bf16_128_128_S_32_sm86.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_bf16_128_128_S_32_sm89.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_bf16_128_128_S_40_sm80.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_bf16_128_128_S_40_sm86.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_bf16_128_128_S_40_sm89.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_bf16_128_128_S_64_sm80.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_bf16_128_128_S_64_sm86.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_bf16_128_128_S_64_sm89.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_bf16_128_128_S_pagedKV_16_sm80.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_bf16_128_128_S_pagedKV_16_sm86.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_bf16_128_128_S_pagedKV_16_sm89.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_bf16_128_128_S_pagedKV_16_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_bf16_128_128_S_pagedKV_32_sm80.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_bf16_128_128_S_pagedKV_32_sm86.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_bf16_128_128_S_pagedKV_32_sm89.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_bf16_128_128_S_pagedKV_32_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_bf16_128_128_S_pagedKV_40_sm80.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_bf16_128_128_S_pagedKV_40_sm86.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_bf16_128_128_S_pagedKV_40_sm89.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_bf16_128_128_S_pagedKV_40_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_bf16_128_128_S_pagedKV_64_sm80.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_bf16_128_128_S_pagedKV_64_sm86.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_bf16_128_128_S_pagedKV_64_sm89.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_bf16_128_128_S_pagedKV_64_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_e4m3_64_128_S_160_alibi_tma_ws_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_e4m3_64_128_S_160_tma_ws_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_e4m3_64_128_S_192_alibi_tma_ws_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_e4m3_64_128_S_192_tma_ws_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_e4m3_64_128_S_256_alibi_tma_ws_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_e4m3_64_128_S_256_tma_ws_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_e4m3_64_128_S_pagedKV_160_alibi_tma_ws_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_e4m3_64_128_S_pagedKV_160_tma_ws_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_e4m3_64_128_S_pagedKV_192_alibi_tma_ws_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_e4m3_64_128_S_pagedKV_192_tma_ws_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_e4m3_64_128_S_pagedKV_256_alibi_tma_ws_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_e4m3_64_128_S_pagedKV_256_tma_ws_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_e4m3_64_256_S_32_alibi_tma_ws_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_e4m3_64_256_S_32_tma_ws_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_e4m3_64_256_S_40_alibi_tma_ws_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_e4m3_64_256_S_40_tma_ws_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_e4m3_64_256_S_64_alibi_tma_ws_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_e4m3_64_256_S_64_tma_ws_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_e4m3_64_256_S_80_alibi_tma_ws_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_e4m3_64_256_S_80_tma_ws_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_e4m3_64_256_S_96_alibi_tma_ws_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_e4m3_64_256_S_96_tma_ws_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_e4m3_64_256_S_104_alibi_tma_ws_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_e4m3_64_256_S_104_tma_ws_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_e4m3_64_256_S_128_alibi_tma_ws_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_e4m3_64_256_S_128_tma_ws_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_e4m3_64_256_S_pagedKV_32_alibi_tma_ws_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_e4m3_64_256_S_pagedKV_32_tma_ws_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_e4m3_64_256_S_pagedKV_40_alibi_tma_ws_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_e4m3_64_256_S_pagedKV_40_tma_ws_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_e4m3_64_256_S_pagedKV_64_alibi_tma_ws_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_e4m3_64_256_S_pagedKV_64_tma_ws_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_e4m3_64_256_S_pagedKV_80_alibi_tma_ws_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_e4m3_64_256_S_pagedKV_80_tma_ws_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_e4m3_64_256_S_pagedKV_96_alibi_tma_ws_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_e4m3_64_256_S_pagedKV_96_tma_ws_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_e4m3_64_256_S_pagedKV_104_alibi_tma_ws_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_e4m3_64_256_S_pagedKV_104_tma_ws_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_e4m3_64_256_S_pagedKV_128_alibi_tma_ws_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_e4m3_64_256_S_pagedKV_128_tma_ws_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_64_16_S_80_sm70.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_64_16_S_96_sm70.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_64_16_S_104_sm70.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_64_16_S_128_sm70.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_64_16_S_160_sm70.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_64_16_S_160_sm80.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_64_16_S_160_sm86.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_64_16_S_160_sm89.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_64_16_S_192_sm70.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_64_16_S_192_sm80.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_64_16_S_192_sm86.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_64_16_S_192_sm89.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_64_16_S_256_sm70.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_64_16_S_256_sm80.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_64_16_S_256_sm86.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_64_16_S_256_sm89.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_64_16_S_pagedKV_80_sm70.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_64_16_S_pagedKV_96_sm70.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_64_16_S_pagedKV_104_sm70.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_64_16_S_pagedKV_128_sm70.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_64_16_S_pagedKV_160_sm70.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_64_16_S_pagedKV_160_sm80.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_64_16_S_pagedKV_160_sm86.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_64_16_S_pagedKV_160_sm89.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_64_16_S_pagedKV_160_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_64_16_S_pagedKV_192_sm70.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_64_16_S_pagedKV_192_sm80.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_64_16_S_pagedKV_192_sm86.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_64_16_S_pagedKV_192_sm89.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_64_16_S_pagedKV_192_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_64_16_S_pagedKV_256_sm70.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_64_16_S_pagedKV_256_sm80.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_64_16_S_pagedKV_256_sm86.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_64_16_S_pagedKV_256_sm89.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_64_16_S_pagedKV_256_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_64_32_S_40_sm70.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_64_32_S_40_sm80.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_64_32_S_40_sm86.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_64_32_S_40_sm89.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_64_32_S_64_sm70.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_64_32_S_64_sm80.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_64_32_S_64_sm86.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_64_32_S_64_sm89.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_64_32_S_80_sm80.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_64_32_S_80_sm86.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_64_32_S_80_sm89.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_64_32_S_96_sm80.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_64_32_S_96_sm86.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_64_32_S_96_sm89.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_64_32_S_104_sm80.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_64_32_S_104_sm86.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_64_32_S_104_sm89.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_64_32_S_128_sm80.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_64_32_S_128_sm86.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_64_32_S_128_sm89.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_64_32_S_pagedKV_40_sm70.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_64_32_S_pagedKV_40_sm80.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_64_32_S_pagedKV_40_sm86.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_64_32_S_pagedKV_40_sm89.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_64_32_S_pagedKV_40_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_64_32_S_pagedKV_64_sm70.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_64_32_S_pagedKV_64_sm80.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_64_32_S_pagedKV_64_sm86.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_64_32_S_pagedKV_64_sm89.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_64_32_S_pagedKV_64_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_64_32_S_pagedKV_80_sm80.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_64_32_S_pagedKV_80_sm86.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_64_32_S_pagedKV_80_sm89.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_64_32_S_pagedKV_80_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_64_32_S_pagedKV_96_sm80.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_64_32_S_pagedKV_96_sm86.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_64_32_S_pagedKV_96_sm89.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_64_32_S_pagedKV_96_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_64_32_S_pagedKV_104_sm80.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_64_32_S_pagedKV_104_sm86.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_64_32_S_pagedKV_104_sm89.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_64_32_S_pagedKV_104_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_64_32_S_pagedKV_128_sm80.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_64_32_S_pagedKV_128_sm86.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_64_32_S_pagedKV_128_sm89.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_64_32_S_pagedKV_128_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_64_64_S_16_sm80.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_64_64_S_16_sm86.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_64_64_S_16_sm89.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_64_64_S_32_sm70.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_64_64_S_32_sm80.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_64_64_S_32_sm86.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_64_64_S_32_sm89.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_64_64_S_160_alibi_tma_ws_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_64_64_S_160_tma_ws_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_64_64_S_192_alibi_tma_ws_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_64_64_S_192_tma_ws_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_64_64_S_256_alibi_tma_ws_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_64_64_S_256_tma_ws_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_64_64_S_pagedKV_16_sm80.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_64_64_S_pagedKV_16_sm86.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_64_64_S_pagedKV_16_sm89.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_64_64_S_pagedKV_16_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_64_64_S_pagedKV_32_sm70.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_64_64_S_pagedKV_32_sm80.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_64_64_S_pagedKV_32_sm86.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_64_64_S_pagedKV_32_sm89.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_64_64_S_pagedKV_32_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_64_64_S_pagedKV_160_alibi_tma_ws_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_64_64_S_pagedKV_160_tma_ws_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_64_64_S_pagedKV_192_alibi_tma_ws_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_64_64_S_pagedKV_192_tma_ws_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_64_64_S_pagedKV_256_alibi_tma_ws_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_64_64_S_pagedKV_256_tma_ws_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_64_128_S_80_alibi_tma_ws_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_64_128_S_80_sm80.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_64_128_S_80_sm86.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_64_128_S_80_sm89.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_64_128_S_80_tma_ws_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_64_128_S_96_alibi_tma_ws_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_64_128_S_96_sm80.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_64_128_S_96_sm86.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_64_128_S_96_sm89.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_64_128_S_96_tma_ws_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_64_128_S_104_alibi_tma_ws_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_64_128_S_104_sm80.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_64_128_S_104_sm86.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_64_128_S_104_sm89.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_64_128_S_104_tma_ws_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_64_128_S_128_alibi_tma_ws_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_64_128_S_128_sm80.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_64_128_S_128_sm86.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_64_128_S_128_sm89.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_64_128_S_128_tma_ws_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_64_128_S_160_sm80.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_64_128_S_160_sm86.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_64_128_S_160_sm89.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_64_128_S_192_sm80.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_64_128_S_192_sm86.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_64_128_S_192_sm89.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_64_128_S_256_sm80.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_64_128_S_256_sm86.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_64_128_S_256_sm89.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_64_128_S_pagedKV_80_alibi_tma_ws_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_64_128_S_pagedKV_80_sm80.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_64_128_S_pagedKV_80_sm86.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_64_128_S_pagedKV_80_sm89.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_64_128_S_pagedKV_80_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_64_128_S_pagedKV_80_tma_ws_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_64_128_S_pagedKV_96_alibi_tma_ws_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_64_128_S_pagedKV_96_sm80.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_64_128_S_pagedKV_96_sm86.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_64_128_S_pagedKV_96_sm89.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_64_128_S_pagedKV_96_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_64_128_S_pagedKV_96_tma_ws_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_64_128_S_pagedKV_104_alibi_tma_ws_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_64_128_S_pagedKV_104_sm80.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_64_128_S_pagedKV_104_sm86.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_64_128_S_pagedKV_104_sm89.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_64_128_S_pagedKV_104_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_64_128_S_pagedKV_104_tma_ws_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_64_128_S_pagedKV_128_alibi_tma_ws_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_64_128_S_pagedKV_128_sm80.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_64_128_S_pagedKV_128_sm86.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_64_128_S_pagedKV_128_sm89.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_64_128_S_pagedKV_128_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_64_128_S_pagedKV_128_tma_ws_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_64_128_S_pagedKV_160_sm80.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_64_128_S_pagedKV_160_sm86.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_64_128_S_pagedKV_160_sm89.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_64_128_S_pagedKV_160_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_64_128_S_pagedKV_192_sm80.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_64_128_S_pagedKV_192_sm86.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_64_128_S_pagedKV_192_sm89.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_64_128_S_pagedKV_192_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_64_128_S_pagedKV_256_sm80.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_64_128_S_pagedKV_256_sm86.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_64_128_S_pagedKV_256_sm89.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_64_128_S_pagedKV_256_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_64_256_S_32_alibi_tma_ws_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_64_256_S_32_tma_ws_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_64_256_S_40_alibi_tma_ws_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_64_256_S_40_tma_ws_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_64_256_S_64_alibi_tma_ws_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_64_256_S_64_tma_ws_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_64_256_S_pagedKV_32_alibi_tma_ws_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_64_256_S_pagedKV_32_tma_ws_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_64_256_S_pagedKV_40_alibi_tma_ws_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_64_256_S_pagedKV_40_tma_ws_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_64_256_S_pagedKV_64_alibi_tma_ws_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_64_256_S_pagedKV_64_tma_ws_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_128_128_S_16_sm80.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_128_128_S_16_sm86.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_128_128_S_16_sm89.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_128_128_S_32_sm80.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_128_128_S_32_sm86.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_128_128_S_32_sm89.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_128_128_S_40_sm80.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_128_128_S_40_sm86.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_128_128_S_40_sm89.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_128_128_S_64_sm80.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_128_128_S_64_sm86.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_128_128_S_64_sm89.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_128_128_S_pagedKV_16_sm80.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_128_128_S_pagedKV_16_sm86.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_128_128_S_pagedKV_16_sm89.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_128_128_S_pagedKV_16_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_128_128_S_pagedKV_32_sm80.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_128_128_S_pagedKV_32_sm86.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_128_128_S_pagedKV_32_sm89.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_128_128_S_pagedKV_32_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_128_128_S_pagedKV_40_sm80.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_128_128_S_pagedKV_40_sm86.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_128_128_S_pagedKV_40_sm89.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_128_128_S_pagedKV_40_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_128_128_S_pagedKV_64_sm80.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_128_128_S_pagedKV_64_sm86.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_128_128_S_pagedKV_64_sm89.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_128_128_S_pagedKV_64_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_fp32_64_16_S_160_sm80.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_fp32_64_16_S_160_sm86.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_fp32_64_16_S_160_sm89.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_fp32_64_16_S_192_sm80.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_fp32_64_16_S_192_sm86.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_fp32_64_16_S_192_sm89.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_fp32_64_16_S_256_sm80.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_fp32_64_16_S_256_sm86.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_fp32_64_16_S_256_sm89.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_fp32_64_16_S_pagedKV_160_sm80.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_fp32_64_16_S_pagedKV_160_sm86.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_fp32_64_16_S_pagedKV_160_sm89.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_fp32_64_16_S_pagedKV_160_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_fp32_64_16_S_pagedKV_192_sm80.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_fp32_64_16_S_pagedKV_192_sm86.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_fp32_64_16_S_pagedKV_192_sm89.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_fp32_64_16_S_pagedKV_192_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_fp32_64_16_S_pagedKV_256_sm80.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_fp32_64_16_S_pagedKV_256_sm86.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_fp32_64_16_S_pagedKV_256_sm89.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_fp32_64_16_S_pagedKV_256_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_fp32_64_32_S_40_sm80.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_fp32_64_32_S_40_sm86.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_fp32_64_32_S_40_sm89.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_fp32_64_32_S_64_sm80.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_fp32_64_32_S_64_sm86.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_fp32_64_32_S_64_sm89.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_fp32_64_32_S_80_sm80.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_fp32_64_32_S_80_sm86.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_fp32_64_32_S_80_sm89.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_fp32_64_32_S_96_sm80.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_fp32_64_32_S_96_sm86.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_fp32_64_32_S_96_sm89.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_fp32_64_32_S_104_sm80.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_fp32_64_32_S_104_sm86.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_fp32_64_32_S_104_sm89.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_fp32_64_32_S_128_sm80.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_fp32_64_32_S_128_sm86.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_fp32_64_32_S_128_sm89.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_fp32_64_32_S_pagedKV_40_sm80.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_fp32_64_32_S_pagedKV_40_sm86.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_fp32_64_32_S_pagedKV_40_sm89.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_fp32_64_32_S_pagedKV_40_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_fp32_64_32_S_pagedKV_64_sm80.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_fp32_64_32_S_pagedKV_64_sm86.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_fp32_64_32_S_pagedKV_64_sm89.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_fp32_64_32_S_pagedKV_64_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_fp32_64_32_S_pagedKV_80_sm80.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_fp32_64_32_S_pagedKV_80_sm86.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_fp32_64_32_S_pagedKV_80_sm89.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_fp32_64_32_S_pagedKV_80_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_fp32_64_32_S_pagedKV_96_sm80.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_fp32_64_32_S_pagedKV_96_sm86.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_fp32_64_32_S_pagedKV_96_sm89.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_fp32_64_32_S_pagedKV_96_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_fp32_64_32_S_pagedKV_104_sm80.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_fp32_64_32_S_pagedKV_104_sm86.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_fp32_64_32_S_pagedKV_104_sm89.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_fp32_64_32_S_pagedKV_104_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_fp32_64_32_S_pagedKV_128_sm80.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_fp32_64_32_S_pagedKV_128_sm86.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_fp32_64_32_S_pagedKV_128_sm89.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_fp32_64_32_S_pagedKV_128_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_fp32_64_64_S_16_sm80.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_fp32_64_64_S_16_sm86.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_fp32_64_64_S_16_sm89.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_fp32_64_64_S_32_sm80.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_fp32_64_64_S_32_sm86.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_fp32_64_64_S_32_sm89.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_fp32_64_64_S_160_alibi_tma_ws_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_fp32_64_64_S_160_tma_ws_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_fp32_64_64_S_192_alibi_tma_ws_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_fp32_64_64_S_192_tma_ws_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_fp32_64_64_S_256_alibi_tma_ws_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_fp32_64_64_S_256_tma_ws_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_fp32_64_64_S_pagedKV_16_sm80.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_fp32_64_64_S_pagedKV_16_sm86.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_fp32_64_64_S_pagedKV_16_sm89.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_fp32_64_64_S_pagedKV_16_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_fp32_64_64_S_pagedKV_32_sm80.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_fp32_64_64_S_pagedKV_32_sm86.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_fp32_64_64_S_pagedKV_32_sm89.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_fp32_64_64_S_pagedKV_32_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_fp32_64_64_S_pagedKV_160_alibi_tma_ws_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_fp32_64_64_S_pagedKV_160_tma_ws_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_fp32_64_64_S_pagedKV_192_alibi_tma_ws_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_fp32_64_64_S_pagedKV_192_tma_ws_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_fp32_64_64_S_pagedKV_256_alibi_tma_ws_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_fp32_64_64_S_pagedKV_256_tma_ws_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_fp32_64_128_S_80_alibi_tma_ws_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_fp32_64_128_S_80_sm80.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_fp32_64_128_S_80_sm86.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_fp32_64_128_S_80_sm89.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_fp32_64_128_S_80_tma_ws_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_fp32_64_128_S_96_alibi_tma_ws_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_fp32_64_128_S_96_sm80.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_fp32_64_128_S_96_sm86.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_fp32_64_128_S_96_sm89.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_fp32_64_128_S_96_tma_ws_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_fp32_64_128_S_104_alibi_tma_ws_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_fp32_64_128_S_104_sm80.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_fp32_64_128_S_104_sm86.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_fp32_64_128_S_104_sm89.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_fp32_64_128_S_104_tma_ws_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_fp32_64_128_S_128_alibi_tma_ws_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_fp32_64_128_S_128_sm80.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_fp32_64_128_S_128_sm86.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_fp32_64_128_S_128_sm89.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_fp32_64_128_S_128_tma_ws_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_fp32_64_128_S_160_sm80.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_fp32_64_128_S_160_sm86.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_fp32_64_128_S_160_sm89.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_fp32_64_128_S_192_sm80.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_fp32_64_128_S_192_sm86.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_fp32_64_128_S_192_sm89.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_fp32_64_128_S_256_sm80.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_fp32_64_128_S_256_sm86.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_fp32_64_128_S_256_sm89.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_fp32_64_128_S_pagedKV_80_alibi_tma_ws_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_fp32_64_128_S_pagedKV_80_sm80.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_fp32_64_128_S_pagedKV_80_sm86.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_fp32_64_128_S_pagedKV_80_sm89.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_fp32_64_128_S_pagedKV_80_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_fp32_64_128_S_pagedKV_80_tma_ws_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_fp32_64_128_S_pagedKV_96_alibi_tma_ws_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_fp32_64_128_S_pagedKV_96_sm80.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_fp32_64_128_S_pagedKV_96_sm86.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_fp32_64_128_S_pagedKV_96_sm89.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_fp32_64_128_S_pagedKV_96_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_fp32_64_128_S_pagedKV_96_tma_ws_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_fp32_64_128_S_pagedKV_104_alibi_tma_ws_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_fp32_64_128_S_pagedKV_104_sm80.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_fp32_64_128_S_pagedKV_104_sm86.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_fp32_64_128_S_pagedKV_104_sm89.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_fp32_64_128_S_pagedKV_104_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_fp32_64_128_S_pagedKV_104_tma_ws_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_fp32_64_128_S_pagedKV_128_alibi_tma_ws_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_fp32_64_128_S_pagedKV_128_sm80.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_fp32_64_128_S_pagedKV_128_sm86.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_fp32_64_128_S_pagedKV_128_sm89.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_fp32_64_128_S_pagedKV_128_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_fp32_64_128_S_pagedKV_128_tma_ws_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_fp32_64_128_S_pagedKV_160_sm80.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_fp32_64_128_S_pagedKV_160_sm86.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_fp32_64_128_S_pagedKV_160_sm89.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_fp32_64_128_S_pagedKV_160_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_fp32_64_128_S_pagedKV_192_sm80.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_fp32_64_128_S_pagedKV_192_sm86.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_fp32_64_128_S_pagedKV_192_sm89.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_fp32_64_128_S_pagedKV_192_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_fp32_64_128_S_pagedKV_256_sm80.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_fp32_64_128_S_pagedKV_256_sm86.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_fp32_64_128_S_pagedKV_256_sm89.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_fp32_64_128_S_pagedKV_256_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_fp32_64_256_S_32_alibi_tma_ws_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_fp32_64_256_S_32_tma_ws_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_fp32_64_256_S_40_alibi_tma_ws_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_fp32_64_256_S_40_tma_ws_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_fp32_64_256_S_64_alibi_tma_ws_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_fp32_64_256_S_64_tma_ws_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_fp32_64_256_S_pagedKV_32_alibi_tma_ws_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_fp32_64_256_S_pagedKV_32_tma_ws_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_fp32_64_256_S_pagedKV_40_alibi_tma_ws_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_fp32_64_256_S_pagedKV_40_tma_ws_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_fp32_64_256_S_pagedKV_64_alibi_tma_ws_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_fp32_64_256_S_pagedKV_64_tma_ws_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_fp32_128_128_S_16_sm80.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_fp32_128_128_S_16_sm86.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_fp32_128_128_S_16_sm89.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_fp32_128_128_S_32_sm80.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_fp32_128_128_S_32_sm86.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_fp32_128_128_S_32_sm89.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_fp32_128_128_S_40_sm80.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_fp32_128_128_S_40_sm86.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_fp32_128_128_S_40_sm89.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_fp32_128_128_S_64_sm80.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_fp32_128_128_S_64_sm86.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_fp32_128_128_S_64_sm89.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_fp32_128_128_S_pagedKV_16_sm80.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_fp32_128_128_S_pagedKV_16_sm86.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_fp32_128_128_S_pagedKV_16_sm89.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_fp32_128_128_S_pagedKV_16_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_fp32_128_128_S_pagedKV_32_sm80.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_fp32_128_128_S_pagedKV_32_sm86.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_fp32_128_128_S_pagedKV_32_sm89.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_fp32_128_128_S_pagedKV_32_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_fp32_128_128_S_pagedKV_40_sm80.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_fp32_128_128_S_pagedKV_40_sm86.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_fp32_128_128_S_pagedKV_40_sm89.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_fp32_128_128_S_pagedKV_40_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_fp32_128_128_S_pagedKV_64_sm80.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_fp32_128_128_S_pagedKV_64_sm86.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_fp32_128_128_S_pagedKV_64_sm89.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_flash_attention_fp16_fp32_128_128_S_pagedKV_64_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_fp16_64_32_ldgsts_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_fp16_64_64_ldgsts_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_fp16_128_32_ldgsts_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_fp16_128_64_ldgsts_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_fp16_256_32_ldgsts_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_fp16_256_64_ldgsts_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_fp16_384_32_ldgsts_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_fp16_384_64_ldgsts_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_fp16_512_32_ldgsts_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_fp16_512_64_ldgsts_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_fp16_fp32_64_32_ldgsts_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_fp16_fp32_64_64_ldgsts_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_fp16_fp32_128_32_ldgsts_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_fp16_fp32_128_64_ldgsts_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_fp16_fp32_256_32_ldgsts_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_fp16_fp32_256_64_ldgsts_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_fp16_fp32_384_32_ldgsts_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_fp16_fp32_384_64_ldgsts_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_fp16_fp32_512_32_ldgsts_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
fmha_v2_fp16_fp32_512_64_ldgsts_sm90.cubin.cpp
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |