| .. |
|
cubin
|
Update TensorRT-LLM (#1019)
|
2024-01-31 21:55:32 +08:00 |
|
copy_cu.py
|
Update TensorRT-LLM (#787)
|
2024-01-02 17:54:32 +08:00 |
|
decoderMaskedMultiheadAttention32_bf16_implicit_relative_attn.cu
|
Update TensorRT-LLM (#1019)
|
2024-01-31 21:55:32 +08:00 |
|
decoderMaskedMultiheadAttention32_bf16.cu
|
Initial commit
|
2023-09-20 00:29:41 -07:00 |
|
decoderMaskedMultiheadAttention32_float_implicit_relative_attn.cu
|
Update TensorRT-LLM (#1019)
|
2024-01-31 21:55:32 +08:00 |
|
decoderMaskedMultiheadAttention32_float.cu
|
Initial commit
|
2023-09-20 00:29:41 -07:00 |
|
decoderMaskedMultiheadAttention32_half_implicit_relative_attn.cu
|
Update TensorRT-LLM (#1019)
|
2024-01-31 21:55:32 +08:00 |
|
decoderMaskedMultiheadAttention32_half.cu
|
Initial commit
|
2023-09-20 00:29:41 -07:00 |
|
decoderMaskedMultiheadAttention48_bf16.cu
|
Update TensorRT-LLM (#302)
|
2023-11-07 19:51:58 +08:00 |
|
decoderMaskedMultiheadAttention48_float.cu
|
Update TensorRT-LLM (#302)
|
2023-11-07 19:51:58 +08:00 |
|
decoderMaskedMultiheadAttention48_half.cu
|
Update TensorRT-LLM (#302)
|
2023-11-07 19:51:58 +08:00 |
|
decoderMaskedMultiheadAttention64_bf16_implicit_relative_attn.cu
|
Update TensorRT-LLM (#1019)
|
2024-01-31 21:55:32 +08:00 |
|
decoderMaskedMultiheadAttention64_bf16.cu
|
Initial commit
|
2023-09-20 00:29:41 -07:00 |
|
decoderMaskedMultiheadAttention64_float_implicit_relative_attn.cu
|
Update TensorRT-LLM (#1019)
|
2024-01-31 21:55:32 +08:00 |
|
decoderMaskedMultiheadAttention64_float.cu
|
Initial commit
|
2023-09-20 00:29:41 -07:00 |
|
decoderMaskedMultiheadAttention64_half_implicit_relative_attn.cu
|
Update TensorRT-LLM (#1019)
|
2024-01-31 21:55:32 +08:00 |
|
decoderMaskedMultiheadAttention64_half.cu
|
Initial commit
|
2023-09-20 00:29:41 -07:00 |
|
decoderMaskedMultiheadAttention80_bf16.cu
|
Update TensorRT-LLM (#302)
|
2023-11-07 19:51:58 +08:00 |
|
decoderMaskedMultiheadAttention80_float.cu
|
Update TensorRT-LLM (#302)
|
2023-11-07 19:51:58 +08:00 |
|
decoderMaskedMultiheadAttention80_half.cu
|
Update TensorRT-LLM (#302)
|
2023-11-07 19:51:58 +08:00 |
|
decoderMaskedMultiheadAttention96_bf16.cu
|
Update TensorRT-LLM (#302)
|
2023-11-07 19:51:58 +08:00 |
|
decoderMaskedMultiheadAttention96_float.cu
|
Update TensorRT-LLM (#302)
|
2023-11-07 19:51:58 +08:00 |
|
decoderMaskedMultiheadAttention96_half.cu
|
Update TensorRT-LLM (#302)
|
2023-11-07 19:51:58 +08:00 |
|
decoderMaskedMultiheadAttention104_bf16.cu
|
Update TensorRT-LLM (#1055)
|
2024-02-06 18:38:07 +08:00 |
|
decoderMaskedMultiheadAttention104_float.cu
|
Update TensorRT-LLM (#1055)
|
2024-02-06 18:38:07 +08:00 |
|
decoderMaskedMultiheadAttention104_half.cu
|
Update TensorRT-LLM (#1055)
|
2024-02-06 18:38:07 +08:00 |
|
decoderMaskedMultiheadAttention112_bf16.cu
|
Update TensorRT-LLM (#302)
|
2023-11-07 19:51:58 +08:00 |
|
decoderMaskedMultiheadAttention112_float.cu
|
Update TensorRT-LLM (#302)
|
2023-11-07 19:51:58 +08:00 |
|
decoderMaskedMultiheadAttention112_half.cu
|
Update TensorRT-LLM (#302)
|
2023-11-07 19:51:58 +08:00 |
|
decoderMaskedMultiheadAttention128_bf16_implicit_relative_attn.cu
|
Update TensorRT-LLM (#1019)
|
2024-01-31 21:55:32 +08:00 |
|
decoderMaskedMultiheadAttention128_bf16.cu
|
Initial commit
|
2023-09-20 00:29:41 -07:00 |
|
decoderMaskedMultiheadAttention128_float_implicit_relative_attn.cu
|
Update TensorRT-LLM (#1019)
|
2024-01-31 21:55:32 +08:00 |
|
decoderMaskedMultiheadAttention128_float.cu
|
Initial commit
|
2023-09-20 00:29:41 -07:00 |
|
decoderMaskedMultiheadAttention128_half_implicit_relative_attn.cu
|
Update TensorRT-LLM (#1019)
|
2024-01-31 21:55:32 +08:00 |
|
decoderMaskedMultiheadAttention128_half.cu
|
Initial commit
|
2023-09-20 00:29:41 -07:00 |
|
decoderMaskedMultiheadAttention144_bf16.cu
|
Update TensorRT-LLM (#302)
|
2023-11-07 19:51:58 +08:00 |
|
decoderMaskedMultiheadAttention144_float.cu
|
Update TensorRT-LLM (#302)
|
2023-11-07 19:51:58 +08:00 |
|
decoderMaskedMultiheadAttention144_half.cu
|
Update TensorRT-LLM (#302)
|
2023-11-07 19:51:58 +08:00 |
|
decoderMaskedMultiheadAttention160_bf16.cu
|
Update TensorRT-LLM (#302)
|
2023-11-07 19:51:58 +08:00 |
|
decoderMaskedMultiheadAttention160_float.cu
|
Update TensorRT-LLM (#302)
|
2023-11-07 19:51:58 +08:00 |
|
decoderMaskedMultiheadAttention160_half.cu
|
Update TensorRT-LLM (#302)
|
2023-11-07 19:51:58 +08:00 |
|
decoderMaskedMultiheadAttention192_bf16.cu
|
Update TensorRT-LLM (#302)
|
2023-11-07 19:51:58 +08:00 |
|
decoderMaskedMultiheadAttention192_float.cu
|
Update TensorRT-LLM (#302)
|
2023-11-07 19:51:58 +08:00 |
|
decoderMaskedMultiheadAttention192_half.cu
|
Update TensorRT-LLM (#302)
|
2023-11-07 19:51:58 +08:00 |
|
decoderMaskedMultiheadAttention224_bf16.cu
|
Update TensorRT-LLM (#302)
|
2023-11-07 19:51:58 +08:00 |
|
decoderMaskedMultiheadAttention224_float.cu
|
Update TensorRT-LLM (#302)
|
2023-11-07 19:51:58 +08:00 |
|
decoderMaskedMultiheadAttention224_half.cu
|
Update TensorRT-LLM (#302)
|
2023-11-07 19:51:58 +08:00 |
|
decoderMaskedMultiheadAttention256_bf16.cu
|
Initial commit
|
2023-09-20 00:29:41 -07:00 |
|
decoderMaskedMultiheadAttention256_float.cu
|
Initial commit
|
2023-09-20 00:29:41 -07:00 |
|
decoderMaskedMultiheadAttention256_half.cu
|
Initial commit
|
2023-09-20 00:29:41 -07:00 |
|
decoderMaskedMultiheadAttentionLaunch.h
|
Update TensorRT-LLM (#1274)
|
2024-03-12 18:15:52 +08:00 |
|
decoderMaskedMultiheadAttentionTemplate.h
|
Update TensorRT-LLM (#1358)
|
2024-03-26 20:47:14 +08:00 |
|
decoderXQAConstants.h
|
Update TensorRT-LLM (#1019)
|
2024-01-31 21:55:32 +08:00 |
|
decoderXQAImpl.cpp
|
Update TensorRT-LLM (#1274)
|
2024-03-12 18:15:52 +08:00 |
|
decoderXQAImpl.h
|
Update TensorRT-LLM (#1274)
|
2024-03-12 18:15:52 +08:00 |
|
decoderXQAImplPrecompiled.cpp
|
Update TensorRT-LLM (#1358)
|
2024-03-26 20:47:14 +08:00 |
|
decoderXQAImplPrecompiled.h
|
Update TensorRT-LLM (#1274)
|
2024-03-12 18:15:52 +08:00 |
|
decoderXQARunner.cpp
|
Update TensorRT-LLM (#1274)
|
2024-03-12 18:15:52 +08:00 |
|
decoderXQARunner.h
|
Update TensorRT-LLM (#1274)
|
2024-03-12 18:15:52 +08:00 |
|
mmha_notes.md
|
Initial commit
|
2023-09-20 00:29:41 -07:00 |
|
xqaParams.h
|
Update TensorRT-LLM (#1274)
|
2024-03-12 18:15:52 +08:00 |