| .. |
|
beamSearchKernels
|
Update TensorRT-LLM (#2532)
|
2024-12-04 21:16:56 +08:00 |
|
contextFusedMultiHeadAttention
|
Update TensorRT-LLM (#2755)
|
2025-02-11 03:01:00 +00:00 |
|
cutlass_kernels
|
Update TensorRT-LLM (#2755)
|
2025-02-11 03:01:00 +00:00 |
|
decoderMaskedMultiheadAttention
|
Update TensorRT-LLM (#2755)
|
2025-02-11 03:01:00 +00:00 |
|
internal_cutlass_kernels
|
Update TensorRT-LLM (#2755)
|
2025-02-11 03:01:00 +00:00 |
|
lora
|
Update TensorRT-LLM (#2755)
|
2025-02-11 03:01:00 +00:00 |
|
selectiveScan
|
Update TensorRT-LLM (#2755)
|
2025-02-11 03:01:00 +00:00 |
|
speculativeDecoding
|
Update TensorRT-LLM (#2755)
|
2025-02-11 03:01:00 +00:00 |
|
trtllmGenKernels
|
Update TensorRT-LLM (#2755)
|
2025-02-11 03:01:00 +00:00 |
|
unfusedAttentionKernels
|
Update TensorRT-LLM (#2755)
|
2025-02-11 03:01:00 +00:00 |
|
userbuffers
|
Update TensorRT-LLM (#2755)
|
2025-02-11 03:01:00 +00:00 |
|
weightOnlyBatchedGemv
|
Update TensorRT-LLM (#2755)
|
2025-02-11 03:01:00 +00:00 |
|
attentionMask.cu
|
Update TensorRT-LLM (#2363)
|
2024-10-22 20:27:35 +08:00 |
|
attentionMask.h
|
Update TensorRT-LLM (#2363)
|
2024-10-22 20:27:35 +08:00 |
|
banBadWords.cu
|
Update TensorRT-LLM (#2008)
|
2024-07-23 23:05:09 +08:00 |
|
banBadWords.h
|
Update TensorRT-LLM (#2008)
|
2024-07-23 23:05:09 +08:00 |
|
banRepeatNgram.cu
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
banRepeatNgram.h
|
Update TensorRT-LLM (#1598)
|
2024-05-14 16:43:41 +08:00 |
|
beamSearchKernels.cu
|
Update TensorRT-LLM (#2532)
|
2024-12-04 21:16:56 +08:00 |
|
beamSearchKernels.h
|
Update TensorRT-LLM (#2532)
|
2024-12-04 21:16:56 +08:00 |
|
buildRelativeAttentionBiasKernel.cu
|
Update TensorRT-LLM (#1763)
|
2024-06-11 16:59:02 +08:00 |
|
buildRelativeAttentionBiasKernel.h
|
Update TensorRT-LLM (#1763)
|
2024-06-11 16:59:02 +08:00 |
|
CMakeLists.txt
|
Update TensorRT-LLM (#2755)
|
2025-02-11 03:01:00 +00:00 |
|
cumsumLastDim.cu
|
open source 7f370deb0090d885d7518c2b146399ba3933c004 (#2273)
|
2024-09-30 13:51:19 +02:00 |
|
cumsumLastDim.h
|
Update TensorRT-LLM (#1725)
|
2024-06-04 20:26:32 +08:00 |
|
customAllReduceKernels.cu
|
Update TensorRT-LLM (#2755)
|
2025-02-11 03:01:00 +00:00 |
|
customAllReduceKernels.h
|
Update TensorRT-LLM (#2755)
|
2025-02-11 03:01:00 +00:00 |
|
decoderMaskedMultiheadAttention.cu
|
Update TensorRT-LLM (#2502)
|
2024-11-26 16:51:34 +08:00 |
|
decoderMaskedMultiheadAttention.h
|
Update TensorRT-LLM (#2755)
|
2025-02-11 03:01:00 +00:00 |
|
decoderMaskedMultiheadAttentionUtils.h
|
Update TensorRT-LLM (#2363)
|
2024-10-22 20:27:35 +08:00 |
|
decodingCommon.cu
|
Update TensorRT-LLM (#2755)
|
2025-02-11 03:01:00 +00:00 |
|
decodingCommon.h
|
Update TensorRT-LLM (#2755)
|
2025-02-11 03:01:00 +00:00 |
|
decodingKernels.cu
|
Update TensorRT-LLM (#2532)
|
2024-12-04 21:16:56 +08:00 |
|
decodingKernels.h
|
Update TensorRT-LLM (#2389)
|
2024-10-29 22:24:38 +08:00 |
|
doraScaling.cu
|
Update TensorRT-LLM (#2755)
|
2025-02-11 03:01:00 +00:00 |
|
doraScaling.h
|
Update TensorRT-LLM (#2755)
|
2025-02-11 03:01:00 +00:00 |
|
fmhaDispatcher.cpp
|
Update TensorRT-LLM (#2755)
|
2025-02-11 03:01:00 +00:00 |
|
fmhaDispatcher.h
|
Update TensorRT-LLM (#2755)
|
2025-02-11 03:01:00 +00:00 |
|
gptKernels.cu
|
Update TensorRT-LLM (#2755)
|
2025-02-11 03:01:00 +00:00 |
|
gptKernels.h
|
Update TensorRT-LLM (#2755)
|
2025-02-11 03:01:00 +00:00 |
|
groupGemm.cu
|
Update TensorRT-LLM (#2755)
|
2025-02-11 03:01:00 +00:00 |
|
groupGemm.h
|
Update TensorRT-LLM (#2562)
|
2024-12-11 00:31:05 -08:00 |
|
kvCacheUtils.h
|
Update TensorRT-LLM (#2582)
|
2024-12-16 21:50:47 -08:00 |
|
layernormKernels.cu
|
Update TensorRT-LLM (#1274)
|
2024-03-12 18:15:52 +08:00 |
|
layernormKernels.h
|
Update TensorRT-LLM (#1274)
|
2024-03-12 18:15:52 +08:00 |
|
logitsBitmask.cu
|
Update TensorRT-LLM (#2755)
|
2025-02-11 03:01:00 +00:00 |
|
logitsBitmask.h
|
Update TensorRT-LLM (#2532)
|
2024-12-04 21:16:56 +08:00 |
|
lookupKernels.cu
|
Update TensorRT-LLM (#1639)
|
2024-05-21 17:51:02 +08:00 |
|
lookupKernels.h
|
Update TensorRT-LLM (#1639)
|
2024-05-21 17:51:02 +08:00 |
|
lruKernel.cu
|
Update TensorRT-LLM (#1688)
|
2024-05-28 20:07:49 +08:00 |
|
lruKernel.h
|
Update TensorRT-LLM (#1688)
|
2024-05-28 20:07:49 +08:00 |
|
mambaConv1dKernels.cu
|
Update TensorRT-LLM (#2562)
|
2024-12-11 00:31:05 -08:00 |
|
mambaConv1dKernels.h
|
Update TensorRT-LLM (#1954)
|
2024-07-16 15:30:25 +08:00 |
|
mlaKernels.cu
|
Update TensorRT-LLM (#2413)
|
2024-11-05 16:27:06 +08:00 |
|
mlaKernels.h
|
Update TensorRT-LLM (#2755)
|
2025-02-11 03:01:00 +00:00 |
|
multiHeadAttentionCommon.h
|
Update TensorRT-LLM (#2755)
|
2025-02-11 03:01:00 +00:00 |
|
penaltyKernels.cu
|
Update TensorRT-LLM (#2502)
|
2024-11-26 16:51:34 +08:00 |
|
penaltyKernels.h
|
Update TensorRT-LLM (#2502)
|
2024-11-26 16:51:34 +08:00 |
|
penaltyTypes.h
|
Update TensorRT-LLM (#1554)
|
2024-05-07 23:34:28 +08:00 |
|
preQuantScaleKernel.cu
|
open source 3706e7395b9b58994412617992727c8ff2d14c9f (#2010)
|
2024-07-24 05:48:06 +08:00 |
|
preQuantScaleKernel.h
|
Update TensorRT-LLM (#1274)
|
2024-03-12 18:15:52 +08:00 |
|
qserveGemm.h
|
Update TensorRT-LLM (#2436)
|
2024-11-12 15:27:49 +08:00 |
|
qserveGemmPerChannel.cu
|
Update TensorRT-LLM (#2532)
|
2024-12-04 21:16:56 +08:00 |
|
qserveGemmPerGroup.cu
|
Update TensorRT-LLM (#2502)
|
2024-11-26 16:51:34 +08:00 |
|
quantization.cu
|
Update TensorRT-LLM (#2755)
|
2025-02-11 03:01:00 +00:00 |
|
quantization.cuh
|
Update TensorRT-LLM (#2755)
|
2025-02-11 03:01:00 +00:00 |
|
quantization.h
|
Update TensorRT-LLM (#2755)
|
2025-02-11 03:01:00 +00:00 |
|
rmsnormKernels.cu
|
Update TensorRT-LLM (#2436)
|
2024-11-12 15:27:49 +08:00 |
|
rmsnormKernels.h
|
Update TensorRT-LLM (#2436)
|
2024-11-12 15:27:49 +08:00 |
|
samplingAirTopPKernels.cu
|
Update TensorRT-LLM (#2532)
|
2024-12-04 21:16:56 +08:00 |
|
samplingTopKKernels.cu
|
Update TensorRT-LLM (#2755)
|
2025-02-11 03:01:00 +00:00 |
|
samplingTopKKernels.h
|
Update TensorRT-LLM (#2755)
|
2025-02-11 03:01:00 +00:00 |
|
samplingTopPKernels.cu
|
Update TensorRT-LLM (#2755)
|
2025-02-11 03:01:00 +00:00 |
|
samplingTopPKernels.h
|
Update TensorRT-LLM (#2755)
|
2025-02-11 03:01:00 +00:00 |
|
splitkGroupGemm.cu
|
Update TensorRT-LLM (#2755)
|
2025-02-11 03:01:00 +00:00 |
|
splitkGroupGemm.h
|
Update TensorRT-LLM (#2562)
|
2024-12-11 00:31:05 -08:00 |
|
stopCriteriaKernels.cu
|
Update TensorRT-LLM (#2110)
|
2024-08-13 22:34:33 +08:00 |
|
stopCriteriaKernels.h
|
open source 4dbf696ae9b74a26829d120b67ab8443d70c8e58 (#2297)
|
2024-10-08 12:19:19 +02:00 |
|
topkLastDim.cu
|
Update TensorRT-LLM (#2436)
|
2024-11-12 15:27:49 +08:00 |
|
topkLastDim.h
|
Update TensorRT-LLM (#2436)
|
2024-11-12 15:27:49 +08:00 |
|
unfusedAttentionKernels.cu
|
Update TensorRT-LLM (#2532)
|
2024-12-04 21:16:56 +08:00 |
|
unfusedAttentionKernels.h
|
Update TensorRT-LLM (#2755)
|
2025-02-11 03:01:00 +00:00 |
|
xqaDispatcher.cpp
|
Update TensorRT-LLM (#2755)
|
2025-02-11 03:01:00 +00:00 |
|
xqaDispatcher.h
|
Update TensorRT-LLM (#2755)
|
2025-02-11 03:01:00 +00:00 |