| .. |
|
contextFusedMultiHeadAttention
|
Update TensorRT-LLM (#1019)
|
2024-01-31 21:55:32 +08:00 |
|
cutlass_kernels
|
Fix typo in cutlass preprocessors (#859)
|
2024-02-01 14:27:58 +08:00 |
|
decoderMaskedMultiheadAttention
|
Update TensorRT-LLM (#1019)
|
2024-01-31 21:55:32 +08:00 |
|
mixtureOfExperts
|
Update TensorRT-LLM (#667)
|
2023-12-15 22:14:51 +08:00 |
|
onlineSoftmaxBeamsearchKernels
|
Update TensorRT-LLM (#941)
|
2024-01-23 23:22:35 +08:00 |
|
parallelDecoding
|
Update TensorRT-LLM (20240116) (#891)
|
2024-01-16 20:03:11 +08:00 |
|
unfusedAttentionKernels
|
Update TensorRT-LLM (#941)
|
2024-01-23 23:22:35 +08:00 |
|
weightOnlyBatchedGemv
|
Update TensorRT-LLM (#787)
|
2024-01-02 17:54:32 +08:00 |
|
banBadWords.cu
|
Update TensorRT-LLM (#1019)
|
2024-01-31 21:55:32 +08:00 |
|
banBadWords.h
|
Update TensorRT-LLM (#1019)
|
2024-01-31 21:55:32 +08:00 |
|
banRepeatNgram.cu
|
Update TensorRT-LLM (#1019)
|
2024-01-31 21:55:32 +08:00 |
|
banRepeatNgram.h
|
Update TensorRT-LLM (#1019)
|
2024-01-31 21:55:32 +08:00 |
|
beamSearchTopkKernels.cu
|
Update TensorRT-LLM (#524)
|
2023-12-01 22:27:51 +08:00 |
|
beamSearchTopkKernels.h
|
Update TensorRT-LLM (#524)
|
2023-12-01 22:27:51 +08:00 |
|
CMakeLists.txt
|
Update TensorRT-LLM (#422)
|
2023-11-18 00:05:54 +08:00 |
|
customAllReduceKernels.cu
|
Update TensorRT-LLM (20240116) (#891)
|
2024-01-16 20:03:11 +08:00 |
|
customAllReduceKernels.h
|
Update TensorRT-LLM (20240116) (#891)
|
2024-01-16 20:03:11 +08:00 |
|
decoderMaskedMultiheadAttention.cu
|
Update TensorRT-LLM (#1019)
|
2024-01-31 21:55:32 +08:00 |
|
decoderMaskedMultiheadAttention.h
|
Update TensorRT-LLM (#787)
|
2024-01-02 17:54:32 +08:00 |
|
decoderMaskedMultiheadAttentionUtils.h
|
Update TensorRT-LLM (20240116) (#891)
|
2024-01-16 20:03:11 +08:00 |
|
decodingCommon.cu
|
Update TensorRT-LLM (#1019)
|
2024-01-31 21:55:32 +08:00 |
|
decodingCommon.h
|
Update TensorRT-LLM (#1019)
|
2024-01-31 21:55:32 +08:00 |
|
decodingKernels.cu
|
Update TensorRT-LLM (#1019)
|
2024-01-31 21:55:32 +08:00 |
|
decodingKernels.h
|
Update TensorRT-LLM (#1019)
|
2024-01-31 21:55:32 +08:00 |
|
gptKernels.cu
|
Update TensorRT-LLM (#787)
|
2024-01-02 17:54:32 +08:00 |
|
gptKernels.h
|
Update TensorRT-LLM (#787)
|
2024-01-02 17:54:32 +08:00 |
|
groupGemm.cu
|
Update TensorRT-LLM (20240116) (#891)
|
2024-01-16 20:03:11 +08:00 |
|
groupGemm.h
|
Update TensorRT-LLM (20240116) (#891)
|
2024-01-16 20:03:11 +08:00 |
|
kvCacheUtils.h
|
Update TensorRT-LLM (#1019)
|
2024-01-31 21:55:32 +08:00 |
|
layernormKernels.cu
|
Update TensorRT-LLM (#422)
|
2023-11-18 00:05:54 +08:00 |
|
layernormKernels.h
|
Initial commit
|
2023-09-20 00:29:41 -07:00 |
|
lookupKernels.cu
|
Initial commit
|
2023-09-20 00:29:41 -07:00 |
|
lookupKernels.h
|
Initial commit
|
2023-09-20 00:29:41 -07:00 |
|
multiHeadAttentionCommon.h
|
Update TensorRT-LLM (#524)
|
2023-12-01 22:27:51 +08:00 |
|
onlineSoftmaxBeamsearchKernels.cu
|
Update TensorRT-LLM (#524)
|
2023-12-01 22:27:51 +08:00 |
|
onlineSoftmaxBeamsearchKernels.h
|
Update TensorRT-LLM (#524)
|
2023-12-01 22:27:51 +08:00 |
|
penaltyKernels.cu
|
Update TensorRT-LLM (#1019)
|
2024-01-31 21:55:32 +08:00 |
|
penaltyKernels.h
|
Update TensorRT-LLM (#1019)
|
2024-01-31 21:55:32 +08:00 |
|
penaltyTypes.h
|
Update TensorRT-LLM (#846)
|
2024-01-09 21:03:35 +08:00 |
|
preQuantScaleKernel.cu
|
Update TensorRT-LLM (#349)
|
2023-11-10 22:30:31 +08:00 |
|
preQuantScaleKernel.h
|
Update TensorRT-LLM (#787)
|
2024-01-02 17:54:32 +08:00 |
|
quantization.cu
|
Update code
|
2023-09-28 09:00:05 -07:00 |
|
quantization.h
|
Initial commit
|
2023-09-20 00:29:41 -07:00 |
|
rmsnormKernels.cu
|
Update TensorRT-LLM (#422)
|
2023-11-18 00:05:54 +08:00 |
|
rmsnormKernels.h
|
Initial commit
|
2023-09-20 00:29:41 -07:00 |
|
samplingAirTopPKernels.cu
|
Update TensorRT-LLM (#1019)
|
2024-01-31 21:55:32 +08:00 |
|
samplingTopKKernels.cu
|
Update TensorRT-LLM (#1019)
|
2024-01-31 21:55:32 +08:00 |
|
samplingTopKKernels.h
|
Update TensorRT-LLM (#1019)
|
2024-01-31 21:55:32 +08:00 |
|
samplingTopPKernels.cu
|
Update TensorRT-LLM (#1019)
|
2024-01-31 21:55:32 +08:00 |
|
samplingTopPKernels.h
|
Update TensorRT-LLM (#1019)
|
2024-01-31 21:55:32 +08:00 |
|
selectiveScan.cu
|
Update TensorRT-LLM (#941)
|
2024-01-23 23:22:35 +08:00 |
|
selectiveScan.h
|
Update TensorRT-LLM (#941)
|
2024-01-23 23:22:35 +08:00 |
|
selectiveScanCommon.h
|
Update TensorRT-LLM (#941)
|
2024-01-23 23:22:35 +08:00 |
|
splitkGroupGemm.cu
|
Update TensorRT-LLM (20240116) (#891)
|
2024-01-16 20:03:11 +08:00 |
|
splitkGroupGemm.h
|
Update TensorRT-LLM (20240116) (#891)
|
2024-01-16 20:03:11 +08:00 |
|
stopCriteriaKernels.cu
|
Update TensorRT-LLM (#1019)
|
2024-01-31 21:55:32 +08:00 |
|
stopCriteriaKernels.h
|
Update TensorRT-LLM (#1019)
|
2024-01-31 21:55:32 +08:00 |
|
unfusedAttentionKernels.cu
|
Update TensorRT-LLM (#941)
|
2024-01-23 23:22:35 +08:00 |
|
unfusedAttentionKernels.h
|
Update TensorRT-LLM (20240116) (#891)
|
2024-01-16 20:03:11 +08:00 |