| .. |
|
beamSearchKernels
|
TensorRT-LLM v0.10 update
|
2024-06-05 20:43:25 +08:00 |
|
contextFusedMultiHeadAttention
|
TensorRT-LLM v0.10 update
|
2024-06-05 20:43:25 +08:00 |
|
cutlass_kernels
|
TensorRT-LLM v0.10 update
|
2024-06-05 20:43:25 +08:00 |
|
decoderMaskedMultiheadAttention
|
TensorRT-LLM v0.10 update
|
2024-06-05 20:43:25 +08:00 |
|
mixtureOfExperts
|
TensorRT-LLM v0.10 update
|
2024-06-05 20:43:25 +08:00 |
|
parallelDecoding
|
TensorRT-LLM v0.10 update
|
2024-06-05 20:43:25 +08:00 |
|
unfusedAttentionKernels
|
TensorRT-LLM v0.10 update
|
2024-06-05 20:43:25 +08:00 |
|
weightOnlyBatchedGemv
|
TensorRT-LLM v0.10 update
|
2024-06-05 20:43:25 +08:00 |
|
banBadWords.cu
|
TensorRT-LLM v0.10 update
|
2024-06-05 20:43:25 +08:00 |
|
banBadWords.h
|
TensorRT-LLM v0.10 update
|
2024-06-05 20:43:25 +08:00 |
|
banRepeatNgram.cu
|
TensorRT-LLM v0.10 update
|
2024-06-05 20:43:25 +08:00 |
|
banRepeatNgram.h
|
TensorRT-LLM v0.10 update
|
2024-06-05 20:43:25 +08:00 |
|
beamSearchKernels.cu
|
TensorRT-LLM v0.10 update
|
2024-06-05 20:43:25 +08:00 |
|
beamSearchKernels.h
|
TensorRT-LLM v0.10 update
|
2024-06-05 20:43:25 +08:00 |
|
CMakeLists.txt
|
TensorRT-LLM v0.10 update
|
2024-06-05 20:43:25 +08:00 |
|
cumsumLastDim.cu
|
TensorRT-LLM v0.10 update
|
2024-06-05 20:43:25 +08:00 |
|
cumsumLastDim.h
|
TensorRT-LLM v0.10 update
|
2024-06-05 20:43:25 +08:00 |
|
customAllReduceKernels.cu
|
TensorRT-LLM v0.10 update
|
2024-06-05 20:43:25 +08:00 |
|
customAllReduceKernels.h
|
Update TensorRT-LLM Release branch (#1445)
|
2024-04-12 17:59:19 +08:00 |
|
decoderMaskedMultiheadAttention.cu
|
Update TensorRT-LLM Release branch (#1445)
|
2024-04-12 17:59:19 +08:00 |
|
decoderMaskedMultiheadAttention.h
|
TensorRT-LLM v0.10 update
|
2024-06-05 20:43:25 +08:00 |
|
decoderMaskedMultiheadAttentionUtils.h
|
TensorRT-LLM v0.10 update
|
2024-06-05 20:43:25 +08:00 |
|
decodingCommon.cu
|
Update TensorRT-LLM Release branch (#1445)
|
2024-04-12 17:59:19 +08:00 |
|
decodingCommon.h
|
Update TensorRT-LLM Release branch (#1445)
|
2024-04-12 17:59:19 +08:00 |
|
decodingKernels.cu
|
TensorRT-LLM v0.10 update
|
2024-06-05 20:43:25 +08:00 |
|
decodingKernels.h
|
TensorRT-LLM v0.10 update
|
2024-06-05 20:43:25 +08:00 |
|
gptKernels.cu
|
TensorRT-LLM v0.10 update
|
2024-06-05 20:43:25 +08:00 |
|
gptKernels.h
|
TensorRT-LLM v0.10 update
|
2024-06-05 20:43:25 +08:00 |
|
groupGemm.cu
|
Update TensorRT-LLM Release branch (#1445)
|
2024-04-12 17:59:19 +08:00 |
|
groupGemm.h
|
Update TensorRT-LLM Release branch (#1192)
|
2024-02-29 17:20:55 +08:00 |
|
kvCacheIndex.h
|
TensorRT-LLM v0.10 update
|
2024-06-05 20:43:25 +08:00 |
|
kvCacheUtils.h
|
TensorRT-LLM v0.10 update
|
2024-06-05 20:43:25 +08:00 |
|
layernormKernels.cu
|
Update TensorRT-LLM Release branch (#1445)
|
2024-04-12 17:59:19 +08:00 |
|
layernormKernels.h
|
Update TensorRT-LLM Release branch (#1445)
|
2024-04-12 17:59:19 +08:00 |
|
lookupKernels.cu
|
TensorRT-LLM v0.10 update
|
2024-06-05 20:43:25 +08:00 |
|
lookupKernels.h
|
TensorRT-LLM v0.10 update
|
2024-06-05 20:43:25 +08:00 |
|
lruKernel.cu
|
TensorRT-LLM v0.10 update
|
2024-06-05 20:43:25 +08:00 |
|
lruKernel.h
|
TensorRT-LLM v0.10 update
|
2024-06-05 20:43:25 +08:00 |
|
mambaConv1dKernels.cu
|
TensorRT-LLM v0.10 update
|
2024-06-05 20:43:25 +08:00 |
|
mambaConv1dKernels.h
|
TensorRT-LLM v0.10 update
|
2024-06-05 20:43:25 +08:00 |
|
multiHeadAttentionCommon.h
|
TensorRT-LLM v0.10 update
|
2024-06-05 20:43:25 +08:00 |
|
penaltyKernels.cu
|
TensorRT-LLM v0.10 update
|
2024-06-05 20:43:25 +08:00 |
|
penaltyKernels.h
|
TensorRT-LLM v0.10 update
|
2024-06-05 20:43:25 +08:00 |
|
penaltyTypes.h
|
TensorRT-LLM v0.10 update
|
2024-06-05 20:43:25 +08:00 |
|
preQuantScaleKernel.cu
|
Update TensorRT-LLM Release branch (#1445)
|
2024-04-12 17:59:19 +08:00 |
|
preQuantScaleKernel.h
|
Update TensorRT-LLM Release branch (#1445)
|
2024-04-12 17:59:19 +08:00 |
|
quantization.cu
|
Update TensorRT-LLM Release branch (#1445)
|
2024-04-12 17:59:19 +08:00 |
|
quantization.h
|
Update TensorRT-LLM Release branch (#1445)
|
2024-04-12 17:59:19 +08:00 |
|
rmsnormKernels.cu
|
Update TensorRT-LLM Release branch (#1445)
|
2024-04-12 17:59:19 +08:00 |
|
rmsnormKernels.h
|
Update TensorRT-LLM Release branch (#1445)
|
2024-04-12 17:59:19 +08:00 |
|
samplingAirTopPKernels.cu
|
TensorRT-LLM v0.10 update
|
2024-06-05 20:43:25 +08:00 |
|
samplingTopKKernels.cu
|
TensorRT-LLM v0.10 update
|
2024-06-05 20:43:25 +08:00 |
|
samplingTopKKernels.h
|
TensorRT-LLM v0.10 update
|
2024-06-05 20:43:25 +08:00 |
|
samplingTopPKernels.cu
|
TensorRT-LLM v0.10 update
|
2024-06-05 20:43:25 +08:00 |
|
samplingTopPKernels.h
|
TensorRT-LLM v0.10 update
|
2024-06-05 20:43:25 +08:00 |
|
selectiveScan.cu
|
TensorRT-LLM v0.10 update
|
2024-06-05 20:43:25 +08:00 |
|
selectiveScan.h
|
Update TensorRT-LLM Release branch (#1445)
|
2024-04-12 17:59:19 +08:00 |
|
splitkGroupGemm.cu
|
Update TensorRT-LLM Release branch (#1445)
|
2024-04-12 17:59:19 +08:00 |
|
splitkGroupGemm.h
|
Update TensorRT-LLM Release branch (#1192)
|
2024-02-29 17:20:55 +08:00 |
|
stopCriteriaKernels.cu
|
TensorRT-LLM v0.10 update
|
2024-06-05 20:43:25 +08:00 |
|
stopCriteriaKernels.h
|
TensorRT-LLM v0.10 update
|
2024-06-05 20:43:25 +08:00 |
|
unfusedAttentionKernels.cu
|
TensorRT-LLM v0.10 update
|
2024-06-05 20:43:25 +08:00 |
|
unfusedAttentionKernels.h
|
TensorRT-LLM v0.10 update
|
2024-06-05 20:43:25 +08:00 |