| .. |
|
beamSearchKernels
|
|
|
|
causalConv1d
|
|
|
|
communicationKernels
|
[None][chore] Support larger topK for NVLinkOneSided AlltoAll. (#9816)
|
2025-12-10 11:10:55 +08:00 |
|
contextFusedMultiHeadAttention
|
|
|
|
cuteDslKernels
|
[TRTLLM-9372][feat] Enable CuteDSL MoE with Large EP (#9592)
|
2025-12-05 22:08:52 -08:00 |
|
cutlass_kernels
|
[TRTLLM-9372][feat] Enable CuteDSL MoE with Large EP (#9592)
|
2025-12-05 22:08:52 -08:00 |
|
decoderMaskedMultiheadAttention
|
[#8476][chore] Update license (#8807)
|
2025-11-19 15:05:25 -08:00 |
|
dsv3MinLatencyKernels
|
[None][fix] Fix PDL in TRTLLM MOE for dsv3 (#9799)
|
2025-12-09 10:16:29 +08:00 |
|
flashMLA
|
|
|
|
fusedLayernormKernels
|
|
|
|
groupRmsNormKernels
|
|
|
|
internal_cutlass_kernels
|
|
|
|
llama4MinLatencyKernels
|
|
|
|
lora
|
|
|
|
moeLoadBalance
|
|
|
|
selectiveScan
|
|
|
|
speculativeDecoding
|
[TRTLLM-8160][feat] Add draft token tree runtime on CDL (#8586)
|
2025-11-25 09:40:55 -05:00 |
|
tinygemm2
|
|
|
|
trtllmGenKernels
|
[https://nvbugs/5727952][fix] PDL bugs with trtllm-gen fmha kernels (#9863)
|
2025-12-10 01:47:03 -08:00 |
|
unfusedAttentionKernels
|
[None][feat] update trtllm-gen nvfp4 kernels with better performance (#9510)
|
2025-12-03 21:35:49 +08:00 |
|
userbuffers
|
[None][feat] Enable NCCL_SYMMETRIC as default fallback for AllReduce (#9314)
|
2025-12-07 09:43:26 -08:00 |
|
weightOnlyBatchedGemv
|
|
|
|
attentionMask.cu
|
|
|
|
attentionMask.h
|
|
|
|
banBadWords.cu
|
|
|
|
banBadWords.h
|
|
|
|
banRepeatNgram.cu
|
|
|
|
banRepeatNgram.h
|
|
|
|
beamSearchKernels.cu
|
|
|
|
beamSearchKernels.h
|
|
|
|
buildRelativeAttentionBiasKernel.cu
|
|
|
|
buildRelativeAttentionBiasKernel.h
|
|
|
|
CMakeLists.txt
|
[TRTLLM-9286][feat] Integration of CuteDSL NVFP4 grouped GEMM (#8880)
|
2025-11-18 17:40:12 -08:00 |
|
cumsumLastDim.cu
|
|
|
|
cumsumLastDim.h
|
|
|
|
customAllReduceKernels.cu
|
|
|
|
customAllReduceKernels.h
|
|
|
|
customMoeRoutingKernels.cu
|
[https://nvbugs/5690172][fix] Fix Qwen3-235B ATP accuracy issue with PDL (#9530)
|
2025-12-01 09:10:21 +08:00 |
|
customMoeRoutingKernels.h
|
|
|
|
decoderMaskedMultiheadAttention.cu
|
|
|
|
decoderMaskedMultiheadAttention.h
|
|
|
|
decoderMaskedMultiheadAttentionUtils.h
|
|
|
|
decodingCommon.cu
|
|
|
|
decodingKernels.cu
|
|
|
|
decodingKernels.h
|
|
|
|
delayStream.cu
|
|
|
|
delayStream.h
|
|
|
|
doraScaling.cu
|
|
|
|
doraScaling.h
|
|
|
|
fmhaDispatcher.cpp
|
[None][feat] update trtllm-gen nvfp4 kernels with better performance (#9510)
|
2025-12-03 21:35:49 +08:00 |
|
fmhaDispatcher.h
|
|
|
|
fusedMoeCommKernels.cu
|
|
|
|
fusedMoeCommKernels.h
|
|
|
|
fusedQKNormRopeKernel.cu
|
[None][feat] Support Yarn on QwQ-32B model (#9059)
|
2025-11-25 07:27:28 +08:00 |
|
fusedQKNormRopeKernel.h
|
[None][feat] Support Yarn on QwQ-32B model (#9059)
|
2025-11-25 07:27:28 +08:00 |
|
gptKernels.cu
|
|
|
|
gptKernels.h
|
|
|
|
groupGemm.cu
|
|
|
|
groupGemm.h
|
|
|
|
helixKernels.cu
|
|
|
|
helixKernels.h
|
|
|
|
indexerKCacheScatter.cu
|
|
|
|
IndexerKCacheScatter.h
|
|
|
|
indexerTopK.cu
|
[None][fix] Fix topk outIndices when using vectorized_process (#9404)
|
2025-11-24 09:08:00 -08:00 |
|
IndexerTopK.h
|
[None][feat] Update the indexer topK (#9255)
|
2025-11-19 11:49:00 +08:00 |
|
kvCachePartialCopy.cu
|
|
|
|
kvCacheUtils.h
|
|
|
|
layernormKernels.cu
|
|
|
|
layernormKernels.h
|
|
|
|
logitsBitmask.cu
|
|
|
|
logitsBitmask.h
|
|
|
|
lookupKernels.cu
|
|
|
|
lookupKernels.h
|
|
|
|
lruKernel.cu
|
|
|
|
lruKernel.h
|
|
|
|
mambaConv1dKernels.cu
|
|
|
|
mambaConv1dKernels.h
|
|
|
|
mlaChunkedPrefill.cu
|
|
|
|
mlaChunkedPrefill.cuh
|
|
|
|
mlaKernels.cu
|
[https://nvbugs/5708475][fix] Fix e2e eval accuracy for helix parallelism (#9647)
|
2025-12-03 15:13:59 +08:00 |
|
mlaKernels.h
|
[TRTLLM-5971][feat] Integrate helix parallelism (#9342)
|
2025-11-29 15:17:30 -08:00 |
|
moe_utils.cuh
|
|
|
|
moeAlignKernels.cu
|
[TRTLLM-9082][feat] AutoDeploy: Move the moe Align kernel to AOT (#9106)
|
2025-11-21 16:05:48 -08:00 |
|
moeAlignKernels.h
|
[TRTLLM-9082][feat] AutoDeploy: Move the moe Align kernel to AOT (#9106)
|
2025-11-21 16:05:48 -08:00 |
|
moeCommKernelsCommon.h
|
|
|
|
moePrepareKernels.cu
|
|
|
|
moePrepareKernels.h
|
|
|
|
moeTopKFuncs.cuh
|
|
|
|
multiHeadAttentionCommon.h
|
|
|
|
noAuxTcKernels.cu
|
[None][fix] Fix PDL in TRTLLM MOE for dsv3 (#9799)
|
2025-12-09 10:16:29 +08:00 |
|
noAuxTcKernels.h
|
|
|
|
penaltyKernels.cu
|
|
|
|
penaltyKernels.h
|
|
|
|
penaltyTypes.h
|
|
|
|
preQuantScaleKernel.cu
|
|
|
|
preQuantScaleKernel.h
|
|
|
|
qserveGemm.h
|
|
|
|
qserveGemmPerChannel.cu
|
|
|
|
qserveGemmPerGroup.cu
|
|
|
|
quantization.cu
|
|
|
|
quantization.cuh
|
[None][feat] Port fp4 quantization kernel optimization from FlashInfer (#9854)
|
2025-12-10 13:13:48 +01:00 |
|
quantization.h
|
|
|
|
recoverFromRingAtten.cu
|
|
|
|
recoverFromRingAtten.h
|
|
|
|
rmsnormKernels.cu
|
|
|
|
rmsnormKernels.h
|
|
|
|
sageAttentionKernels.cu
|
|
|
|
sageAttentionKernels.h
|
|
|
|
samplingAirTopPKernels.cu
|
|
|
|
samplingTopKKernels.cu
|
|
|
|
samplingTopKKernels.h
|
|
|
|
samplingTopPKernels.cu
|
|
|
|
samplingTopPKernels.h
|
|
|
|
sparseAttentionKernels.cu
|
|
|
|
sparseAttentionKernels.h
|
|
|
|
splitkGroupGemm.cu
|
|
|
|
splitkGroupGemm.h
|
|
|
|
stopCriteriaKernels.cu
|
|
|
|
stopCriteriaKernels.h
|
|
|
|
topkLastDim.cu
|
|
|
|
topkLastDim.h
|
|
|
|
unfusedAttentionKernels.cu
|
|
|
|
unfusedAttentionKernels.h
|
|
|
|
xqaDispatcher.cpp
|
[None][feat] update trtllm-gen nvfp4 kernels with better performance (#9510)
|
2025-12-03 21:35:49 +08:00 |
|
xqaDispatcher.h
|
|
|