mirror of
https://github.com/NVIDIA/TensorRT-LLM.git
synced 2026-01-14 06:27:45 +08:00
Signed-off-by: Ivy Zhang <25222398+crazydemo@users.noreply.github.com> Signed-off-by: Yiqing Yan <yiqingy@nvidia.com> Signed-off-by: Yuxian Qiu <142763828+yuxianq@users.noreply.github.com> Signed-off-by: Hui Gao <huig@nvidia.com> Signed-off-by: Balaram Buddharaju <169953907+brb-nv@users.noreply.github.com> Signed-off-by: nv-guomingz <137257613+nv-guomingz@users.noreply.github.com> Signed-off-by: Bo Li <22713281+bobboli@users.noreply.github.com> Signed-off-by: Iman Tabrizian <10105175+tabrizian@users.noreply.github.com> Signed-off-by: Ruodi <200874449+ruodil@users.noreply.github.com> Signed-off-by: ruodil <200874449+ruodil@users.noreply.github.com> Signed-off-by: Stanley Sun <190317771+StanleySun639@users.noreply.github.com> Signed-off-by: Pamela Peng <179191831+pamelap-nvidia@users.noreply.github.com> Signed-off-by: Anurag Mukkara <134339030+amukkara@users.noreply.github.com> Signed-off-by: xinhe-nv <200704525+xinhe-nv@users.noreply.github.com> Signed-off-by: Faraz Khoubsirat <58580514+farazkh80@users.noreply.github.com> Signed-off-by: moraxu <mguzek@nvidia.com> Signed-off-by: Fanrong Li <23290157+lfr-0531@users.noreply.github.com> Signed-off-by: yechank <161688079+yechank-nvidia@users.noreply.github.com> Co-authored-by: Ivy Zhang <25222398+crazydemo@users.noreply.github.com> Co-authored-by: Yiqing Yan <yiqingy@nvidia.com> Co-authored-by: Yuxian Qiu <142763828+yuxianq@users.noreply.github.com> Co-authored-by: HuiGao-NV <huig@nvidia.com> Co-authored-by: brb-nv <169953907+brb-nv@users.noreply.github.com> Co-authored-by: nv-guomingz <137257613+nv-guomingz@users.noreply.github.com> Co-authored-by: Bo Li <22713281+bobboli@users.noreply.github.com> Co-authored-by: Iman Tabrizian <10105175+Tabrizian@users.noreply.github.com> Co-authored-by: ruodil <200874449+ruodil@users.noreply.github.com> Co-authored-by: Stanley Sun <190317771+StanleySun639@users.noreply.github.com> Co-authored-by: Pamela Peng <179191831+pamelap-nvidia@users.noreply.github.com> Co-authored-by: Anurag Mukkara <134339030+amukkara@users.noreply.github.com> Co-authored-by: xinhe-nv <200704525+xinhe-nv@users.noreply.github.com> Co-authored-by: Faraz <58580514+farazkh80@users.noreply.github.com> Co-authored-by: Michal Guzek <moraxu@users.noreply.github.com> Co-authored-by: Larry <197874197+LarryXFly@users.noreply.github.com> Co-authored-by: Fanrong Li <23290157+lfr-0531@users.noreply.github.com> Co-authored-by: Yechan Kim <161688079+yechank-nvidia@users.noreply.github.com> |
||
|---|---|---|
| .. | ||
| beamSearchKernels | ||
| causalConv1d | ||
| communicationKernels | ||
| contextFusedMultiHeadAttention | ||
| cutlass_kernels | ||
| decoderMaskedMultiheadAttention | ||
| flashMLA | ||
| fusedLayernormKernels | ||
| groupRmsNormKernels | ||
| internal_cutlass_kernels | ||
| llama4MinLatencyKernels | ||
| lora | ||
| moeLoadBalance | ||
| selectiveScan | ||
| speculativeDecoding | ||
| trtllmGenKernels | ||
| unfusedAttentionKernels | ||
| userbuffers | ||
| weightOnlyBatchedGemv | ||
| attentionMask.cu | ||
| attentionMask.h | ||
| banBadWords.cu | ||
| banBadWords.h | ||
| banRepeatNgram.cu | ||
| banRepeatNgram.h | ||
| beamSearchKernels.cu | ||
| beamSearchKernels.h | ||
| buildRelativeAttentionBiasKernel.cu | ||
| buildRelativeAttentionBiasKernel.h | ||
| CMakeLists.txt | ||
| cumsumLastDim.cu | ||
| cumsumLastDim.h | ||
| customAllReduceKernels.cu | ||
| customAllReduceKernels.h | ||
| decoderMaskedMultiheadAttention.cu | ||
| decoderMaskedMultiheadAttention.h | ||
| decoderMaskedMultiheadAttentionUtils.h | ||
| decodingCommon.cu | ||
| decodingKernels.cu | ||
| decodingKernels.h | ||
| delayStream.cu | ||
| delayStream.h | ||
| doraScaling.cu | ||
| doraScaling.h | ||
| fmhaDispatcher.cpp | ||
| fmhaDispatcher.h | ||
| fusedQKNormRopeKernel.cu | ||
| fusedQKNormRopeKernel.h | ||
| gptKernels.cu | ||
| gptKernels.h | ||
| groupGemm.cu | ||
| groupGemm.h | ||
| kvCachePartialCopy.cu | ||
| kvCacheUtils.h | ||
| layernormKernels.cu | ||
| layernormKernels.h | ||
| logitsBitmask.cu | ||
| logitsBitmask.h | ||
| lookupKernels.cu | ||
| lookupKernels.h | ||
| lruKernel.cu | ||
| lruKernel.h | ||
| mambaConv1dKernels.cu | ||
| mambaConv1dKernels.h | ||
| mlaKernels.cu | ||
| mlaKernels.h | ||
| moeCommKernels.cu | ||
| moeCommKernels.h | ||
| multiHeadAttentionCommon.h | ||
| noAuxTcKernels.cu | ||
| noAuxTcKernels.h | ||
| penaltyKernels.cu | ||
| penaltyKernels.h | ||
| penaltyTypes.h | ||
| preQuantScaleKernel.cu | ||
| preQuantScaleKernel.h | ||
| qserveGemm.h | ||
| qserveGemmPerChannel.cu | ||
| qserveGemmPerGroup.cu | ||
| quantization.cu | ||
| quantization.cuh | ||
| quantization.h | ||
| recoverFromRingAtten.cu | ||
| recoverFromRingAtten.h | ||
| rmsnormKernels.cu | ||
| rmsnormKernels.h | ||
| sageAttentionKernels.cu | ||
| sageAttentionKernels.h | ||
| samplingAirTopPKernels.cu | ||
| samplingTopKKernels.cu | ||
| samplingTopKKernels.h | ||
| samplingTopPKernels.cu | ||
| samplingTopPKernels.h | ||
| splitkGroupGemm.cu | ||
| splitkGroupGemm.h | ||
| stopCriteriaKernels.cu | ||
| stopCriteriaKernels.h | ||
| topkLastDim.cu | ||
| topkLastDim.h | ||
| unfusedAttentionKernels.cu | ||
| unfusedAttentionKernels.h | ||
| xqaDispatcher.cpp | ||
| xqaDispatcher.h | ||