mirror of
https://github.com/NVIDIA/TensorRT-LLM.git
synced 2026-01-14 06:27:45 +08:00
* feat/vbws-part4-v1.8: rebase Signed-off-by: wili-65535 <wili-65535@users.noreply.github.com> * feat/vbws-part4-v1.9: fix incorrect output when using short output length Signed-off-by: wili-65535 <wili-65535@users.noreply.github.com> * v1.9.1: remove useless variables Signed-off-by: wili-65535 <wili-65535@users.noreply.github.com> * v1.9.2:fix incorrect output when using short output length Signed-off-by: wili-65535 <wili-65535@users.noreply.github.com> * v1.9.3: rebase Signed-off-by: wili-65535 <wili-65535@users.noreply.github.com> * v1.9.4: rebase Signed-off-by: wili-65535 <wili-65535@users.noreply.github.com> * v1.9.5: remove API change Signed-off-by: wili-65535 <wili-65535@users.noreply.github.com> --------- Signed-off-by: wili-65535 <wili-65535@users.noreply.github.com> Co-authored-by: wili-65535 <wili-65535@users.noreply.github.com> |
||
|---|---|---|
| .. | ||
| banWordsLayer.cpp | ||
| banWordsLayer.h | ||
| baseLayer.h | ||
| beamSearchLayer.cu | ||
| beamSearchLayer.h | ||
| CMakeLists.txt | ||
| decodingLayer.cpp | ||
| decodingLayer.h | ||
| decodingParams.h | ||
| dynamicDecodeLayer.cpp | ||
| dynamicDecodeLayer.h | ||
| eagleDecodingLayer.cpp | ||
| eagleDecodingLayer.h | ||
| explicitDraftTokensLayer.cpp | ||
| explicitDraftTokensLayer.h | ||
| externalDraftTokensLayer.cpp | ||
| externalDraftTokensLayer.h | ||
| layersFactory.h | ||
| layerUtils.h | ||
| lookaheadAlgorithm.cpp | ||
| lookaheadAlgorithm.h | ||
| lookaheadDecodingLayer.cpp | ||
| lookaheadDecodingLayer.h | ||
| lookaheadDecodingUtils.h | ||
| lookaheadPoolManager.cpp | ||
| lookaheadPoolManager.h | ||
| medusaDecodingLayer.cpp | ||
| medusaDecodingLayer.h | ||
| penaltyLayer.cpp | ||
| penaltyLayer.h | ||
| samplingLayer.cpp | ||
| samplingLayer.h | ||
| stopCriteriaLayer.cpp | ||
| stopCriteriaLayer.h | ||
| topKSamplingLayer.cpp | ||
| topKSamplingLayer.h | ||
| topPSamplingLayer.cpp | ||
| topPSamplingLayer.h | ||