| Name | Last commit message | Last commit date |
| --- | --- | --- |
| utils | Fix logits dtype in assert (#3038) | 2025-03-25 10:35:21 +08:00 |
| allocateKvCache.cpp | Update TensorRT-LLM (#2873) | 2025-03-11 21:13:42 +08:00 |
| assignReqSeqSlots.cpp | Update TensorRT-LLM (#2873) | 2025-03-11 21:13:42 +08:00 |
| cacheFormatter.cpp | Add updateKVCacheTransfer (#2984) | 2025-03-25 21:45:35 +08:00 |
| cacheFormatter.h | Add updateKVCacheTransfer (#2984) | 2025-03-25 21:45:35 +08:00 |
| cacheTransceiver.cpp | Add updateKVCacheTransfer (#2984) | 2025-03-25 21:45:35 +08:00 |
| capacityScheduler.cpp | Update TensorRT-LLM (#2873) | 2025-03-11 21:13:42 +08:00 |
| CMakeLists.txt | Update TensorRT-LLM (#2873) | 2025-03-11 21:13:42 +08:00 |
| contextProgress.cpp | Update TensorRT-LLM (#2873) | 2025-03-11 21:13:42 +08:00 |
| createNewDecoderRequests.cpp | Update (#2978) | 2025-03-23 16:39:35 +08:00 |
| dataTransceiver.cpp | Update TensorRT-LLM (#2873) | 2025-03-11 21:13:42 +08:00 |
| dataTransceiver.h | Add updateKVCacheTransfer (#2984) | 2025-03-25 21:45:35 +08:00 |
| dataTransceiverImpl.cpp | Update TensorRT-LLM (#2873) | 2025-03-11 21:13:42 +08:00 |
| dataTransceiverImpl.h | Update TensorRT-LLM (#2873) | 2025-03-11 21:13:42 +08:00 |
| decoderBuffers.cpp | Update TensorRT-LLM (#2873) | 2025-03-11 21:13:42 +08:00 |
| encoderBuffers.cpp | Update TensorRT-LLM (#2873) | 2025-03-11 21:13:42 +08:00 |
| encoderBuffers.h | Update TensorRT-LLM (#2873) | 2025-03-11 21:13:42 +08:00 |
| evictionPolicy.cpp | Update TensorRT-LLM (#2873) | 2025-03-11 21:13:42 +08:00 |
| generateRequestOptions.cpp | Update TensorRT-LLM (#2873) | 2025-03-11 21:13:42 +08:00 |
| guidedDecoder.cpp | Update TensorRT-LLM (#2873) | 2025-03-11 21:13:42 +08:00 |
| handleContextLogits.cpp | Update TensorRT-LLM (#2873) | 2025-03-11 21:13:42 +08:00 |
| handleGenerationLogits.cpp | Update TensorRT-LLM (#2873) | 2025-03-11 21:13:42 +08:00 |
| kvCacheEventManager.cpp | Update TensorRT-LLM (#2873) | 2025-03-11 21:13:42 +08:00 |
| kvCacheManager.cpp | fix: disable kv cache reuse when minimum window size is reached, instead of maximum window size (#2983) | 2025-03-24 22:49:52 +08:00 |
| kvCacheTransferManager.cpp | Update TensorRT-LLM (#2873) | 2025-03-11 21:13:42 +08:00 |
| llmRequest.cpp | Update TensorRT-LLM (#2936) | 2025-03-18 21:25:19 +08:00 |
| logitsPostProcessor.cpp | Update TensorRT-LLM (#2873) | 2025-03-11 21:13:42 +08:00 |
| loraBuffers.cpp | Update TensorRT-LLM (#2873) | 2025-03-11 21:13:42 +08:00 |
| loraBuffers.h | Update TensorRT-LLM (#2873) | 2025-03-11 21:13:42 +08:00 |
| makeDecodingBatchInputOutput.cpp | Update TensorRT-LLM (#2873) | 2025-03-11 21:13:42 +08:00 |
| medusaBuffers.cpp | Update TensorRT-LLM (#2873) | 2025-03-11 21:13:42 +08:00 |
| microBatchScheduler.cpp | Update TensorRT-LLM (#2873) | 2025-03-11 21:13:42 +08:00 |
| mlaCacheFormatter.cpp | Update TensorRT-LLM (#2873) | 2025-03-11 21:13:42 +08:00 |
| mlaCacheFormatter.h | Update TensorRT-LLM (#2873) | 2025-03-11 21:13:42 +08:00 |
| pauseRequests.cpp | Update TensorRT-LLM (#2873) | 2025-03-11 21:13:42 +08:00 |
| peftCacheManager.cpp | Update TensorRT-LLM (#2873) | 2025-03-11 21:13:42 +08:00 |
| promptTuningBuffers.cpp | Update TensorRT-LLM (#2873) | 2025-03-11 21:13:42 +08:00 |
| promptTuningBuffers.h | Update TensorRT-LLM (#2873) | 2025-03-11 21:13:42 +08:00 |
| rnnStateBuffers.cpp | Update TensorRT-LLM (#2873) | 2025-03-11 21:13:42 +08:00 |
| rnnStateBuffers.h | Update TensorRT-LLM (#2873) | 2025-03-11 21:13:42 +08:00 |
| rnnStateManager.cpp | Update TensorRT-LLM (#2873) | 2025-03-11 21:13:42 +08:00 |
| runtimeBuffers.cpp | Update TensorRT-LLM (#2873) | 2025-03-11 21:13:42 +08:00 |
| sequenceSlotManager.cpp | Update TensorRT-LLM (#2873) | 2025-03-11 21:13:42 +08:00 |
| transformerBuffers.cpp | Update TensorRT-LLM (#2873) | 2025-03-11 21:13:42 +08:00 |
| trtEncoderModel.cpp | Update TensorRT-LLM (#2873) | 2025-03-11 21:13:42 +08:00 |
| trtEncoderModel.h | Update TensorRT-LLM (#2873) | 2025-03-11 21:13:42 +08:00 |
| trtGptModel.h | fix: disable kv cache reuse when minimum window size is reached, instead of maximum window size (#2983) | 2025-03-24 22:49:52 +08:00 |
| trtGptModelFactory.h | Update TensorRT-LLM (#2873) | 2025-03-11 21:13:42 +08:00 |
| trtGptModelInflightBatching.cpp | fix: disable kv cache reuse when minimum window size is reached, instead of maximum window size (#2983) | 2025-03-24 22:49:52 +08:00 |
| trtGptModelInflightBatching.h | Fix logits dtype in assert (#3038) | 2025-03-25 10:35:21 +08:00 |
| trtGptModelV1.cpp | Update TensorRT-LLM (#2873) | 2025-03-11 21:13:42 +08:00 |
| trtGptModelV1.h | Update TensorRT-LLM (#2873) | 2025-03-11 21:13:42 +08:00 |