| .. |
|
allocateKvCache.h
|
Update TensorRT-LLM (#2792)
|
2025-02-18 21:27:39 +08:00 |
|
assignReqSeqSlots.h
|
Update TensorRT-LLM (#2436)
|
2024-11-12 15:27:49 +08:00 |
|
cacheTransceiver.h
|
feat: Run PyExecutor's inference flow to estimate max_num_tokens for kv_cache_manager (#3092)
|
2025-04-10 18:29:40 +08:00 |
|
capacityScheduler.h
|
Update TensorRT-LLM (#2820)
|
2025-02-25 21:21:49 +08:00 |
|
common.h
|
open source 4dbf696ae9b74a26829d120b67ab8443d70c8e58 (#2297)
|
2024-10-08 12:19:19 +02:00 |
|
contextProgress.h
|
Update TensorRT-LLM (#2413)
|
2024-11-05 16:27:06 +08:00 |
|
createNewDecoderRequests.h
|
Update TensorRT-LLM (#2873)
|
2025-03-11 21:13:42 +08:00 |
|
decoderBuffers.h
|
refactor: Simplify disableLookahead and improve numDecodingEngineTokens handling (#3103)
|
2025-04-01 18:47:31 +08:00 |
|
evictionPolicy.h
|
Update TensorRT-LLM (#2873)
|
2025-03-11 21:13:42 +08:00 |
|
generateRequestOptions.h
|
Update TensorRT-LLM (#2873)
|
2025-03-11 21:13:42 +08:00 |
|
guidedDecoder.h
|
Update TensorRT-LLM (#2532)
|
2024-12-04 21:16:56 +08:00 |
|
handleContextLogits.h
|
Update TensorRT-LLM (#2873)
|
2025-03-11 21:13:42 +08:00 |
|
handleGenerationLogits.h
|
Update TensorRT-LLM (#2873)
|
2025-03-11 21:13:42 +08:00 |
|
kvCacheConfig.h
|
Update TensorRT-LLM (#2873)
|
2025-03-11 21:13:42 +08:00 |
|
kvCacheEventManager.h
|
Update TensorRT-LLM (#2436)
|
2024-11-12 15:27:49 +08:00 |
|
kvCacheManager.h
|
fix: disable kv cache reuse when minimum window size is reached, instead of maximum window size (#2983)
|
2025-03-24 22:49:52 +08:00 |
|
kvCacheTransferManager.h
|
Update TensorRT-LLM (#2873)
|
2025-03-11 21:13:42 +08:00 |
|
kvCacheUtils.h
|
Update TensorRT-LLM (#2849)
|
2025-03-04 18:44:00 +08:00 |
|
llmRequest.h
|
Feat: Variable-Beam-Width-Search (VBWS) part3 (#3338)
|
2025-04-08 23:51:27 +08:00 |
|
logitsPostProcessor.h
|
Update TensorRT-LLM (#2755)
|
2025-02-11 03:01:00 +00:00 |
|
makeDecodingBatchInputOutput.h
|
Update TensorRT-LLM (#2873)
|
2025-03-11 21:13:42 +08:00 |
|
medusaBuffers.h
|
Update TensorRT-LLM (#2873)
|
2025-03-11 21:13:42 +08:00 |
|
microBatchScheduler.h
|
Update TensorRT-LLM (#2502)
|
2024-11-26 16:51:34 +08:00 |
|
pauseRequests.h
|
Update TensorRT-LLM (#2532)
|
2024-12-04 21:16:56 +08:00 |
|
peftCacheManager.h
|
Update TensorRT-LLM (#2783)
|
2025-02-13 18:40:22 +08:00 |
|
peftCacheManagerConfig.h
|
Update TensorRT-LLM (#2755)
|
2025-02-11 03:01:00 +00:00 |
|
rnnStateManager.h
|
Update TensorRT-LLM (#2413)
|
2024-11-05 16:27:06 +08:00 |
|
runtimeBuffers.h
|
Feat: Variable-Beam-Width-Search (VBWS) part3 (#3338)
|
2025-04-08 23:51:27 +08:00 |
|
sequenceSlotManager.h
|
Update TensorRT-LLM (#2413)
|
2024-11-05 16:27:06 +08:00 |
|
transformerBuffers.h
|
Feat: Variable-Beam-Width-Search (VBWS) part3 (#3338)
|
2025-04-08 23:51:27 +08:00 |
|
trtGptModelOptionalParams.h
|
Update TensorRT-LLM (#2755)
|
2025-02-11 03:01:00 +00:00 |