| .. |
|
moeLoadBalancer
|
[None][feat] add flag for EPLB to force using GDRCopy (#8650)
|
2025-10-29 13:33:26 +08:00 |
|
utils
|
[TRTLLM-7349][feat] Adding new orchestrator type -- ray (#7520)
|
2025-10-04 08:12:24 +08:00 |
|
bufferManager.cpp
|
[TRTLLM-4406][feat] LLM sleep & wakeup Part 1: virtual device memory (#5034)
|
2025-08-04 13:51:01 +08:00 |
|
bufferView.h
|
[None] [refactor] Minor cleanup and improvements (#7619)
|
2025-10-03 11:40:06 +02:00 |
|
CMakeLists.txt
|
[TRTLLM-7349][feat] Adding new orchestrator type -- ray (#7520)
|
2025-10-04 08:12:24 +08:00 |
|
cudaMemPool.cpp
|
|
|
|
cudaMemPool.h
|
|
|
|
decoderState.cpp
|
[None][refactor] Simplify decoder state initialization for speculative decoding (#6869)
|
2025-08-22 18:44:17 +02:00 |
|
decodingLayerWorkspace.cpp
|
|
|
|
decodingLayerWorkspace.h
|
|
|
|
decodingOutput.cpp
|
|
|
|
eagleBuffers.cpp
|
|
|
|
explicitDraftTokensBuffers.cpp
|
|
|
|
explicitDraftTokensModule.h
|
|
|
|
gptDecoder.cpp
|
[None][feat] Support ignored prompt length for penalties via new sampling config parameter (#8127)
|
2025-10-27 13:12:31 -04:00 |
|
gptDecoderBatched.cpp
|
[None][fix] Introduce inline namespace to avoid symbol collision (#9541)
|
2025-12-12 23:32:15 +08:00 |
|
gptJsonConfig.cpp
|
|
|
|
iBuffer.cpp
|
[TRTLLM-4629] [feat] Add support of CUDA13 and sm103 devices (#7568)
|
2025-09-16 09:56:18 +08:00 |
|
ipcNvlsMemory.cu
|
[None][fix] default disable gemm+allreduce fusion (#10656)
|
2026-01-20 12:31:17 +08:00 |
|
ipcSocket.cpp
|
|
|
|
ipcSocket.h
|
|
|
|
ipcUtils.cpp
|
|
|
|
iTensor.cpp
|
|
|
|
jsonSerialization.h
|
|
|
|
layerProfiler.cpp
|
|
|
|
layerProfiler.h
|
|
|
|
lookaheadBuffers.cpp
|
|
|
|
loraCache.cpp
|
|
|
|
loraManager.cpp
|
|
|
|
loraManager.h
|
[https://nvbugs/5322131][feat] Multi-LoRA serving with CUDA Graph (#8279)
|
2026-01-22 14:01:18 +01:00 |
|
loraModule.cpp
|
|
|
|
loraUtils.cpp
|
|
|
|
loraUtils.h
|
|
|
|
mcastDeviceMemory.cpp
|
[https://nvbugs/5782112][fix] Fix hanging issue for MNNVL Allreduce under PP (#10633)
|
2026-01-16 13:03:36 +08:00 |
|
mcastDeviceMemory.h
|
[https://nvbugs/5782112][fix] Fix hanging issue for MNNVL Allreduce under PP (#10633)
|
2026-01-16 13:03:36 +08:00 |
|
mcastGPUBuffer.h
|
[https://nvbugs/5782112][fix] Fix hanging issue for MNNVL Allreduce under PP (#10633)
|
2026-01-16 13:03:36 +08:00 |
|
memoryCounters.cpp
|
|
|
|
ncclCommunicator.cpp
|
|
|
|
ncclCommunicator.h
|
|
|
|
promptTuningParams.cpp
|
|
|
|
runtimeKernels.cu
|
refactor: Remove enforced sorted order of batch slots (#3502)
|
2025-07-14 17:23:02 +02:00 |
|
runtimeKernels.h
|
refactor: Remove enforced sorted order of batch slots (#3502)
|
2025-07-14 17:23:02 +02:00 |
|
tensorView.h
|
|
|
|
tllmBuffers.cpp
|
|
|
|
tllmBuffers.h
|
[TRTLLM-4406][feat] LLM sleep & wakeup Part 1: virtual device memory (#5034)
|
2025-08-04 13:51:01 +08:00 |
|
tllmLogger.cpp
|
|
|
|
tllmRuntime.cpp
|
|
|
|
tllmRuntime.h
|
[None][chroe] Rename TensorRT-LLM to TensorRT LLM for source code. (#7851)
|
2025-09-25 21:02:35 +08:00 |
|
tllmStreamReaders.cpp
|
|
|
|
tllmStreamReaders.h
|
|
|
|
torch.h
|
[None][feat] KV Cache Connector API (#7228)
|
2025-08-28 23:09:27 -04:00 |
|
torchUtils.h
|
|
|
|
torchView.h
|
|
|
|
virtualMemory.cpp
|
[None][fix] Correct virtual memory allocation alignment (#9491)
|
2025-12-01 10:59:19 +08:00 |
|
workerPool.cpp
|
|
|
|
workerPool.h
|
|
|
|
worldConfig.cpp
|
[TRTLLM-9465][fix] Swap TP-CP grouping order (#10350)
|
2026-01-05 20:08:03 +08:00 |