| .. |
|
attention_backend
|
[None][perf] Use fp8 quant kernel in DS3.2 indexer module (#8701)
|
2025-10-29 12:45:09 +08:00 |
|
auto_deploy
|
[https://nvbugs/5606166][fix] AutoDeploy: use tuples for cudagraph shape lookup (#8658)
|
2025-10-28 10:52:43 -07:00 |
|
compilation
|
[https://nvbugs/5550409][fix] Disable torch compile in piecewise attention part to Avoid host overhead (#8708)
|
2025-10-29 18:12:58 +08:00 |
|
configs
|
[TRTLLM-8535][feat] Support DeepSeek V3.2 with FP8 + BF16 KV cache/NVFP4 + BF16 KV cache (#8405)
|
2025-10-24 13:40:41 -04:00 |
|
custom_ops
|
[None][perf] Use fp8 quant kernel in DS3.2 indexer module (#8701)
|
2025-10-29 12:45:09 +08:00 |
|
cute_dsl_kernels
|
[TRTLLM-6898][feat] Add swapab, tileN64, cga sync support for cute dsl nvfp4 gemm (#7764)
|
2025-09-18 21:20:04 +08:00 |
|
debug
|
|
|
|
distributed
|
[TRTLLM-7318][feat] MnnvlThroughput AlltoAll implementation. (#7499)
|
2025-10-27 13:23:06 -04:00 |
|
models
|
[https://nvbugs/5549829][fix] Qwen2.5-VL TP > 1 + Quantized weight load fix (#8680)
|
2025-10-29 13:38:42 +09:00 |
|
modules
|
[https://nvbugs/5550409][fix] Disable torch compile in piecewise attention part to Avoid host overhead (#8708)
|
2025-10-29 18:12:58 +08:00 |
|
peft
|
|
|
|
pyexecutor
|
[https://nvbugs/5596377][fix] Fix mm dummy calculation (#8498)
|
2025-10-29 09:45:21 +09:00 |
|
shared_tensor
|
|
|
|
speculative
|
[TRTLLM-8535][feat] Support DeepSeek V3.2 with FP8 + BF16 KV cache/NVFP4 + BF16 KV cache (#8405)
|
2025-10-24 13:40:41 -04:00 |
|
__init__.py
|
|
|
|
autotuner.py
|
[None][feat] Update TRTLLM MoE MxFP4 cubins; autotune tileN (#8156)
|
2025-10-23 09:14:18 +08:00 |
|
cublaslt_utils.py
|
[https://nvbugs/5451205][feat] Add cuBLASLt NVFP4 GEMM backend support (#7943)
|
2025-10-23 15:55:10 +08:00 |
|
cute_dsl_utils.py
|
[None][chore] polish error message in cute_dsl_utils.py (#7852)
|
2025-09-19 12:05:11 +08:00 |
|
device_mesh.py
|
[TRTLLM-8682][chore] Remove auto_parallel module (#8329)
|
2025-10-22 20:53:08 -04:00 |
|
expert_statistic.py
|
|
|
|
flashinfer_utils.py
|
|
|
|
hostfunc.py
|
[TRTLLM-7028][feat] Enable guided decoding with speculative decoding (part 2: one-model engine) (#6948)
|
2025-09-03 15:16:11 -07:00 |
|
llm.py
|
|
|
|
memory_buffer_utils.py
|
[None][feat] reuse cudagraph memory pool in normal forward flow (#8095)
|
2025-10-16 07:08:44 +08:00 |
|
metadata.py
|
|
|
|
model_config.py
|
[None][fix] Fix ModelConfig.from_pretrained get quant config file (#8647)
|
2025-10-27 11:02:24 +08:00 |
|
utils.py
|
[https://nvbugs/5550409][fix] Disable torch compile in piecewise attention part to Avoid host overhead (#8708)
|
2025-10-29 18:12:58 +08:00 |
|
virtual_memory.py
|
|
|