mirror of
https://github.com/NVIDIA/TensorRT-LLM.git
synced 2026-01-14 06:27:45 +08:00
6 lines
99 B
YAML
6 lines
99 B
YAML
# Extra LLM API options: CUDA graph settings and MoE backend selection.
# NOTE(review): nesting reconstructed from a flattened copy; confirm against
# the upstream file before relying on it.

# CUDA graph settings.
cuda_graph_config:
    # Padding toggle for CUDA graphs — presumably pads a batch up to a
    # captured graph size; verify against the TensorRT-LLM docs.
    enable_padding: True
    # Batch-size limit for CUDA graphs — presumably the largest size captured.
    max_batch_size: 16

# Mixture-of-Experts settings.
moe_config:
    # MoE backend to use.
    backend: trtllm