# Configuration for Llama 4 Scout (VLM)
# AutoDeploy-specific settings for Llama 4 Scout MoE vision model

max_batch_size: 1024          # maximum number of requests batched together
max_num_tokens: 2048          # maximum total tokens scheduled in a single batch
free_mem_ratio: 0.9           # fraction of free GPU memory dedicated to the KV cache
trust_remote_code: true       # allow execution of the model repository's custom code
cuda_graph_batch_sizes: [1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 768, 1024]  # batch sizes to capture CUDA graphs for
kv_cache_config:
  dtype: fp8                  # store the KV cache in FP8 to reduce memory footprint
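# ---------------------------------------------------------------------------
# Usage sketch (assumptions: the file name "llama4_scout.yaml", the Hugging
# Face model ID, and the mapping of these keys onto LLM-API keyword arguments
# are illustrative and may differ between TensorRT-LLM versions):
#
#   import yaml
#   from tensorrt_llm import LLM, SamplingParams
#
#   with open("llama4_scout.yaml") as f:
#       overrides = yaml.safe_load(f)
#
#   llm = LLM(
#       model="meta-llama/Llama-4-Scout-17B-16E-Instruct",  # assumed model ID
#       **overrides,  # assumes these keys are accepted as LLM keyword arguments
#   )
#   outputs = llm.generate(["Describe the image."], SamplingParams(max_tokens=64))
#   print(outputs[0].outputs[0].text)
# ---------------------------------------------------------------------------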