TensorRT-LLMs/examples/layer_wise_benchmarks/config_ctx.yaml
Tailing Yuan f9c7786dc8
[None][feat] Add layer wise benchmarks (#8777)
Signed-off-by: Tailing Yuan <yuantailing@gmail.com>
2025-10-30 20:29:34 +08:00

22 lines
399 B
YAML

# Layer-wise benchmark configuration for a run with run_type CTX.
# NOTE(review): the per-key notes below are inferred from key names and values
# only; confirm them against the benchmark script that reads this file.
model: nvidia/DeepSeek-R1-0528-FP4-v2
# Presumably the indices of the model layers to benchmark; a single layer (5) here.
layer_indices: [5]
run_type: CTX
# No value given (YAML null).
scaled_from: null
# KV cache related args
tokens_per_block: 32
max_seq_len: 9220 # 8192 + 1024 + 4
enable_attention_dp: true
# Model init args
# Larger than batch_size * seq_len_q (1 * 8193) used below.
max_num_tokens: 20480
moe_backend: CUTLASS
use_cuda_graph: false
# Per iteration args
batch_size: 1
seq_len_q: 8193
# Zero here; presumably means no tokens are already in the KV cache when the
# iteration starts — TODO confirm.
seq_len_kv_cache: 0
balance_method: Balanced
# No value given (YAML null).
balance_ratio: null