mirror of
https://github.com/NVIDIA/TensorRT-LLM.git
synced 2026-02-18 08:45:05 +08:00
13 lines
378 B
YAML
13 lines
378 B
YAML
Qwen3/Qwen3-30B-A3B-Instruct-2507:
  # Skip Softmax Attention ref accuracy
  # Each entry pairs an extra_acc_spec string (a target_sparsity setting)
  # with the accuracy value recorded for it.
  - extra_acc_spec: "target_sparsity=0.0"
    accuracy: 47.357
  - extra_acc_spec: "target_sparsity=0.5"
    accuracy: 47.102
  - extra_acc_spec: "target_sparsity=0.9"
    accuracy: 46.169
deepseek-ai/DeepSeek-V3-0324:
  # One entry: accuracy recorded for quant_algo NVFP4 with
  # extra_acc_spec "target_sparsity=0.9".
  # NOTE(review): presumably also a Skip Softmax Attention reference, like
  # other target_sparsity entries - confirm against the consumer.
  - quant_algo: NVFP4
    extra_acc_spec: "target_sparsity=0.9"
    accuracy: 44.94