TensorRT-LLMs/tests/integration/defs/accuracy/references/gsm8k.yaml
Ivy Zhang 94de3c11b0
tests: Add llama4 functional cases (#6392)
Signed-off-by: Ivy Zhang <25222398+crazydemo@users.noreply.github.com>
2025-07-29 17:49:43 +10:00

116 lines
2.7 KiB
YAML

# Reference accuracies for meta-llama/Llama-3.1-8B-Instruct (GSM8K, per this
# file's path). Each list item is one configuration: the keys beside
# `accuracy` identify it, and `accuracy` is the reference score.
meta-llama/Llama-3.1-8B-Instruct:
  # No quantization keys: presumably the unquantized baseline; confirm
  # against the code that consumes this file.
  - accuracy: 74.20
  - quant_algo: FP8
    accuracy: 74.30
  - quant_algo: FP8
    kv_cache_quant_algo: FP8
    accuracy: 72.85
# Reference accuracies for meta-llama/Llama-3.3-70B-Instruct. One list item
# per configuration; continuation keys are indented under their `- ` item.
meta-llama/Llama-3.3-70B-Instruct:
  # No quantization keys: presumably the unquantized baseline; confirm.
  - accuracy: 83.78
  - quant_algo: NVFP4
    kv_cache_quant_algo: FP8
    accuracy: 75.61
  - quant_algo: FP8
    accuracy: 83.30
# Reference accuracies for meta-llama/Llama-4-Maverick-17B-128E-Instruct.
# One list item per configuration.
meta-llama/Llama-4-Maverick-17B-128E-Instruct:
  # No quantization keys: presumably the unquantized baseline; confirm.
  - accuracy: 92.20
  # NOTE(review): 83.30 is 8.90 below the entry above and identical to the
  # meta-llama/Llama-3.3-70B-Instruct FP8 value in this file. Possibly a
  # copied placeholder; confirm against a measured run.
  - quant_algo: FP8
    kv_cache_quant_algo: FP8
    accuracy: 83.30
# Reference accuracies for meta-llama/Llama-4-Scout-17B-16E-Instruct.
# One list item per configuration.
meta-llama/Llama-4-Scout-17B-16E-Instruct:
  # No quantization keys: presumably the unquantized baseline; confirm.
  - accuracy: 89.70
  - quant_algo: FP8
    kv_cache_quant_algo: FP8
    accuracy: 89.61
  - quant_algo: NVFP4
    kv_cache_quant_algo: FP8
    accuracy: 89.00
# Reference accuracies for deepseek-ai/DeepSeek-V3-Lite. One list item per
# combination of quant_algo / kv_cache_quant_algo / spec_dec_algo; a key that
# is absent from an item is simply not set for that configuration.
deepseek-ai/DeepSeek-V3-Lite:
  # No algorithm keys: presumably the unquantized, non-speculative baseline;
  # confirm against the code that consumes this file.
  - accuracy: 64.74

  # quant_algo: NVFP4, with and without FP8 KV cache and MTP.
  - quant_algo: NVFP4
    accuracy: 63.71
  - quant_algo: NVFP4
    kv_cache_quant_algo: FP8
    accuracy: 63.71
  - quant_algo: NVFP4
    spec_dec_algo: MTP
    accuracy: 63.71
  - quant_algo: NVFP4
    kv_cache_quant_algo: FP8
    spec_dec_algo: MTP
    accuracy: 63.71

  # quant_algo: FP8_BLOCK_SCALES without speculative decoding.
  - quant_algo: FP8_BLOCK_SCALES
    accuracy: 64.74
  - quant_algo: FP8_BLOCK_SCALES
    kv_cache_quant_algo: FP8
    accuracy: 64.74

  # spec_dec_algo: MTP with no quant_algo.
  - spec_dec_algo: MTP
    accuracy: 64.44
  - spec_dec_algo: MTP
    kv_cache_quant_algo: FP8
    accuracy: 64.44

  # quant_algo: FP8_BLOCK_SCALES combined with MTP.
  - quant_algo: FP8_BLOCK_SCALES
    spec_dec_algo: MTP
    accuracy: 64.14
  - quant_algo: FP8_BLOCK_SCALES
    kv_cache_quant_algo: FP8
    spec_dec_algo: MTP
    accuracy: 64.14
# Reference accuracies for deepseek-ai/DeepSeek-R1. Every item here sets
# quant_algo; no entry without it is listed for this model.
deepseek-ai/DeepSeek-R1:
  - quant_algo: NVFP4
    accuracy: 95.42
  - quant_algo: NVFP4
    spec_dec_algo: MTP
    accuracy: 95.42
  - quant_algo: NVFP4
    kv_cache_quant_algo: FP8
    accuracy: 95.42
  # NOTE(review): these two values carry three decimal places while the
  # other entries visible in this file use two. Confirm this is intended.
  - quant_algo: FP8_BLOCK_SCALES
    accuracy: 95.413
  - quant_algo: FP8_BLOCK_SCALES
    spec_dec_algo: MTP
    accuracy: 95.413
# Reference accuracies for Qwen3/Qwen3-30B-A3B. Every item here sets
# quant_algo; no entry without it is listed for this model.
Qwen3/Qwen3-30B-A3B:
  - quant_algo: FP8_BLOCK_SCALES
    accuracy: 84.36
  - quant_algo: FP8
    kv_cache_quant_algo: FP8
    accuracy: 83.43
  - quant_algo: NVFP4
    kv_cache_quant_algo: FP8
    accuracy: 83.43
# Reference accuracies for Qwen3/Qwen3-235B-A22B. Both items use an FP8 KV
# cache and differ only in quant_algo.
Qwen3/Qwen3-235B-A22B:
  - quant_algo: FP8
    kv_cache_quant_algo: FP8
    accuracy: 85.78
  - quant_algo: NVFP4
    kv_cache_quant_algo: FP8
    accuracy: 85.78
# Reference accuracies for nvidia/Llama-3_3-Nemotron-Super-49B-v1.
# One list item per configuration.
nvidia/Llama-3_3-Nemotron-Super-49B-v1:
  # No quantization keys: presumably the unquantized baseline; confirm.
  - accuracy: 92.57
  - quant_algo: FP8
    kv_cache_quant_algo: FP8
    accuracy: 92.42
# Reference accuracies for nvidia/Nemotron-H-8B-Base-8K.
# One list item per configuration.
nvidia/Nemotron-H-8B-Base-8K:
  # No quantization keys: presumably the unquantized baseline; confirm.
  - accuracy: 46.20
  # NOTE(review): 85.78 is 39.58 above the entry above and identical to the
  # Qwen3/Qwen3-235B-A22B values in this file. Looks like a possible
  # copy-paste; confirm against a measured run before relying on it.
  - quant_algo: FP8
    kv_cache_quant_algo: FP8
    accuracy: 85.78
# Reference accuracies for nvidia/Llama-3.1-Nemotron-Nano-8B-v1.
# One list item per configuration.
nvidia/Llama-3.1-Nemotron-Nano-8B-v1:
  # No quantization keys: presumably the unquantized baseline; confirm.
  - accuracy: 37.15
  - quant_algo: FP8
    kv_cache_quant_algo: FP8
    accuracy: 28.39
# Reference accuracies for nvidia/Llama-3_1-Nemotron-Ultra-253B-v1.
# One list item per configuration.
nvidia/Llama-3_1-Nemotron-Ultra-253B-v1:
  # No quantization keys: presumably the unquantized baseline; confirm.
  - accuracy: 94.43
  - quant_algo: FP8
    kv_cache_quant_algo: FP8
    accuracy: 94.16
# Reference accuracy for kanana-1.5-2.1b-instruct-2505 (single entry).
# NOTE(review): this key has no `org/` prefix, unlike the other keys visible
# in this file. Presumably it matches the model name the consumer looks up;
# confirm before renaming.
kanana-1.5-2.1b-instruct-2505:
  - accuracy: 75.81
# Reference accuracies for speakleash/Bielik-11B-v2.2-Instruct.
# One list item per configuration.
speakleash/Bielik-11B-v2.2-Instruct:
  # No quantization keys: presumably the unquantized baseline; confirm.
  - accuracy: 41.51
  - quant_algo: FP8
    kv_cache_quant_algo: FP8
    accuracy: 40.41