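# TensorRT-LLMs/tests/integration/defs/accuracy/references/gsm8k.yaml
#
# Layout: each top-level key is a model identifier mapping to a list of
# entries. Every entry has an `accuracy` value and may also carry
# `quant_algo`, `kv_cache_quant_algo` and/or `spec_dec_algo`.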

# meta-llama models.
meta-llama/Llama-3.1-8B-Instruct:
  - accuracy: 74.20
  - spec_dec_algo: NGram
    accuracy: 74.20
  - spec_dec_algo: Eagle
    accuracy: 74.20
  - quant_algo: FP8
    accuracy: 74.30
  - quant_algo: FP8
    kv_cache_quant_algo: FP8
    accuracy: 72.85
  # NOTE(review): this is the only entry in the file whose
  # kv_cache_quant_algo is NVFP4 (paired here with quant_algo FP8) -
  # confirm the combination is intended.
  - quant_algo: FP8
    kv_cache_quant_algo: NVFP4
    accuracy: 69.75
meta-llama/Llama-3.3-70B-Instruct:
  # NOTE(review): every entry with algo keys below lists a higher accuracy
  # than this 83.78 entry without algo keys - confirm the values.
  - accuracy: 83.78
  - quant_algo: NVFP4
    kv_cache_quant_algo: FP8
    accuracy: 87.33
  - quant_algo: FP8
    kv_cache_quant_algo: FP8
    accuracy: 90.30
  - quant_algo: FP8
    accuracy: 90.30
meta-llama/Llama-4-Maverick-17B-128E-Instruct:
  # All three entries share the same value (92.20).
  - accuracy: 92.20
  - quant_algo: FP8
    kv_cache_quant_algo: FP8
    accuracy: 92.20
  - quant_algo: FP8
    kv_cache_quant_algo: FP8
    spec_dec_algo: Eagle
    accuracy: 92.20
meta-llama/Llama-4-Scout-17B-16E-Instruct:
  - accuracy: 89.70
  - quant_algo: NVFP4
    kv_cache_quant_algo: FP8
    accuracy: 88.61
  - quant_algo: FP8
    kv_cache_quant_algo: FP8
    accuracy: 89.45
# deepseek-ai models.
deepseek-ai/DeepSeek-V3-Lite:
  - accuracy: 64.74
  # NVFP4 entries: all four key combinations share 63.71.
  - quant_algo: NVFP4
    accuracy: 63.71
  - quant_algo: NVFP4
    kv_cache_quant_algo: FP8
    accuracy: 63.71
  - quant_algo: NVFP4
    spec_dec_algo: MTP
    accuracy: 63.71
  - quant_algo: NVFP4
    kv_cache_quant_algo: FP8
    spec_dec_algo: MTP
    accuracy: 63.71
  # FP8_BLOCK_SCALES entries without MTP match the no-algo value (64.74).
  - quant_algo: FP8_BLOCK_SCALES
    accuracy: 64.74
  - quant_algo: FP8_BLOCK_SCALES
    kv_cache_quant_algo: FP8
    accuracy: 64.74
  # MTP entries without quant_algo.
  - spec_dec_algo: MTP
    accuracy: 64.44
  - spec_dec_algo: MTP
    kv_cache_quant_algo: FP8
    accuracy: 64.44
  # FP8_BLOCK_SCALES combined with MTP.
  - quant_algo: FP8_BLOCK_SCALES
    spec_dec_algo: MTP
    accuracy: 64.14
  - quant_algo: FP8_BLOCK_SCALES
    kv_cache_quant_algo: FP8
    spec_dec_algo: MTP
    accuracy: 64.14
# Every DeepSeek-R1 entry specifies a quant_algo; there is no entry without
# one. NVFP4 entries use 95.42 and FP8_BLOCK_SCALES entries use 95.413.
deepseek-ai/DeepSeek-R1:
  - quant_algo: NVFP4
    accuracy: 95.42
  - quant_algo: NVFP4
    spec_dec_algo: MTP
    accuracy: 95.42
  - quant_algo: NVFP4
    kv_cache_quant_algo: FP8
    accuracy: 95.42
  - quant_algo: NVFP4
    kv_cache_quant_algo: FP8
    spec_dec_algo: MTP
    accuracy: 95.42
  - quant_algo: FP8_BLOCK_SCALES
    accuracy: 95.413
  - quant_algo: FP8_BLOCK_SCALES
    spec_dec_algo: MTP
    accuracy: 95.413
  - quant_algo: FP8_BLOCK_SCALES
    kv_cache_quant_algo: FP8
    accuracy: 95.413
  - quant_algo: FP8_BLOCK_SCALES
    kv_cache_quant_algo: FP8
    spec_dec_algo: MTP
    accuracy: 95.413
# Every DeepSeek-V3.2-Exp entry specifies a quant_algo and shares 95.6.
deepseek-ai/DeepSeek-V3.2-Exp:
  - quant_algo: FP8_BLOCK_SCALES
    accuracy: 95.6
  - quant_algo: FP8_BLOCK_SCALES
    spec_dec_algo: MTP
    accuracy: 95.6
  - quant_algo: NVFP4
    accuracy: 95.6
  - quant_algo: NVFP4
    spec_dec_algo: MTP
    accuracy: 95.6
# Qwen3 models.
# Qwen3-4B has a single entry, for spec_dec_algo Eagle only.
Qwen3/Qwen3-4B:
  - spec_dec_algo: Eagle
    accuracy: 85.823
Qwen3/Qwen3-8B:
  # All three entries share the same value (87.1114).
  - accuracy: 87.1114
  - spec_dec_algo: Eagle
    accuracy: 87.1114
  - quant_algo: FP8
    kv_cache_quant_algo: FP8
    accuracy: 87.1114
Qwen3/Qwen3-30B-A3B:
  - accuracy: 83.43
  # The only entry for this model whose value differs from 83.43.
  - quant_algo: FP8_BLOCK_SCALES
    accuracy: 84.36
  - quant_algo: FP8
    kv_cache_quant_algo: FP8
    accuracy: 83.43
  - quant_algo: NVFP4
    kv_cache_quant_algo: FP8
    accuracy: 83.43
  - spec_dec_algo: Eagle
    accuracy: 83.43
# Every Qwen3-235B-A22B entry specifies a quant_algo and shares 85.78.
Qwen3/Qwen3-235B-A22B:
  - quant_algo: FP8
    kv_cache_quant_algo: FP8
    accuracy: 85.78
  - quant_algo: NVFP4
    kv_cache_quant_algo: FP8
    accuracy: 85.78
  - spec_dec_algo: Eagle
    quant_algo: NVFP4
    kv_cache_quant_algo: FP8
    accuracy: 85.78
Qwen3/Qwen3-Next-80B-A3B-Thinking:
  - accuracy: 81.577
Qwen3/Qwen3-Next-80B-A3B-Instruct:
  - accuracy: 92.72
  - quant_algo: NVFP4
    kv_cache_quant_algo: FP8
    accuracy: 90.86
# moonshotai models. Every entry here specifies a quant_algo.
moonshotai/Kimi-K2-Instruct:
  - quant_algo: FP8_BLOCK_SCALES
    accuracy: 94.84
moonshotai/Kimi-K2-Thinking:
  # Same value with and without kv_cache_quant_algo FP8.
  - quant_algo: NVFP4
    kv_cache_quant_algo: FP8
    accuracy: 90.84
  - quant_algo: NVFP4
    accuracy: 90.84
# nvidia Nemotron models. Each model has one entry without algo keys and one
# entry with quant_algo FP8 + kv_cache_quant_algo FP8.
nvidia/Llama-3_3-Nemotron-Super-49B-v1:
  - accuracy: 92.57
  - quant_algo: FP8
    kv_cache_quant_algo: FP8
    accuracy: 92.42
nvidia/Nemotron-H-8B-Base-8K:
  - accuracy: 46.20
  # NOTE(review): 85.78 is far above the 46.20 entry without algo keys, and
  # is also the value listed for Qwen3/Qwen3-235B-A22B - possible copy/paste
  # error; verify against a measured result.
  - quant_algo: FP8
    kv_cache_quant_algo: FP8
    accuracy: 85.78
nvidia/Nemotron-H-47B-Base-8K:
  - accuracy: 88.82
  - quant_algo: FP8
    kv_cache_quant_algo: FP8
    accuracy: 88.55
nvidia/Nemotron-H-56B-Base-8K:
  - accuracy: 89.27
  - quant_algo: FP8
    kv_cache_quant_algo: FP8
    accuracy: 89.27
nvidia/Nemotron-MOE:
  - accuracy: 88.249
  - quant_algo: FP8
    kv_cache_quant_algo: FP8
    accuracy: 86.884
nvidia/Llama-3.1-Nemotron-Nano-8B-v1:
  - accuracy: 37.15
  - quant_algo: FP8
    kv_cache_quant_algo: FP8
    accuracy: 28.39
nvidia/Llama-3_1-Nemotron-Ultra-253B-v1:
  - accuracy: 94.43
  - quant_algo: FP8
    kv_cache_quant_algo: FP8
    accuracy: 94.16
# Unlike the other top-level keys in this file, this one has no
# "<org>/" prefix.
kanana-1.5-2.1b-instruct-2505:
  - accuracy: 75.81
# speakleash Bielik: one entry without algo keys and one FP8 + FP8 KV entry.
speakleash/Bielik-11B-v2.2-Instruct:
  - accuracy: 41.51
  - quant_algo: FP8
    kv_cache_quant_algo: FP8
    accuracy: 40.41
# google Gemma 3 models.
google/gemma-3-1b-it:
  - accuracy: 25.52 # score obtained from lm-eval with the HF implementation
  - quant_algo: FP8
    kv_cache_quant_algo: FP8
    accuracy: 23.96
google/gemma-3-27b-it:
  - accuracy: 91.66
  - quant_algo: FP8
    kv_cache_quant_algo: FP8
    accuracy: 90.66
# mistralai instruct models: one entry without algo keys and one FP8 + FP8 KV
# entry each.
mistralai/Ministral-8B-Instruct-2410:
  - accuracy: 79.25
  - quant_algo: FP8
    kv_cache_quant_algo: FP8
    accuracy: 78.35
mistralai/Mistral-Small-3.1-24B-Instruct-2503:
  # Both entries share the same value (89.23).
  - accuracy: 89.23
  - quant_algo: FP8
    kv_cache_quant_algo: FP8
    accuracy: 89.23
# microsoft Phi-4 models.
microsoft/Phi-4-multimodal-instruct:
  - accuracy: 81.19
  # These two entries set quant_algo only (no kv_cache_quant_algo).
  - quant_algo: FP8
    accuracy: 80.82
  - quant_algo: NVFP4
    accuracy: 69.33
microsoft/Phi-4-multimodal-instruct-long-rope:
  - accuracy: 75.85
microsoft/Phi-4-mini-instruct:
  - accuracy: 82.30
microsoft/phi-4:
  - accuracy: 90.30
  - quant_algo: FP8
    kv_cache_quant_algo: FP8
    accuracy: 90.64
# Codestral: single entry without algo keys.
mistralai/Codestral-22B-v0.1:
  - accuracy: 67.10
# GPT-OSS models. Within each key below, every entry shares one value.
GPT-OSS/BF16:
  - accuracy: 90.3
  # Sets kv_cache_quant_algo without a quant_algo.
  - kv_cache_quant_algo: FP8
    accuracy: 90.3
# All 120B-MXFP4 entries list 90.3.
GPT-OSS/120B-MXFP4:
  - accuracy: 90.3
  - spec_dec_algo: Eagle
    accuracy: 90.3
  - quant_algo: W4A8_MXFP4_MXFP8
    accuracy: 90.3
  - quant_algo: W4A8_MXFP4_MXFP8
    spec_dec_algo: Eagle
    accuracy: 90.3
  - quant_algo: W4A8_MXFP4_MXFP8
    kv_cache_quant_algo: FP8
    accuracy: 90.3
  - quant_algo: W4A8_MXFP4_FP8
    accuracy: 90.3
  - quant_algo: W4A8_MXFP4_FP8
    kv_cache_quant_algo: FP8
    accuracy: 90.3
  - quant_algo: W4A16_MXFP4
    accuracy: 90.3
  - quant_algo: W4A16_MXFP4
    spec_dec_algo: Eagle
    accuracy: 90.3
  - quant_algo: W4A16_MXFP4
    kv_cache_quant_algo: FP8
    accuracy: 90.3
# All 20B-MXFP4 entries list 85.0.
GPT-OSS/20B-MXFP4:
  - accuracy: 85.0
  - quant_algo: W4A8_MXFP4_MXFP8
    accuracy: 85.0
  - quant_algo: W4A8_MXFP4_MXFP8
    kv_cache_quant_algo: FP8
    accuracy: 85.0
  - quant_algo: W4A16_MXFP4
    accuracy: 85.0
  - quant_algo: W4A16_MXFP4
    kv_cache_quant_algo: FP8
    accuracy: 85.0
# All 20B-NVFP4 entries list 85.0.
GPT-OSS/20B-NVFP4:
  - accuracy: 85.0
  - quant_algo: NVFP4
    accuracy: 85.0
  - quant_algo: NVFP4
    kv_cache_quant_algo: FP8
    accuracy: 85.0
# EXAONE 4.0 32B: single entry without algo keys.
LGAI-EXAONE/EXAONE-4.0-32B:
  - accuracy: 88.36
# Seed-OSS 36B Instruct: single entry without algo keys.
ByteDance-Seed/Seed-OSS-36B-Instruct:
  - accuracy: 90.8
# GLM-4.6.
zai-org/GLM-4.6:
  - accuracy: 81.3
  - spec_dec_algo: MTP
    accuracy: 81.3
  # NOTE(review): the NVFP4 + MTP value (88.0) is well above the 81.3 listed
  # for the two entries without quant_algo - confirm it is a measured value.
  - quant_algo: NVFP4
    spec_dec_algo: MTP
    accuracy: 88.0
# bigcode StarCoder2 models: one entry without algo keys each.
bigcode/starcoder2-3b:
  - accuracy: 20.2
bigcode/starcoder2-7b:
  - accuracy: 26.5
bigcode/starcoder2-15b:
  - accuracy: 54.5
# NOTE(review): this key uses the prefix "mistral/", while the other Mistral
# entries in this file use "mistralai/" - confirm the prefix is intended.
mistral/Mistral-Large-3-675B:
  # Same value with and without spec_dec_algo Eagle.
  - accuracy: 86.1
  - spec_dec_algo: Eagle
    accuracy: 86.1
# Further nvidia Nemotron models: one entry without algo keys and one entry
# with a quant_algo plus kv_cache_quant_algo FP8 each.
nvidia/Nemotron-Super-V3:
  - accuracy: 83.74
  - quant_algo: NVFP4
    kv_cache_quant_algo: FP8
    accuracy: 80.85
nvidia/Nemotron-3-Nano:
  - accuracy: 69.37
  - quant_algo: FP8
    kv_cache_quant_algo: FP8
    accuracy: 68.73