# TensorRT-LLMs/tests/integration/defs/accuracy/references/mmmu.yaml
# JennyLiu 6506d63466
# [None][test] Add DGX-Spark VLM gemm3-12b bfp16/fp4/fp8 accuracy and perf cases (#11096)
# Signed-off-by: Jenny Liu <JennyLiu-nv+JennyLiu@users.noreply.github.com>
# Co-authored-by: Jenny Liu <JennyLiu-nv+JennyLiu@users.noreply.github.com>
# 2026-01-30 00:38:19 -05:00
#
# 49 lines
# 1.3 KiB
# YAML

google/gemma-3-27b-it:
- accuracy: 52.0
- quant_algo: FP8
kv_cache_quant_algo: FP8
accuracy: 50.0
- quant_algo: NVFP4
kv_cache_quant_algo: FP8
accuracy: 48.0
google/gemma-3-12b-it:
- accuracy: 50.44
- quant_algo: FP8
kv_cache_quant_algo: FP8
accuracy: 49.0
- quant_algo: NVFP4
kv_cache_quant_algo: FP8
accuracy: 50.11
Qwen/Qwen2-VL-7B-Instruct:
- accuracy: 48.44
Qwen/Qwen2.5-VL-7B-Instruct:
- accuracy: 51.22
- quant_algo: FP8
accuracy: 45.44
- quant_algo: NVFP4
accuracy: 40.67
nvidia/Nano-v2-VLM:
- accuracy: 43.78
llava-hf/llava-v1.6-mistral-7b-hf:
- accuracy: 35.33
Efficient-Large-Model/NVILA-8B:
- accuracy: 47.77
Efficient-Large-Model/VILA1.5-3b:
- accuracy: 32.33
# MMMU for Nemotron-Nano-12B-v2-VL-BF16 requires reasoning to be enabled.
# Since enabling reasoning is not supported by the current test harness,
# the metric here is only for model sanity checking.
nvidia/NVIDIA-Nemotron-Nano-12B-v2-VL-BF16:
- accuracy: 26.67
microsoft/Phi-4-multimodal-instruct:
- accuracy: 53.67
Qwen/Qwen3-VL-30B-A3B-Instruct:
- accuracy: 55.33
mistral/Mistral-Large-3-675B:
# Mistral Large 3 675B only supports single image input, so accuracy is lower.
- accuracy: 47.0
Qwen/Qwen3-VL-8B-Instruct:
- accuracy: 55.11
mistralai/Mistral-Small-3.1-24B-Instruct-2503:
- accuracy: 57.0