# Mirror of https://github.com/NVIDIA/TensorRT-LLM.git
# synced 2026-01-14 06:27:45 +08:00
google/gemma-3-27b-it:
  - accuracy: 52.0
  - quant_algo: FP8
    kv_cache_quant_algo: FP8
    accuracy: 50.0
Qwen/Qwen2-VL-7B-Instruct:
  - accuracy: 48.44
Qwen/Qwen2.5-VL-7B-Instruct:
  - accuracy: 51.22
nvidia/Nano-v2-VLM:
  - accuracy: 43.78
llava-hf/llava-v1.6-mistral-7b-hf:
  - accuracy: 35.33
Efficient-Large-Model/NVILA-8B:
  - accuracy: 47.77
Efficient-Large-Model/VILA1.5-3b:
  - accuracy: 32.33
# MMMU for Nemotron-Nano-12B-v2-VL-BF16 requires reasoning on.
# Since enabling reasoning in the current test harness is not supported,
# the metric here is for model sanity checking only.
nvidia/NVIDIA-Nemotron-Nano-12B-v2-VL-BF16:
  - accuracy: 26.67
microsoft/Phi-4-multimodal-instruct:
  - accuracy: 53.67
Qwen/Qwen3-VL-30B-A3B-Instruct:
  - accuracy: 55.33
mistral/Mistral-Large-3-675B:
  - accuracy: 60.00