# Per-model accuracy entries.
#
# Each top-level key is a model identifier; its value is a list of entries.
# Every entry carries an `accuracy` value and may additionally set
# `quant_algo`, `kv_cache_quant_algo`, `spec_dec_algo` and/or
# `extra_acc_spec` (presumably identifying the configuration the accuracy
# was recorded for).
# NOTE(review): which benchmark these numbers belong to, and how the consumer
# uses them (e.g. as pass/fail thresholds), is not visible here -- confirm
# before editing values.
meta-llama/Llama-2-7b-hf:
  - accuracy: 46.69
  - quant_algo: FP8
    kv_cache_quant_algo: FP8
    accuracy: 46.76
meta-llama/Meta-Llama-3-8B-Instruct:
  - accuracy: 67.74
  - quant_algo: NVFP4
    kv_cache_quant_algo: FP8
    accuracy: 63.47
meta-llama/Llama-3.1-8B:
  - accuracy: 66.06
  - quant_algo: NVFP4
    kv_cache_quant_algo: FP8
    accuracy: 63.16
  - quant_algo: FP8_PER_CHANNEL_PER_TOKEN
    accuracy: 65.55
  - quant_algo: MIXED_PRECISION
    extra_acc_spec: autoq_format=int4_awq,fp8,w4a8_awq;auto_quantize_bits=5.8
    accuracy: 64.99
meta-llama/Llama-3.1-8B-Instruct:
  - accuracy: 68.17
  - spec_dec_algo: EAGLE3
    accuracy: 68.20
  - spec_dec_algo: NGRAM
    accuracy: 68.17
  - quant_algo: FP8
    accuracy: 67.93
  - quant_algo: FP8
    extra_acc_spec: temperature=0.8,top_p=0.95
    accuracy: 64.62
  - quant_algo: FP8
    kv_cache_quant_algo: FP8
    accuracy: 67.87
# This model has no entry without qualifiers (every entry sets quant_algo or
# extra_acc_spec).
meta-llama/Llama-3.2-1B:
  - quant_algo: W8A8_SQ_PER_CHANNEL_PER_TOKEN_PLUGIN
    accuracy: 32.72
  - quant_algo: W8A8_SQ_PER_CHANNEL
    accuracy: 32.07
  - quant_algo: W4A16_AWQ
    accuracy: 30.56
  - quant_algo: W4A16_AWQ
    kv_cache_quant_algo: INT8
    accuracy: 31.29
  - quant_algo: FP8
    kv_cache_quant_algo: FP8
    accuracy: 31.02
  - quant_algo: FP8_PER_CHANNEL_PER_TOKEN
    accuracy: 33.97
  - quant_algo: FP8_PER_CHANNEL_PER_TOKEN
    extra_acc_spec: meta_recipe
    accuracy: 33.87
  - extra_acc_spec: max_attention_window_size=960
    accuracy: 32.82
meta-llama/Llama-3.2-3B:
  - accuracy: 57.92
  - quant_algo: FP8
    kv_cache_quant_algo: FP8
    accuracy: 60.60
meta-llama/Llama-3.3-70B-Instruct:
  - accuracy: 81.31
  - quant_algo: NVFP4
    kv_cache_quant_algo: FP8
    accuracy: 79.31
  - quant_algo: FP8
    accuracy: 81.02
meta-llama/Llama-4-Maverick-17B-128E-Instruct:
  - accuracy: 86.40
  - quant_algo: FP8
    kv_cache_quant_algo: FP8
    accuracy: 86.45
meta-llama/Llama-4-Scout-17B-16E-Instruct:
  - accuracy: 80.00
  - quant_algo: FP8
    kv_cache_quant_algo: FP8
    accuracy: 80.00
  - quant_algo: NVFP4
    kv_cache_quant_algo: FP8
    accuracy: 80.00
mistralai/Mistral-7B-v0.1:
  - accuracy: 66
mistralai/Mistral-7B-Instruct-v0.3:
  - quant_algo: W4A16
    accuracy: 59.23
  - quant_algo: W4A16_AWQ
    accuracy: 61.06
  - quant_algo: W4A8_AWQ
    accuracy: 60.04
mistralai/Mixtral-8x7B-v0.1:
  - accuracy: 71.35
  - quant_algo: FP8
    kv_cache_quant_algo: FP8
    accuracy: 71.27
  - quant_algo: NVFP4
    kv_cache_quant_algo: FP8
    accuracy: 69.64
mistralai/Mixtral-8x7B-Instruct-v0.1:
  - accuracy: 68.0
mistralai/Mixtral-8x22B-v0.1:
  - quant_algo: FP8
    accuracy: 77.63
google/gemma-2-9b-it:
  - accuracy: 73.05
Qwen/Qwen2-0.5B-Instruct:
  - accuracy: 45.30
  - quant_algo: FP8
    accuracy: 45.03
Qwen/Qwen2.5-0.5B-Instruct:
  - accuracy: 49.59
  - quant_algo: FP8
    accuracy: 48.59
Qwen/Qwen2.5-1.5B-Instruct:
  - accuracy: 61.45
  - quant_algo: FP8
    accuracy: 61.43
Qwen/Qwen2.5-7B-Instruct:
  - accuracy: 75.32
  - quant_algo: FP8
    accuracy: 75.32
  - quant_algo: FP8
    kv_cache_quant_algo: FP8
    accuracy: 75.32
deepseek-ai/DeepSeek-V3-Lite:
  - accuracy: 71.40
  - quant_algo: NVFP4
    accuracy: 70.60
  - quant_algo: NVFP4
    spec_dec_algo: MTP
    accuracy: 70.60
  - quant_algo: FP8_BLOCK_SCALES
    accuracy: 71.27
  - spec_dec_algo: MTP
    accuracy: 71.39
  - quant_algo: FP8_BLOCK_SCALES
    spec_dec_algo: MTP
    accuracy: 71.29
deepseek-ai/DeepSeek-R1:
  - quant_algo: NVFP4
    accuracy: 87.33
  - quant_algo: NVFP4
    spec_dec_algo: MTP
    accuracy: 87.33
  - quant_algo: NVFP4
    kv_cache_quant_algo: FP8
    accuracy: 87.33
  - quant_algo: FP8_BLOCK_SCALES
    accuracy: 87.573
  - quant_algo: FP8_BLOCK_SCALES
    spec_dec_algo: MTP
    accuracy: 87.573
Qwen3/Qwen3-8B:
  - quant_algo: FP8_BLOCK_SCALES
    accuracy: 76.12
Qwen3/Qwen3-30B-A3B:
  - quant_algo: FP8_BLOCK_SCALES
    accuracy: 79.53
  - quant_algo: FP8
    kv_cache_quant_algo: FP8
    accuracy: 80.65
  - quant_algo: NVFP4
    kv_cache_quant_algo: FP8
    accuracy: 80.65
Qwen3/Qwen3-235B-A22B:
  - quant_algo: FP8
    kv_cache_quant_algo: FP8
    accuracy: 86
  - quant_algo: NVFP4
    kv_cache_quant_algo: FP8
    accuracy: 86
nvidia/Llama-3_3-Nemotron-Super-49B-v1:
  - accuracy: 79.43
  - quant_algo: FP8
    kv_cache_quant_algo: FP8
    accuracy: 79.26
nvidia/Llama-3.1-Nemotron-Nano-8B-v1:
  - accuracy: 57.97
  - quant_algo: FP8
    kv_cache_quant_algo: FP8
    accuracy: 57.12
nvidia/Nemotron-H-8B-Base-8K:
  - accuracy: 69.590
  - quant_algo: FP8
    kv_cache_quant_algo: FP8
    accuracy: 69.180
microsoft/Phi-4-mini-instruct:
  - accuracy: 68.98
nvidia/Llama-3_1-Nemotron-Ultra-253B-v1:
  - accuracy: 83.70
  - quant_algo: FP8
    kv_cache_quant_algo: FP8
    accuracy: 83.36
kanana-1.5-2.1b-instruct-2505:
  - accuracy: 56.89
speakleash/Bielik-11B-v2.2-Instruct:
  - accuracy: 64.47
  - quant_algo: FP8
    kv_cache_quant_algo: FP8
    accuracy: 64.36