mirror of
https://github.com/NVIDIA/TensorRT-LLM.git
synced 2026-02-04 18:21:52 +08:00
22 lines
499 B
YAML
22 lines
499 B
YAML
meta-llama/Llama-3.1-8B-Instruct:
  - accuracy: 74.00
  - spec_dec_algo: Eagle
    accuracy: 74.00
  - spec_dec_algo: NGram
    accuracy: 74.00
deepseek-ai/DeepSeek-V3-Lite:
  - accuracy: 77.00
  - spec_dec_algo: MTP
    accuracy: 77.00
google/gemma-3-1b-it:
  - quant_algo: FP8
    kv_cache_quant_algo: FP8
    accuracy: 61.00
GPT-OSS/120B-MXFP4:
  - quant_algo: W4A16_MXFP4
    spec_dec_algo: Eagle
    accuracy: 62.00
  - quant_algo: W4A8_MXFP4_MXFP8
    spec_dec_algo: Eagle
    accuracy: 62.00