[None][chore] update AD model list (#10981)

Signed-off-by: Tal Cherckez <127761168+tcherckez-nvidia@users.noreply.github.com>
This commit is contained in:
tcherckez-nvidia 2026-01-26 16:49:50 +02:00 committed by GitHub
parent 00f341be49
commit 43b8a5561c
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -23,25 +23,27 @@ models:
- name: google/gemma-3-1b-it
yaml_extra: ['dashboard_default.yaml', 'world_size_1.yaml', 'gemma3_1b.yaml']
- name: meta-llama/Llama-3.1-8B-Instruct
yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml', 'compile_backend_torch_cudagraph.yaml']
- name: casperhansen/llama-3-8b-instruct-awq
yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml', 'compile_backend_torch_cudagraph.yaml']
yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml']
# DISABLED: NOT SUPPORTED - https://github.com/NVIDIA/TensorRT-LLM/issues/10363
# - name: casperhansen/llama-3-8b-instruct-awq
# yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml']
- name: meta-llama/Llama-3.2-1B-Instruct
yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml', 'compile_backend_torch_cudagraph.yaml']
yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml']
- name: meta-llama/Llama-3.2-3B-Instruct
yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml', 'compile_backend_torch_cudagraph.yaml']
yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml']
- name: Qwen/Qwen2.5-1.5B-Instruct
yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml']
- name: Qwen/Qwen2.5-3B-Instruct
yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml']
- name: Qwen/Qwen2.5-7B-Instruct
yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml', 'compile_backend_torch_cudagraph.yaml']
- name: Qwen/Qwen2.5-7B-Instruct-AWQ
yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml', 'compile_backend_torch_cudagraph.yaml']
yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml']
# DISABLED: NOT SUPPORTED - https://github.com/NVIDIA/TensorRT-LLM/issues/10363
# - name: Qwen/Qwen2.5-7B-Instruct-AWQ
# yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml']
- name: Qwen/Qwen3-4B
yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml']
- name: Qwen/Qwen3-8B
yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml', 'compile_backend_torch_cudagraph.yaml']
yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml']
- name: microsoft/phi-4
yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml']
- name: microsoft/Phi-4-reasoning
@ -51,31 +53,33 @@ models:
- name: google/gemma-1.1-7b-it
yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml']
- name: google/gemma-2-2b-it
yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml', 'compile_backend_torch_cudagraph.yaml']
yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml']
- name: google/gemma-2-9b-it
yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml', 'compile_backend_torch_cudagraph.yaml']
yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml']
- name: google/codegemma-7b-it
yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml']
- name: mistralai/Mistral-7B-Instruct-v0.2
yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml', 'compile_backend_torch_cudagraph.yaml']
yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml']
- name: mistralai/Mistral-7B-Instruct-v0.3
yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml', 'compile_backend_torch_cudagraph.yaml']
yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml']
- name: TheBloke/Mistral-7B-Instruct-v0.2-GPTQ
yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml', 'compile_backend_torch_cudagraph.yaml']
yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml']
- name: bigcode/starcoder2-7b
yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml']
- name: bigcode/starcoder2-15b
yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml', 'attn_backend_triton.yaml']
- name: bigcode/starcoder2-15b-instruct-v0.1
yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml', 'attn_backend_triton.yaml']
- name: deepseek-ai/DeepSeek-Prover-V1.5-SFT
yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml', 'compile_backend_torch_cudagraph.yaml']
yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml']
- name: deepseek-ai/DeepSeek-Prover-V2-7B
yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml', 'compile_backend_torch_cudagraph.yaml']
yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml']
- name: deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B
yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml', 'compile_backend_torch_cudagraph.yaml']
yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml']
- name: ibm-granite/granite-3.1-2b-instruct
yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml']
- name: ibm-granite/granite-3.1-8b-instruct
yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml', 'compile_backend_torch_cudagraph.yaml']
yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml']
- name: ibm-granite/granite-3.3-2b-instruct
yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml']
- name: ibm-granite/granite-3.3-8b-instruct
@ -85,31 +89,28 @@ models:
- name: ibm-granite/granite-guardian-3.2-5b
yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml']
- name: meta-llama/CodeLlama-7b-Instruct-hf
yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml', 'compile_backend_torch_cudagraph.yaml']
yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml']
- name: meta-llama/CodeLlama-7b-Python-hf
yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml', 'compile_backend_torch_cudagraph.yaml']
yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml']
- name: meta-llama/Llama-2-7b-chat-hf
yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml', 'compile_backend_torch_cudagraph.yaml']
yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml']
# DISABLED: FakeTensorMode error in unified_attn export
# - name: nvidia/Llama-3.1-8B-Instruct-FP8
# yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml']
- name: nvidia/Llama-3.1-Minitron-4B-Depth-Base
yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml', 'compile_backend_torch_cudagraph.yaml']
yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml']
- name: nvidia/Llama-3.1-Minitron-4B-Width-Base
yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml', 'compile_backend_torch_cudagraph.yaml']
yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml']
- name: nvidia/Llama-3.1-Nemotron-Nano-8B-v1
yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml', 'compile_backend_torch_cudagraph.yaml']
yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml']
- name: nvidia/Mistral-NeMo-Minitron-8B-Base
yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml']
- name: openai/gpt-oss-20b
yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml']
# DISABLED: Custom op error - append_paged_kv_cache missing Float kernel
# - name: bigcode/starcoder2-15b
# yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml']
- name: ibm-granite/granite-3.0-8b-instruct
yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml', 'compile_backend_torch_cudagraph.yaml']
yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml']
- name: mistralai/Ministral-8B-Instruct-2410
yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml', 'compile_backend_torch_cudagraph.yaml']
yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml']
- name: nvidia/NVIDIA-Nemotron-Nano-9B-v2-FP8
yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml']
- name: nvidia/NVIDIA-Nemotron-Nano-9B-v2-NVFP4
@ -123,56 +124,53 @@ models:
# DISABLED: Network timeout downloading from Hugging Face
# - name: ai21labs/AI21-Jamba-1.5-Mini
# yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml']
- name: THUDM/glm-4v-9b
yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml', 'multimodal.yaml', 'compile_backend_torch_cudagraph.yaml']
- name: zai-org/glm-4v-9b
yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml', 'multimodal.yaml']
# - name: zai-org/GLM-4.7
# yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml', 'multimodal.yaml']
- name: meta-llama/Llama-3.2-11B-Vision-Instruct
yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml', 'multimodal.yaml', 'compile_backend_torch_cudagraph.yaml']
# DISABLED: Auto-deploy compilation error
# - name: meta-llama/Llama-3.3-70B-Instruct
# yaml_extra: ['dashboard_default.yaml', 'world_size_4.yaml', 'llama3_3_70b.yaml']
yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml', 'multimodal.yaml']
- name: meta-llama/Llama-3.3-70B-Instruct
yaml_extra: ['dashboard_default.yaml', 'world_size_4.yaml', 'llama3_3_70b.yaml']
- name: meta-llama/CodeLlama-34b-Instruct-hf
yaml_extra: ['dashboard_default.yaml', 'world_size_4.yaml', 'compile_backend_torch_cudagraph.yaml']
yaml_extra: ['dashboard_default.yaml', 'world_size_4.yaml']
- name: meta-llama/Llama-2-13b-chat-hf
yaml_extra: ['dashboard_default.yaml', 'world_size_4.yaml', 'compile_backend_torch_cudagraph.yaml']
yaml_extra: ['dashboard_default.yaml', 'world_size_4.yaml']
- name: microsoft/Phi-3-medium-128k-instruct
yaml_extra: ['dashboard_default.yaml', 'world_size_4.yaml']
- name: microsoft/Phi-3-medium-4k-instruct
yaml_extra: ['dashboard_default.yaml', 'world_size_4.yaml']
- name: mistralai/Codestral-22B-v0.1
yaml_extra: ['dashboard_default.yaml', 'world_size_4.yaml', 'compile_backend_torch_cudagraph.yaml']
# DISABLED: Graph transformation error in auto-deploy
# - name: neuralmagic/Mixtral-8x7B-Instruct-v0.1-FP8
# yaml_extra: ['dashboard_default.yaml', 'world_size_4.yaml']
yaml_extra: ['dashboard_default.yaml', 'world_size_4.yaml']
- name: neuralmagic/Mixtral-8x7B-Instruct-v0.1-FP8
yaml_extra: ['dashboard_default.yaml', 'world_size_4.yaml']
- name: Qwen/QwQ-32B
yaml_extra: ['dashboard_default.yaml', 'world_size_4.yaml', 'compile_backend_torch_cudagraph.yaml']
yaml_extra: ['dashboard_default.yaml', 'world_size_4.yaml']
- name: google/gemma-2-27b-it
yaml_extra: ['dashboard_default.yaml', 'world_size_4.yaml', 'compile_backend_torch_cudagraph.yaml']
yaml_extra: ['dashboard_default.yaml', 'world_size_4.yaml']
- name: perplexity-ai/r1-1776-distill-llama-70b
yaml_extra: ['dashboard_default.yaml', 'world_size_4.yaml']
- name: nvidia/NVIDIA-Nemotron-Nano-31B-A3-v3
yaml_extra: ['dashboard_default.yaml', 'world_size_4.yaml', 'nano_v3.yaml']
- name: nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-FP8
yaml_extra: ['dashboard_default.yaml', 'world_size_4.yaml']
- name: nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-FP8
yaml_extra: ['dashboard_default.yaml', 'world_size_4.yaml', 'nano_v3.yaml']
- name: Qwen/QwQ-32B-Preview
yaml_extra: ['dashboard_default.yaml', 'world_size_4.yaml', 'compile_backend_torch_cudagraph.yaml']
yaml_extra: ['dashboard_default.yaml', 'world_size_4.yaml']
- name: Qwen/Qwen3-Coder-30B-A3B-Instruct
yaml_extra: ['dashboard_default.yaml', 'world_size_4.yaml']
- name: Qwen/Qwen3-235B-A22B-Instruct-2507
yaml_extra: ['dashboard_default.yaml', 'world_size_8.yaml']
- name: ai21labs/AI21-Jamba-1.5-Large
yaml_extra: ['dashboard_default.yaml', 'world_size_4.yaml']
# DISABLED: Network timeout downloading from Hugging Face
# - name: ai21labs/AI21-Jamba-1.5-Large
# yaml_extra: ['dashboard_default.yaml', 'world_size_4.yaml']
- name: nvidia/OpenReasoning-Nemotron-32B
yaml_extra: ['dashboard_default.yaml', 'world_size_4.yaml']
# DISABLED: Auto-deploy compilation error
# - name: mistralai/Mistral-Large-Instruct-v2.1
# yaml_extra: ['dashboard_default.yaml', 'world_size_4.yaml']
- name: mistralai/Mistral-Large-Instruct-v2.1
yaml_extra: ['dashboard_default.yaml', 'world_size_4.yaml']
- name: deepseek-ai/DeepSeek-R1-Distill-Llama-70B
yaml_extra: ['dashboard_default.yaml', 'world_size_8.yaml', 'compile_backend_torch_cudagraph.yaml']
# DISABLED: Auto-deploy compilation error
# - name: deepseek-ai/DeepSeek-R1-Distill-Qwen-32B
# yaml_extra: ['dashboard_default.yaml', 'world_size_8.yaml']
# DISABLED: Graph transformation error in auto-deploy
yaml_extra: ['dashboard_default.yaml', 'world_size_8.yaml']
- name: deepseek-ai/DeepSeek-R1-Distill-Qwen-32B
yaml_extra: ['dashboard_default.yaml', 'world_size_8.yaml']
# DISABLED: Hangs during graph capture
# - name: mistralai/Mixtral-8x22B-Instruct-v0.1
# yaml_extra: ['dashboard_default.yaml', 'world_size_8.yaml']
# DISABLED: FakeTensorMode error in unified_attn export
@ -182,7 +180,7 @@ models:
# - name: nvidia/Llama-3.1-405B-Instruct-FP8
# yaml_extra: ['dashboard_default.yaml', 'world_size_8.yaml']
- name: nvidia/Llama-3.1-Nemotron-70B-Instruct-HF
yaml_extra: ['dashboard_default.yaml', 'world_size_8.yaml', 'compile_backend_torch_cudagraph.yaml']
yaml_extra: ['dashboard_default.yaml', 'world_size_8.yaml']
# DISABLED: Model loading failure - dynamic module registry issue
# - name: nvidia/Llama-3_1-Nemotron-51B-Instruct
# yaml_extra: ['dashboard_default.yaml', 'world_size_8.yaml', 'simple_shard_only.yaml']
@ -198,18 +196,19 @@ models:
yaml_extra: ['dashboard_default.yaml', 'world_size_8.yaml', 'simple_shard_only.yaml']
- name: deepseek-ai/DeepSeek-R1
yaml_extra: ['dashboard_default.yaml', 'world_size_8.yaml', 'deepseek_v3_lite.yaml']
# DISABLED: Auto-deploy compilation error
# DISABLED: Auto-deploy compilation error - shape mismatch - https://github.com/NVIDIA/TensorRT-LLM/issues/10978
# - name: deepseek-ai/DeepSeek-V3
# yaml_extra: ['dashboard_default.yaml', 'world_size_8.yaml', 'deepseek_v3_lite.yaml']
# DISABLED: Assertion failure in auto-deploy transform pipeline
# DISABLED: Auto-deploy compilation error - shape mismatch - https://github.com/NVIDIA/TensorRT-LLM/issues/10978
# - name: deepseek-ai/DeepSeek-Coder-V2-Instruct
# yaml_extra: ['dashboard_default.yaml', 'world_size_8.yaml']
- name: Qwen/Qwen3-VL-8B-Instruct
yaml_extra: ['dashboard_default.yaml', 'world_size_8.yaml']
- name: Qwen/Qwen2-VL-72B-Instruct-GPTQ-Int4
yaml_extra: ['dashboard_default.yaml', 'world_size_8.yaml', 'multimodal.yaml', 'compile_backend_torch_cudagraph.yaml']
# DISABLED: NOT SUPPORTED - https://github.com/NVIDIA/TensorRT-LLM/issues/10363
# - name: Qwen/Qwen2-VL-72B-Instruct-GPTQ-Int4
# yaml_extra: ['dashboard_default.yaml', 'world_size_8.yaml', 'multimodal.yaml']
- name: codellama/CodeLlama-70b-Instruct-hf
yaml_extra: ['dashboard_default.yaml', 'world_size_8.yaml', 'compile_backend_torch_cudagraph.yaml']
yaml_extra: ['dashboard_default.yaml', 'world_size_8.yaml']
- name: meta-llama/Llama-3.2-90B-Vision-Instruct
yaml_extra: ['dashboard_default.yaml', 'world_size_8.yaml', 'multimodal.yaml']
- name: openai/gpt-oss-120b