[None][chore] update model list (#11364)

Signed-off-by: Tal Cherckez <127761168+tcherckez-nvidia@users.noreply.github.com>
This commit is contained in:
tcherckez-nvidia 2026-02-09 21:27:39 +02:00 committed by GitHub
parent 4a743338c3
commit ea81a03dd1
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 29 additions and 22 deletions

View File

@ -1,4 +1,4 @@
# Configuration for DeepSeek V3 and R1 with reduced layers
# Full models are too large, so we test with limited layers
model_kwargs:
num_hidden_layers: 10
num_hidden_layers: 5

View File

@ -0,0 +1,5 @@
# Configuration for Qwen3-VL models
# Forces consistent dtype to avoid BFloat16/Float32 mismatch
model_kwargs:
torch_dtype: bfloat16

View File

@ -18,8 +18,9 @@ models:
# DISABLED: TorchDynamo compilation error - fake tensor dispatch failure
# - name: apple/OpenELM-3B-Instruct
# yaml_extra: ['dashboard_default.yaml', 'world_size_1.yaml', 'openelm.yaml']
- name: microsoft/Phi-4-mini-instruct
yaml_extra: ['dashboard_default.yaml', 'world_size_1.yaml']
# DISABLED: model does not support the installed transformers version - https://github.com/NVIDIA/TensorRT-LLM/issues/10980
# - name: microsoft/Phi-4-mini-instruct
# yaml_extra: ['dashboard_default.yaml', 'world_size_1.yaml']
- name: microsoft/Phi-4-mini-reasoning
yaml_extra: ['dashboard_default.yaml', 'world_size_1.yaml']
- name: google/gemma-3-1b-it
@ -115,8 +116,9 @@ models:
yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml']
- name: nvidia/NVIDIA-Nemotron-Nano-9B-v2-FP8
yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml']
- name: nvidia/NVIDIA-Nemotron-Nano-9B-v2-NVFP4
yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml']
# DISABLED: NVFP4 quantization is not supported on pre-Blackwell (BLW) GPUs - CW has only Hopper
# - name: nvidia/NVIDIA-Nemotron-Nano-9B-v2-NVFP4
# yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml']
- name: nvidia/NVIDIA-Nemotron-Nano-12B-v2-VL-FP8
yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml', 'multimodal.yaml']
- name: google/gemma-3-27b-it
@ -126,10 +128,6 @@ models:
# DISABLED: Network timeout downloading from Hugging Face
# - name: ai21labs/AI21-Jamba-1.5-Mini
# yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml']
- name: zai-org/glm-4v-9b
yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml', 'multimodal.yaml']
# - name: zai-org/GLM-4.7
# yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml', 'multimodal.yaml']
- name: meta-llama/Llama-3.2-11B-Vision-Instruct
yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml', 'multimodal.yaml']
- name: meta-llama/Llama-3.3-70B-Instruct
@ -166,8 +164,8 @@ models:
yaml_extra: ['dashboard_default.yaml', 'world_size_4.yaml']
- name: nvidia/OpenReasoning-Nemotron-32B
yaml_extra: ['dashboard_default.yaml', 'world_size_4.yaml']
- name: mistralai/Mistral-Large-Instruct-v2.1
yaml_extra: ['dashboard_default.yaml', 'world_size_4.yaml']
- name: mistralai/Mistral-Large-Instruct-2407
yaml_extra: ['dashboard_default.yaml', 'world_size_8.yaml']
- name: deepseek-ai/DeepSeek-R1-Distill-Llama-70B
yaml_extra: ['dashboard_default.yaml', 'world_size_8.yaml']
- name: deepseek-ai/DeepSeek-R1-Distill-Qwen-32B
@ -186,26 +184,30 @@ models:
# DISABLED: Model loading failure - dynamic module registry issue
# - name: nvidia/Llama-3_1-Nemotron-51B-Instruct
# yaml_extra: ['dashboard_default.yaml', 'world_size_8.yaml', 'simple_shard_only.yaml']
- name: nvidia/Llama-3_1-Nemotron-Ultra-253B-v1
yaml_extra: ['dashboard_default.yaml', 'world_size_8.yaml', 'simple_shard_only.yaml']
- name: nvidia/Llama-3_1-Nemotron-Ultra-253B-v1-FP8
yaml_extra: ['dashboard_default.yaml', 'world_size_8.yaml', 'simple_shard_only.yaml']
- name: nvidia/Llama-3_3-Nemotron-Super-49B-v1
yaml_extra: ['dashboard_default.yaml', 'world_size_8.yaml', 'simple_shard_only.yaml']
# DISABLED: model does not support the installed transformers version - https://github.com/NVIDIA/TensorRT-LLM/issues/10980
# - name: nvidia/Llama-3_1-Nemotron-Ultra-253B-v1
# yaml_extra: ['dashboard_default.yaml', 'world_size_8.yaml', 'simple_shard_only.yaml']
# DISABLED: model does not support the installed transformers version - https://github.com/NVIDIA/TensorRT-LLM/issues/10980
# - name: nvidia/Llama-3_1-Nemotron-Ultra-253B-v1-FP8
# yaml_extra: ['dashboard_default.yaml', 'world_size_8.yaml', 'simple_shard_only.yaml']
# DISABLED: model does not support the installed transformers version - https://github.com/NVIDIA/TensorRT-LLM/issues/10980
# - name: nvidia/Llama-3_3-Nemotron-Super-49B-v1
# yaml_extra: ['dashboard_default.yaml', 'world_size_8.yaml', 'simple_shard_only.yaml']
- name: Qwen/Qwen3-30B-A3B
yaml_extra: ['dashboard_default.yaml', 'world_size_8.yaml', 'simple_shard_only.yaml']
- name: Qwen/Qwen3-235B-A22B
yaml_extra: ['dashboard_default.yaml', 'world_size_8.yaml', 'simple_shard_only.yaml']
- name: deepseek-ai/DeepSeek-R1
yaml_extra: ['dashboard_default.yaml', 'world_size_8.yaml', 'deepseek_v3_lite.yaml']
# DISABLED: Auto-deploy compilation error - shape mismatch - https://github.com/NVIDIA/TensorRT-LLM/issues/10978
# - name: deepseek-ai/DeepSeek-R1
# yaml_extra: ['dashboard_default.yaml', 'world_size_8.yaml', 'num_hidden_layers_5.yaml']
# DISABLED: Auto-deploy compilation error - shape mismatch - https://github.com/NVIDIA/TensorRT-LLM/issues/10978
# - name: deepseek-ai/DeepSeek-V3
# yaml_extra: ['dashboard_default.yaml', 'world_size_8.yaml', 'deepseek_v3_lite.yaml']
# yaml_extra: ['dashboard_default.yaml', 'world_size_8.yaml', 'num_hidden_layers_5.yaml']
# DISABLED: Auto-deploy compilation error - shape mismatch - https://github.com/NVIDIA/TensorRT-LLM/issues/10978
# - name: deepseek-ai/DeepSeek-Coder-V2-Instruct
# yaml_extra: ['dashboard_default.yaml', 'world_size_8.yaml']
- name: Qwen/Qwen3-VL-8B-Instruct
yaml_extra: ['dashboard_default.yaml', 'world_size_8.yaml']
yaml_extra: ['dashboard_default.yaml', 'world_size_8.yaml', 'multimodal.yaml', 'qwen3_vl.yaml']
# DISABLED: NOT SUPPORTED - https://github.com/NVIDIA/TensorRT-LLM/issues/10363
# - name: Qwen/Qwen2-VL-72B-Instruct-GPTQ-Int4
# yaml_extra: ['dashboard_default.yaml', 'world_size_8.yaml', 'multimodal.yaml']
@ -214,7 +216,7 @@ models:
- name: meta-llama/Llama-3.2-90B-Vision-Instruct
yaml_extra: ['dashboard_default.yaml', 'world_size_8.yaml', 'multimodal.yaml']
- name: openai/gpt-oss-120b
yaml_extra: ['dashboard_default.yaml', 'world_size_8.yaml']
yaml_extra: ['dashboard_default.yaml', 'world_size_8.yaml', 'num_hidden_layers_5.yaml']
- name: meta-llama/Llama-4-Scout-17B-16E-Instruct
yaml_extra: ['dashboard_default.yaml', 'world_size_8.yaml', 'multimodal.yaml', 'llama4_scout.yaml']
- name: meta-llama/Llama-4-Maverick-17B-128E-Instruct