[None][chore] update model list (#11364)

Signed-off-by: Tal Cherckez <127761168+tcherckez-nvidia@users.noreply.github.com>
2026-02-16 15:55:08 +08:00 · 2026-02-09 21:27:39 +02:00 · 2026-02-09 21:27:39 +02:00 · ea81a03dd1
commit ea81a03dd1
parent 4a743338c3
3 changed files with 29 additions and 22 deletions
--- a/examples/auto_deploy/model_registry/configs/num_hidden_layers_5.yaml
+++ b/examples/auto_deploy/model_registry/configs/num_hidden_layers_5.yaml
@@ -1,4 +1,4 @@
 # Configuration for DeepSeek V3 and R1 with reduced layers
 # Full models are too large, so we test with limited layers
 model_kwargs:
-  num_hidden_layers: 10
+  num_hidden_layers: 5
--- a/examples/auto_deploy/model_registry/configs/qwen3_vl.yaml
+++ b/examples/auto_deploy/model_registry/configs/qwen3_vl.yaml
@@ -0,0 +1,5 @@
+# Configuration for Qwen3-VL models
+# Forces consistent dtype to avoid BFloat16/Float32 mismatch
+
+model_kwargs:
+  torch_dtype: bfloat16
--- a/examples/auto_deploy/model_registry/models.yaml
+++ b/examples/auto_deploy/model_registry/models.yaml
@@ -18,8 +18,9 @@ models:
 # DISABLED: TorchDynamo compilation error - fake tensor dispatch failure
 # - name: apple/OpenELM-3B-Instruct
 #   yaml_extra: ['dashboard_default.yaml', 'world_size_1.yaml', 'openelm.yaml']
-- name: microsoft/Phi-4-mini-instruct
-  yaml_extra: ['dashboard_default.yaml', 'world_size_1.yaml']
+# DISABLED: model does not support the installed transformers version - https://github.com/NVIDIA/TensorRT-LLM/issues/10980
+# - name: microsoft/Phi-4-mini-instruct
+#   yaml_extra: ['dashboard_default.yaml', 'world_size_1.yaml']
 - name: microsoft/Phi-4-mini-reasoning
  yaml_extra: ['dashboard_default.yaml', 'world_size_1.yaml']
 - name: google/gemma-3-1b-it
@@ -115,8 +116,9 @@ models:
  yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml']
 - name: nvidia/NVIDIA-Nemotron-Nano-9B-v2-FP8
  yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml']
-- name: nvidia/NVIDIA-Nemotron-Nano-9B-v2-NVFP4
-  yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml']
+# DISABLED: NVFP4 quantization is not supported on pre-Blackwell (BLW) GPUs - CW has only Hopper
+# - name: nvidia/NVIDIA-Nemotron-Nano-9B-v2-NVFP4
+#   yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml']
 - name: nvidia/NVIDIA-Nemotron-Nano-12B-v2-VL-FP8
  yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml', 'multimodal.yaml']
 - name: google/gemma-3-27b-it
@@ -126,10 +128,6 @@ models:
 # DISABLED: Network timeout downloading from Hugging Face
 # - name: ai21labs/AI21-Jamba-1.5-Mini
 #   yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml']
-- name: zai-org/glm-4v-9b
-  yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml', 'multimodal.yaml']
-# - name: zai-org/GLM-4.7
-#   yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml', 'multimodal.yaml']
 - name: meta-llama/Llama-3.2-11B-Vision-Instruct
  yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml', 'multimodal.yaml']
 - name: meta-llama/Llama-3.3-70B-Instruct
@@ -166,8 +164,8 @@ models:
  yaml_extra: ['dashboard_default.yaml', 'world_size_4.yaml']
 - name: nvidia/OpenReasoning-Nemotron-32B
  yaml_extra: ['dashboard_default.yaml', 'world_size_4.yaml']
-- name: mistralai/Mistral-Large-Instruct-v2.1
-  yaml_extra: ['dashboard_default.yaml', 'world_size_4.yaml']
+- name: mistralai/Mistral-Large-Instruct-2407
+  yaml_extra: ['dashboard_default.yaml', 'world_size_8.yaml']
 - name: deepseek-ai/DeepSeek-R1-Distill-Llama-70B
  yaml_extra: ['dashboard_default.yaml', 'world_size_8.yaml']
 - name: deepseek-ai/DeepSeek-R1-Distill-Qwen-32B
@@ -186,26 +184,30 @@ models:
 # DISABLED: Model loading failure - dynamic module registry issue
 # - name: nvidia/Llama-3_1-Nemotron-51B-Instruct
 #   yaml_extra: ['dashboard_default.yaml', 'world_size_8.yaml', 'simple_shard_only.yaml']
-- name: nvidia/Llama-3_1-Nemotron-Ultra-253B-v1
-  yaml_extra: ['dashboard_default.yaml', 'world_size_8.yaml', 'simple_shard_only.yaml']
-- name: nvidia/Llama-3_1-Nemotron-Ultra-253B-v1-FP8
-  yaml_extra: ['dashboard_default.yaml', 'world_size_8.yaml', 'simple_shard_only.yaml']
-- name: nvidia/Llama-3_3-Nemotron-Super-49B-v1
-  yaml_extra: ['dashboard_default.yaml', 'world_size_8.yaml', 'simple_shard_only.yaml']
+# DISABLED: model does not support the installed transformers version - https://github.com/NVIDIA/TensorRT-LLM/issues/10980
+# - name: nvidia/Llama-3_1-Nemotron-Ultra-253B-v1
+#   yaml_extra: ['dashboard_default.yaml', 'world_size_8.yaml', 'simple_shard_only.yaml']
+# DISABLED: model does not support the installed transformers version - https://github.com/NVIDIA/TensorRT-LLM/issues/10980
+# - name: nvidia/Llama-3_1-Nemotron-Ultra-253B-v1-FP8
+#   yaml_extra: ['dashboard_default.yaml', 'world_size_8.yaml', 'simple_shard_only.yaml']
+# DISABLED: model does not support the installed transformers version - https://github.com/NVIDIA/TensorRT-LLM/issues/10980
+# - name: nvidia/Llama-3_3-Nemotron-Super-49B-v1
+#   yaml_extra: ['dashboard_default.yaml', 'world_size_8.yaml', 'simple_shard_only.yaml']
 - name: Qwen/Qwen3-30B-A3B
  yaml_extra: ['dashboard_default.yaml', 'world_size_8.yaml', 'simple_shard_only.yaml']
 - name: Qwen/Qwen3-235B-A22B
  yaml_extra: ['dashboard_default.yaml', 'world_size_8.yaml', 'simple_shard_only.yaml']
-- name: deepseek-ai/DeepSeek-R1
-  yaml_extra: ['dashboard_default.yaml', 'world_size_8.yaml', 'deepseek_v3_lite.yaml']
+# DISABLED: Auto-deploy compilation error - shape mismatch - https://github.com/NVIDIA/TensorRT-LLM/issues/10978
+# - name: deepseek-ai/DeepSeek-R1
+#   yaml_extra: ['dashboard_default.yaml', 'world_size_8.yaml', 'num_hidden_layers_5.yaml']
 # DISABLED: Auto-deploy compilation error - shape mismatch - https://github.com/NVIDIA/TensorRT-LLM/issues/10978
 # - name: deepseek-ai/DeepSeek-V3
-#   yaml_extra: ['dashboard_default.yaml', 'world_size_8.yaml', 'deepseek_v3_lite.yaml']
+#   yaml_extra: ['dashboard_default.yaml', 'world_size_8.yaml', 'num_hidden_layers_5.yaml']
 # DISABLED: Auto-deploy compilation error - shape mismatch - https://github.com/NVIDIA/TensorRT-LLM/issues/10978
 # - name: deepseek-ai/DeepSeek-Coder-V2-Instruct
 #   yaml_extra: ['dashboard_default.yaml', 'world_size_8.yaml']
 - name: Qwen/Qwen3-VL-8B-Instruct
-  yaml_extra: ['dashboard_default.yaml', 'world_size_8.yaml']
+  yaml_extra: ['dashboard_default.yaml', 'world_size_8.yaml', 'multimodal.yaml', 'qwen3_vl.yaml']
 # DISABLED: NOT SUPPORTED - https://github.com/NVIDIA/TensorRT-LLM/issues/10363
 # - name: Qwen/Qwen2-VL-72B-Instruct-GPTQ-Int4
 #   yaml_extra: ['dashboard_default.yaml', 'world_size_8.yaml', 'multimodal.yaml']
@@ -214,7 +216,7 @@ models:
 - name: meta-llama/Llama-3.2-90B-Vision-Instruct
  yaml_extra: ['dashboard_default.yaml', 'world_size_8.yaml', 'multimodal.yaml']
 - name: openai/gpt-oss-120b
-  yaml_extra: ['dashboard_default.yaml', 'world_size_8.yaml']
+  yaml_extra: ['dashboard_default.yaml', 'world_size_8.yaml', 'num_hidden_layers_5.yaml']
 - name: meta-llama/Llama-4-Scout-17B-16E-Instruct
  yaml_extra: ['dashboard_default.yaml', 'world_size_8.yaml', 'multimodal.yaml', 'llama4_scout.yaml']
 - name: meta-llama/Llama-4-Maverick-17B-128E-Instruct