From ea81a03dd16422d263a2c7ed751d133bb772c7b3 Mon Sep 17 00:00:00 2001
From: tcherckez-nvidia <127761168+tcherckez-nvidia@users.noreply.github.com>
Date: Mon, 9 Feb 2026 21:27:39 +0200
Subject: [PATCH] [None][chore] update model list (#11364)

Signed-off-by: Tal Cherckez <127761168+tcherckez-nvidia@users.noreply.github.com>
---
 ..._v3_lite.yaml => num_hidden_layers_5.yaml} |  2 +-
 .../model_registry/configs/qwen3_vl.yaml      |  5 +++
 .../auto_deploy/model_registry/models.yaml    | 44 ++++++++++---------
 3 files changed, 29 insertions(+), 22 deletions(-)
 rename examples/auto_deploy/model_registry/configs/{deepseek_v3_lite.yaml => num_hidden_layers_5.yaml} (84%)
 create mode 100644 examples/auto_deploy/model_registry/configs/qwen3_vl.yaml

diff --git a/examples/auto_deploy/model_registry/configs/deepseek_v3_lite.yaml b/examples/auto_deploy/model_registry/configs/num_hidden_layers_5.yaml
similarity index 84%
rename from examples/auto_deploy/model_registry/configs/deepseek_v3_lite.yaml
rename to examples/auto_deploy/model_registry/configs/num_hidden_layers_5.yaml
index 8475097ba2..0d8d094673 100644
--- a/examples/auto_deploy/model_registry/configs/deepseek_v3_lite.yaml
+++ b/examples/auto_deploy/model_registry/configs/num_hidden_layers_5.yaml
@@ -1,4 +1,4 @@
 # Configuration for DeepSeek V3 and R1 with reduced layers
 # Full models are too large, so we test with limited layers
 model_kwargs:
-  num_hidden_layers: 10
+  num_hidden_layers: 5
diff --git a/examples/auto_deploy/model_registry/configs/qwen3_vl.yaml b/examples/auto_deploy/model_registry/configs/qwen3_vl.yaml
new file mode 100644
index 0000000000..92cbece26a
--- /dev/null
+++ b/examples/auto_deploy/model_registry/configs/qwen3_vl.yaml
@@ -0,0 +1,5 @@
+# Configuration for Qwen3-VL models
+# Forces consistent dtype to avoid BFloat16/Float32 mismatch
+
+model_kwargs:
+  torch_dtype: bfloat16
diff --git a/examples/auto_deploy/model_registry/models.yaml b/examples/auto_deploy/model_registry/models.yaml
index 879a5eb0c6..fb268d7b39 100644
--- a/examples/auto_deploy/model_registry/models.yaml
+++ b/examples/auto_deploy/model_registry/models.yaml
@@ -18,8 +18,9 @@ models:
 # DISABLED: TorchDynamo compilation error - fake tensor dispatch failure
 # - name: apple/OpenELM-3B-Instruct
 #   yaml_extra: ['dashboard_default.yaml', 'world_size_1.yaml', 'openelm.yaml']
-- name: microsoft/Phi-4-mini-instruct
-  yaml_extra: ['dashboard_default.yaml', 'world_size_1.yaml']
+# DISABLED: model does not support the installed transformers version - https://github.com/NVIDIA/TensorRT-LLM/issues/10980
+# - name: microsoft/Phi-4-mini-instruct
+#   yaml_extra: ['dashboard_default.yaml', 'world_size_1.yaml']
 - name: microsoft/Phi-4-mini-reasoning
   yaml_extra: ['dashboard_default.yaml', 'world_size_1.yaml']
 - name: google/gemma-3-1b-it
@@ -115,8 +116,9 @@ models:
   yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml']
 - name: nvidia/NVIDIA-Nemotron-Nano-9B-v2-FP8
   yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml']
-- name: nvidia/NVIDIA-Nemotron-Nano-9B-v2-NVFP4
-  yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml']
+# DISABLED: NVFP4 quantization is not supported on pre-Blackwell GPUs - CW has only Hopper
+# - name: nvidia/NVIDIA-Nemotron-Nano-9B-v2-NVFP4
+#   yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml']
 - name: nvidia/NVIDIA-Nemotron-Nano-12B-v2-VL-FP8
   yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml', 'multimodal.yaml']
 - name: google/gemma-3-27b-it
@@ -126,10 +128,6 @@ models:
 # DISABLED: Network timeout downloading from Hugging Face
 # - name: ai21labs/AI21-Jamba-1.5-Mini
 #   yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml']
-- name: zai-org/glm-4v-9b
-  yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml', 'multimodal.yaml']
-# - name: zai-org/GLM-4.7
-#   yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml', 'multimodal.yaml']
 - name: meta-llama/Llama-3.2-11B-Vision-Instruct
   yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml', 'multimodal.yaml']
 - name: meta-llama/Llama-3.3-70B-Instruct
@@ -166,8 +164,8 @@ models:
   yaml_extra: ['dashboard_default.yaml', 'world_size_4.yaml']
 - name: nvidia/OpenReasoning-Nemotron-32B
   yaml_extra: ['dashboard_default.yaml', 'world_size_4.yaml']
-- name: mistralai/Mistral-Large-Instruct-v2.1
-  yaml_extra: ['dashboard_default.yaml', 'world_size_4.yaml']
+- name: mistralai/Mistral-Large-Instruct-2407
+  yaml_extra: ['dashboard_default.yaml', 'world_size_8.yaml']
 - name: deepseek-ai/DeepSeek-R1-Distill-Llama-70B
   yaml_extra: ['dashboard_default.yaml', 'world_size_8.yaml']
 - name: deepseek-ai/DeepSeek-R1-Distill-Qwen-32B
@@ -186,26 +184,30 @@ models:
 # DISABLED: Model loading failure - dynamic module registry issue
 # - name: nvidia/Llama-3_1-Nemotron-51B-Instruct
 #   yaml_extra: ['dashboard_default.yaml', 'world_size_8.yaml', 'simple_shard_only.yaml']
-- name: nvidia/Llama-3_1-Nemotron-Ultra-253B-v1
-  yaml_extra: ['dashboard_default.yaml', 'world_size_8.yaml', 'simple_shard_only.yaml']
-- name: nvidia/Llama-3_1-Nemotron-Ultra-253B-v1-FP8
-  yaml_extra: ['dashboard_default.yaml', 'world_size_8.yaml', 'simple_shard_only.yaml']
-- name: nvidia/Llama-3_3-Nemotron-Super-49B-v1
-  yaml_extra: ['dashboard_default.yaml', 'world_size_8.yaml', 'simple_shard_only.yaml']
+# DISABLED: model does not support the installed transformers version - https://github.com/NVIDIA/TensorRT-LLM/issues/10980
+# - name: nvidia/Llama-3_1-Nemotron-Ultra-253B-v1
+#   yaml_extra: ['dashboard_default.yaml', 'world_size_8.yaml', 'simple_shard_only.yaml']
+# DISABLED: model does not support the installed transformers version - https://github.com/NVIDIA/TensorRT-LLM/issues/10980
+# - name: nvidia/Llama-3_1-Nemotron-Ultra-253B-v1-FP8
+#   yaml_extra: ['dashboard_default.yaml', 'world_size_8.yaml', 'simple_shard_only.yaml']
+# DISABLED: model does not support the installed transformers version - https://github.com/NVIDIA/TensorRT-LLM/issues/10980
+# - name: nvidia/Llama-3_3-Nemotron-Super-49B-v1
+#   yaml_extra: ['dashboard_default.yaml', 'world_size_8.yaml', 'simple_shard_only.yaml']
 - name: Qwen/Qwen3-30B-A3B
   yaml_extra: ['dashboard_default.yaml', 'world_size_8.yaml', 'simple_shard_only.yaml']
 - name: Qwen/Qwen3-235B-A22B
   yaml_extra: ['dashboard_default.yaml', 'world_size_8.yaml', 'simple_shard_only.yaml']
-- name: deepseek-ai/DeepSeek-R1
-  yaml_extra: ['dashboard_default.yaml', 'world_size_8.yaml', 'deepseek_v3_lite.yaml']
+# DISABLED: Auto-deploy compilation error - shape mismatch - https://github.com/NVIDIA/TensorRT-LLM/issues/10978
+# - name: deepseek-ai/DeepSeek-R1
+#   yaml_extra: ['dashboard_default.yaml', 'world_size_8.yaml', 'num_hidden_layers_5.yaml']
 # DISABLED: Auto-deploy compilation error - shape mismatch - https://github.com/NVIDIA/TensorRT-LLM/issues/10978
 # - name: deepseek-ai/DeepSeek-V3
-#   yaml_extra: ['dashboard_default.yaml', 'world_size_8.yaml', 'deepseek_v3_lite.yaml']
+#   yaml_extra: ['dashboard_default.yaml', 'world_size_8.yaml', 'num_hidden_layers_5.yaml']
 # DISABLED: Auto-deploy compilation error - shape mismatch - https://github.com/NVIDIA/TensorRT-LLM/issues/10978
 # - name: deepseek-ai/DeepSeek-Coder-V2-Instruct
 #   yaml_extra: ['dashboard_default.yaml', 'world_size_8.yaml']
 - name: Qwen/Qwen3-VL-8B-Instruct
-  yaml_extra: ['dashboard_default.yaml', 'world_size_8.yaml']
+  yaml_extra: ['dashboard_default.yaml', 'world_size_8.yaml', 'multimodal.yaml', 'qwen3_vl.yaml']
 # DISABLED: NOT SUPPORTED - https://github.com/NVIDIA/TensorRT-LLM/issues/10363
 # - name: Qwen/Qwen2-VL-72B-Instruct-GPTQ-Int4
 #   yaml_extra: ['dashboard_default.yaml', 'world_size_8.yaml', 'multimodal.yaml']
@@ -214,7 +216,7 @@ models:
 - name: meta-llama/Llama-3.2-90B-Vision-Instruct
   yaml_extra: ['dashboard_default.yaml', 'world_size_8.yaml', 'multimodal.yaml']
 - name: openai/gpt-oss-120b
-  yaml_extra: ['dashboard_default.yaml', 'world_size_8.yaml']
+  yaml_extra: ['dashboard_default.yaml', 'world_size_8.yaml', 'num_hidden_layers_5.yaml']
 - name: meta-llama/Llama-4-Scout-17B-16E-Instruct
   yaml_extra: ['dashboard_default.yaml', 'world_size_8.yaml', 'multimodal.yaml', 'llama4_scout.yaml']
 - name: meta-llama/Llama-4-Maverick-17B-128E-Instruct