From ea81a03dd16422d263a2c7ed751d133bb772c7b3 Mon Sep 17 00:00:00 2001 From: tcherckez-nvidia <127761168+tcherckez-nvidia@users.noreply.github.com> Date: Mon, 9 Feb 2026 21:27:39 +0200 Subject: [PATCH] [None][chore] update model list (#11364) Signed-off-by: Tal Cherckez <127761168+tcherckez-nvidia@users.noreply.github.com> --- ..._v3_lite.yaml => num_hidden_layers_5.yaml} | 2 +- .../model_registry/configs/qwen3_vl.yaml | 5 +++ .../auto_deploy/model_registry/models.yaml | 44 ++++++++++--------- 3 files changed, 29 insertions(+), 22 deletions(-) rename examples/auto_deploy/model_registry/configs/{deepseek_v3_lite.yaml => num_hidden_layers_5.yaml} (84%) create mode 100644 examples/auto_deploy/model_registry/configs/qwen3_vl.yaml diff --git a/examples/auto_deploy/model_registry/configs/deepseek_v3_lite.yaml b/examples/auto_deploy/model_registry/configs/num_hidden_layers_5.yaml similarity index 84% rename from examples/auto_deploy/model_registry/configs/deepseek_v3_lite.yaml rename to examples/auto_deploy/model_registry/configs/num_hidden_layers_5.yaml index 8475097ba2..0d8d094673 100644 --- a/examples/auto_deploy/model_registry/configs/deepseek_v3_lite.yaml +++ b/examples/auto_deploy/model_registry/configs/num_hidden_layers_5.yaml @@ -1,4 +1,4 @@ # Configuration for DeepSeek V3 and R1 with reduced layers # Full models are too large, so we test with limited layers model_kwargs: - num_hidden_layers: 10 + num_hidden_layers: 5 diff --git a/examples/auto_deploy/model_registry/configs/qwen3_vl.yaml b/examples/auto_deploy/model_registry/configs/qwen3_vl.yaml new file mode 100644 index 0000000000..92cbece26a --- /dev/null +++ b/examples/auto_deploy/model_registry/configs/qwen3_vl.yaml @@ -0,0 +1,5 @@ +# Configuration for Qwen3-VL models +# Forces consistent dtype to avoid BFloat16/Float32 mismatch + +model_kwargs: + torch_dtype: bfloat16 diff --git a/examples/auto_deploy/model_registry/models.yaml b/examples/auto_deploy/model_registry/models.yaml index 879a5eb0c6..fb268d7b39 100644 --- a/examples/auto_deploy/model_registry/models.yaml +++ b/examples/auto_deploy/model_registry/models.yaml @@ -18,8 +18,9 @@ models: # DISABLED: TorchDynamo compilation error - fake tensor dispatch failure # - name: apple/OpenELM-3B-Instruct # yaml_extra: ['dashboard_default.yaml', 'world_size_1.yaml', 'openelm.yaml'] -- name: microsoft/Phi-4-mini-instruct - yaml_extra: ['dashboard_default.yaml', 'world_size_1.yaml'] +# DISABLED: model not supporting installed transformers version - https://github.com/NVIDIA/TensorRT-LLM/issues/10980 +# - name: microsoft/Phi-4-mini-instruct +# yaml_extra: ['dashboard_default.yaml', 'world_size_1.yaml'] - name: microsoft/Phi-4-mini-reasoning yaml_extra: ['dashboard_default.yaml', 'world_size_1.yaml'] - name: google/gemma-3-1b-it @@ -115,8 +116,9 @@ models: yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml'] - name: nvidia/NVIDIA-Nemotron-Nano-9B-v2-FP8 yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml'] -- name: nvidia/NVIDIA-Nemotron-Nano-9B-v2-NVFP4 - yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml'] +# DISABLED: NVFP4 quantization not supported for pre BLW - CW has only Hopper +# - name: nvidia/NVIDIA-Nemotron-Nano-9B-v2-NVFP4 +# yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml'] - name: nvidia/NVIDIA-Nemotron-Nano-12B-v2-VL-FP8 yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml', 'multimodal.yaml'] - name: google/gemma-3-27b-it @@ -126,10 +128,6 @@ models: # DISABLED: Network timeout downloading from Hugging Face # - name: ai21labs/AI21-Jamba-1.5-Mini # yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml'] -- name: zai-org/glm-4v-9b - yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml', 'multimodal.yaml'] -# - name: zai-org/GLM-4.7 -# yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml', 'multimodal.yaml'] - name: meta-llama/Llama-3.2-11B-Vision-Instruct yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml', 'multimodal.yaml'] - name: meta-llama/Llama-3.3-70B-Instruct @@ -166,8 +164,8 @@ models: yaml_extra: ['dashboard_default.yaml', 'world_size_4.yaml'] - name: nvidia/OpenReasoning-Nemotron-32B yaml_extra: ['dashboard_default.yaml', 'world_size_4.yaml'] -- name: mistralai/Mistral-Large-Instruct-v2.1 - yaml_extra: ['dashboard_default.yaml', 'world_size_4.yaml'] +- name: mistralai/Mistral-Large-Instruct-2407 + yaml_extra: ['dashboard_default.yaml', 'world_size_8.yaml'] - name: deepseek-ai/DeepSeek-R1-Distill-Llama-70B yaml_extra: ['dashboard_default.yaml', 'world_size_8.yaml'] - name: deepseek-ai/DeepSeek-R1-Distill-Qwen-32B @@ -186,26 +184,30 @@ models: # DISABLED: Model loading failure - dynamic module registry issue # - name: nvidia/Llama-3_1-Nemotron-51B-Instruct # yaml_extra: ['dashboard_default.yaml', 'world_size_8.yaml', 'simple_shard_only.yaml'] -- name: nvidia/Llama-3_1-Nemotron-Ultra-253B-v1 - yaml_extra: ['dashboard_default.yaml', 'world_size_8.yaml', 'simple_shard_only.yaml'] -- name: nvidia/Llama-3_1-Nemotron-Ultra-253B-v1-FP8 - yaml_extra: ['dashboard_default.yaml', 'world_size_8.yaml', 'simple_shard_only.yaml'] -- name: nvidia/Llama-3_3-Nemotron-Super-49B-v1 - yaml_extra: ['dashboard_default.yaml', 'world_size_8.yaml', 'simple_shard_only.yaml'] +# DISABLED: model not supporting installed transformers version - https://github.com/NVIDIA/TensorRT-LLM/issues/10980 +# - name: nvidia/Llama-3_1-Nemotron-Ultra-253B-v1 +# yaml_extra: ['dashboard_default.yaml', 'world_size_8.yaml', 'simple_shard_only.yaml'] +# DISABLED: model not supporting installed transformers version - https://github.com/NVIDIA/TensorRT-LLM/issues/10980 +# - name: nvidia/Llama-3_1-Nemotron-Ultra-253B-v1-FP8 +# yaml_extra: ['dashboard_default.yaml', 'world_size_8.yaml', 'simple_shard_only.yaml'] +# DISABLED: model not supporting installed transformers version - https://github.com/NVIDIA/TensorRT-LLM/issues/10980 +# - name: nvidia/Llama-3_3-Nemotron-Super-49B-v1 +# yaml_extra: ['dashboard_default.yaml', 'world_size_8.yaml', 'simple_shard_only.yaml'] - name: Qwen/Qwen3-30B-A3B yaml_extra: ['dashboard_default.yaml', 'world_size_8.yaml', 'simple_shard_only.yaml'] - name: Qwen/Qwen3-235B-A22B yaml_extra: ['dashboard_default.yaml', 'world_size_8.yaml', 'simple_shard_only.yaml'] -- name: deepseek-ai/DeepSeek-R1 - yaml_extra: ['dashboard_default.yaml', 'world_size_8.yaml', 'deepseek_v3_lite.yaml'] +# DISABLED: Auto-deploy compilation error - shape mismatch - https://github.com/NVIDIA/TensorRT-LLM/issues/10978 +# - name: deepseek-ai/DeepSeek-R1 +# yaml_extra: ['dashboard_default.yaml', 'world_size_8.yaml', 'num_hidden_layers_5.yaml'] # DISABLED: Auto-deploy compilation error - shape mismatch - https://github.com/NVIDIA/TensorRT-LLM/issues/10978 # - name: deepseek-ai/DeepSeek-V3 -# yaml_extra: ['dashboard_default.yaml', 'world_size_8.yaml', 'deepseek_v3_lite.yaml'] +# yaml_extra: ['dashboard_default.yaml', 'world_size_8.yaml', 'num_hidden_layers_5.yaml'] # DISABLED: Auto-deploy compilation error - shape mismatch - https://github.com/NVIDIA/TensorRT-LLM/issues/10978 # - name: deepseek-ai/DeepSeek-Coder-V2-Instruct # yaml_extra: ['dashboard_default.yaml', 'world_size_8.yaml'] - name: Qwen/Qwen3-VL-8B-Instruct - yaml_extra: ['dashboard_default.yaml', 'world_size_8.yaml'] + yaml_extra: ['dashboard_default.yaml', 'world_size_8.yaml', 'multimodal.yaml', 'qwen3_vl.yaml'] # DISABLED: NOT SUPPORTED - https://github.com/NVIDIA/TensorRT-LLM/issues/10363 # - name: Qwen/Qwen2-VL-72B-Instruct-GPTQ-Int4 # yaml_extra: ['dashboard_default.yaml', 'world_size_8.yaml', 'multimodal.yaml'] @@ -214,7 +216,7 @@ models: - name: meta-llama/Llama-3.2-90B-Vision-Instruct yaml_extra: ['dashboard_default.yaml', 'world_size_8.yaml', 'multimodal.yaml'] - name: openai/gpt-oss-120b - yaml_extra: ['dashboard_default.yaml', 'world_size_8.yaml'] + yaml_extra: ['dashboard_default.yaml', 'world_size_8.yaml', 'num_hidden_layers_5.yaml'] - name: meta-llama/Llama-4-Scout-17B-16E-Instruct yaml_extra: ['dashboard_default.yaml', 'world_size_8.yaml', 'multimodal.yaml', 'llama4_scout.yaml'] - name: meta-llama/Llama-4-Maverick-17B-128E-Instruct