From 43b8a5561c1132f6473cf11a870ebba32da33df3 Mon Sep 17 00:00:00 2001 From: tcherckez-nvidia <127761168+tcherckez-nvidia@users.noreply.github.com> Date: Mon, 26 Jan 2026 16:49:50 +0200 Subject: [PATCH] [None][chore] update AD model list (#10981) Signed-off-by: Tal Cherckez <127761168+tcherckez-nvidia@users.noreply.github.com> --- .../auto_deploy/model_registry/models.yaml | 125 +++++++++--------- 1 file changed, 62 insertions(+), 63 deletions(-) diff --git a/examples/auto_deploy/model_registry/models.yaml b/examples/auto_deploy/model_registry/models.yaml index f8fa3c78b5..61dfed1ab1 100644 --- a/examples/auto_deploy/model_registry/models.yaml +++ b/examples/auto_deploy/model_registry/models.yaml @@ -23,25 +23,27 @@ models: - name: google/gemma-3-1b-it yaml_extra: ['dashboard_default.yaml', 'world_size_1.yaml', 'gemma3_1b.yaml'] - name: meta-llama/Llama-3.1-8B-Instruct - yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml', 'compile_backend_torch_cudagraph.yaml'] -- name: casperhansen/llama-3-8b-instruct-awq - yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml', 'compile_backend_torch_cudagraph.yaml'] + yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml'] +# DISABLED: NOT SUPPORTED - https://github.com/NVIDIA/TensorRT-LLM/issues/10363 +# - name: casperhansen/llama-3-8b-instruct-awq +# yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml'] - name: meta-llama/Llama-3.2-1B-Instruct - yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml', 'compile_backend_torch_cudagraph.yaml'] + yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml'] - name: meta-llama/Llama-3.2-3B-Instruct - yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml', 'compile_backend_torch_cudagraph.yaml'] + yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml'] - name: Qwen/Qwen2.5-1.5B-Instruct yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml'] - name: Qwen/Qwen2.5-3B-Instruct yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml'] - name: Qwen/Qwen2.5-7B-Instruct - yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml', 'compile_backend_torch_cudagraph.yaml'] -- name: Qwen/Qwen2.5-7B-Instruct-AWQ - yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml', 'compile_backend_torch_cudagraph.yaml'] + yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml'] +# DISABLED: NOT SUPPORTED - https://github.com/NVIDIA/TensorRT-LLM/issues/10363 +# - name: Qwen/Qwen2.5-7B-Instruct-AWQ +# yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml'] - name: Qwen/Qwen3-4B yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml'] - name: Qwen/Qwen3-8B - yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml', 'compile_backend_torch_cudagraph.yaml'] + yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml'] - name: microsoft/phi-4 yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml'] - name: microsoft/Phi-4-reasoning @@ -51,31 +53,33 @@ models: - name: google/gemma-1.1-7b-it yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml'] - name: google/gemma-2-2b-it - yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml', 'compile_backend_torch_cudagraph.yaml'] + yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml'] - name: google/gemma-2-9b-it - yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml', 'compile_backend_torch_cudagraph.yaml'] + yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml'] - name: google/codegemma-7b-it yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml'] - name: mistralai/Mistral-7B-Instruct-v0.2 - yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml', 'compile_backend_torch_cudagraph.yaml'] + yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml'] - name: mistralai/Mistral-7B-Instruct-v0.3 - yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml', 'compile_backend_torch_cudagraph.yaml'] + yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml'] - name: TheBloke/Mistral-7B-Instruct-v0.2-GPTQ - yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml', 'compile_backend_torch_cudagraph.yaml'] + yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml'] - name: bigcode/starcoder2-7b yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml'] +- name: bigcode/starcoder2-15b + yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml', 'attn_backend_triton.yaml'] - name: bigcode/starcoder2-15b-instruct-v0.1 yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml', 'attn_backend_triton.yaml'] - name: deepseek-ai/DeepSeek-Prover-V1.5-SFT - yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml', 'compile_backend_torch_cudagraph.yaml'] + yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml'] - name: deepseek-ai/DeepSeek-Prover-V2-7B - yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml', 'compile_backend_torch_cudagraph.yaml'] + yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml'] - name: deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B - yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml', 'compile_backend_torch_cudagraph.yaml'] + yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml'] - name: ibm-granite/granite-3.1-2b-instruct yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml'] - name: ibm-granite/granite-3.1-8b-instruct - yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml', 'compile_backend_torch_cudagraph.yaml'] + yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml'] - name: ibm-granite/granite-3.3-2b-instruct yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml'] - name: ibm-granite/granite-3.3-8b-instruct @@ -85,31 +89,28 @@ models: - name: ibm-granite/granite-guardian-3.2-5b yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml'] - name: meta-llama/CodeLlama-7b-Instruct-hf - yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml', 'compile_backend_torch_cudagraph.yaml'] + yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml'] - name: meta-llama/CodeLlama-7b-Python-hf - yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml', 'compile_backend_torch_cudagraph.yaml'] + yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml'] - name: meta-llama/Llama-2-7b-chat-hf - yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml', 'compile_backend_torch_cudagraph.yaml'] + yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml'] # DISABLED: FakeTensorMode error in unified_attn export # - name: nvidia/Llama-3.1-8B-Instruct-FP8 # yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml'] - name: nvidia/Llama-3.1-Minitron-4B-Depth-Base - yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml', 'compile_backend_torch_cudagraph.yaml'] + yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml'] - name: nvidia/Llama-3.1-Minitron-4B-Width-Base - yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml', 'compile_backend_torch_cudagraph.yaml'] + yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml'] - name: nvidia/Llama-3.1-Nemotron-Nano-8B-v1 - yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml', 'compile_backend_torch_cudagraph.yaml'] + yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml'] - name: nvidia/Mistral-NeMo-Minitron-8B-Base yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml'] - name: openai/gpt-oss-20b yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml'] -# DISABLED: Custom op error - append_paged_kv_cache missing Float kernel -# - name: bigcode/starcoder2-15b -# yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml'] - name: ibm-granite/granite-3.0-8b-instruct - yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml', 'compile_backend_torch_cudagraph.yaml'] + yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml'] - name: mistralai/Ministral-8B-Instruct-2410 - yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml', 'compile_backend_torch_cudagraph.yaml'] + yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml'] - name: nvidia/NVIDIA-Nemotron-Nano-9B-v2-FP8 yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml'] - name: nvidia/NVIDIA-Nemotron-Nano-9B-v2-NVFP4 @@ -123,56 +124,53 @@ models: # DISABLED: Network timeout downloading from Hugging Face # - name: ai21labs/AI21-Jamba-1.5-Mini # yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml'] -- name: THUDM/glm-4v-9b - yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml', 'multimodal.yaml', 'compile_backend_torch_cudagraph.yaml'] +- name: zai-org/glm-4v-9b + yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml', 'multimodal.yaml'] +# - name: zai-org/GLM-4.7 +# yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml', 'multimodal.yaml'] - name: meta-llama/Llama-3.2-11B-Vision-Instruct - yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml', 'multimodal.yaml', 'compile_backend_torch_cudagraph.yaml'] -# DISABLED: Auto-deploy compilation error -# - name: meta-llama/Llama-3.3-70B-Instruct -# yaml_extra: ['dashboard_default.yaml', 'world_size_4.yaml', 'llama3_3_70b.yaml'] + yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml', 'multimodal.yaml'] +- name: meta-llama/Llama-3.3-70B-Instruct + yaml_extra: ['dashboard_default.yaml', 'world_size_4.yaml', 'llama3_3_70b.yaml'] - name: meta-llama/CodeLlama-34b-Instruct-hf - yaml_extra: ['dashboard_default.yaml', 'world_size_4.yaml', 'compile_backend_torch_cudagraph.yaml'] + yaml_extra: ['dashboard_default.yaml', 'world_size_4.yaml'] - name: meta-llama/Llama-2-13b-chat-hf - yaml_extra: ['dashboard_default.yaml', 'world_size_4.yaml', 'compile_backend_torch_cudagraph.yaml'] + yaml_extra: ['dashboard_default.yaml', 'world_size_4.yaml'] - name: microsoft/Phi-3-medium-128k-instruct yaml_extra: ['dashboard_default.yaml', 'world_size_4.yaml'] - name: microsoft/Phi-3-medium-4k-instruct yaml_extra: ['dashboard_default.yaml', 'world_size_4.yaml'] - name: mistralai/Codestral-22B-v0.1 - yaml_extra: ['dashboard_default.yaml', 'world_size_4.yaml', 'compile_backend_torch_cudagraph.yaml'] -# DISABLED: Graph transformation error in auto-deploy -# - name: neuralmagic/Mixtral-8x7B-Instruct-v0.1-FP8 -# yaml_extra: ['dashboard_default.yaml', 'world_size_4.yaml'] + yaml_extra: ['dashboard_default.yaml', 'world_size_4.yaml'] +- name: neuralmagic/Mixtral-8x7B-Instruct-v0.1-FP8 + yaml_extra: ['dashboard_default.yaml', 'world_size_4.yaml'] - name: Qwen/QwQ-32B - yaml_extra: ['dashboard_default.yaml', 'world_size_4.yaml', 'compile_backend_torch_cudagraph.yaml'] + yaml_extra: ['dashboard_default.yaml', 'world_size_4.yaml'] - name: google/gemma-2-27b-it - yaml_extra: ['dashboard_default.yaml', 'world_size_4.yaml', 'compile_backend_torch_cudagraph.yaml'] + yaml_extra: ['dashboard_default.yaml', 'world_size_4.yaml'] - name: perplexity-ai/r1-1776-distill-llama-70b yaml_extra: ['dashboard_default.yaml', 'world_size_4.yaml'] - name: nvidia/NVIDIA-Nemotron-Nano-31B-A3-v3 - yaml_extra: ['dashboard_default.yaml', 'world_size_4.yaml', 'nano_v3.yaml'] -- name: nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-FP8 yaml_extra: ['dashboard_default.yaml', 'world_size_4.yaml'] +- name: nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-FP8 + yaml_extra: ['dashboard_default.yaml', 'world_size_4.yaml', 'nano_v3.yaml'] - name: Qwen/QwQ-32B-Preview - yaml_extra: ['dashboard_default.yaml', 'world_size_4.yaml', 'compile_backend_torch_cudagraph.yaml'] + yaml_extra: ['dashboard_default.yaml', 'world_size_4.yaml'] - name: Qwen/Qwen3-Coder-30B-A3B-Instruct yaml_extra: ['dashboard_default.yaml', 'world_size_4.yaml'] - name: Qwen/Qwen3-235B-A22B-Instruct-2507 + yaml_extra: ['dashboard_default.yaml', 'world_size_8.yaml'] +- name: ai21labs/AI21-Jamba-1.5-Large yaml_extra: ['dashboard_default.yaml', 'world_size_4.yaml'] -# DISABLED: Network timeout downloading from Hugging Face -# - name: ai21labs/AI21-Jamba-1.5-Large -# yaml_extra: ['dashboard_default.yaml', 'world_size_4.yaml'] - name: nvidia/OpenReasoning-Nemotron-32B yaml_extra: ['dashboard_default.yaml', 'world_size_4.yaml'] -# DISABLED: Auto-deploy compilation error -# - name: mistralai/Mistral-Large-Instruct-v2.1 -# yaml_extra: ['dashboard_default.yaml', 'world_size_4.yaml'] +- name: mistralai/Mistral-Large-Instruct-v2.1 + yaml_extra: ['dashboard_default.yaml', 'world_size_4.yaml'] - name: deepseek-ai/DeepSeek-R1-Distill-Llama-70B - yaml_extra: ['dashboard_default.yaml', 'world_size_8.yaml', 'compile_backend_torch_cudagraph.yaml'] -# DISABLED: Auto-deploy compilation error -# - name: deepseek-ai/DeepSeek-R1-Distill-Qwen-32B -# yaml_extra: ['dashboard_default.yaml', 'world_size_8.yaml'] -# DISABLED: Graph transformation error in auto-deploy + yaml_extra: ['dashboard_default.yaml', 'world_size_8.yaml'] +- name: deepseek-ai/DeepSeek-R1-Distill-Qwen-32B + yaml_extra: ['dashboard_default.yaml', 'world_size_8.yaml'] +# DISABLED: stuck in graph capturing # - name: mistralai/Mixtral-8x22B-Instruct-v0.1 # yaml_extra: ['dashboard_default.yaml', 'world_size_8.yaml'] # DISABLED: FakeTensorMode error in unified_attn export @@ -182,7 +180,7 @@ models: # - name: nvidia/Llama-3.1-405B-Instruct-FP8 # yaml_extra: ['dashboard_default.yaml', 'world_size_8.yaml'] - name: nvidia/Llama-3.1-Nemotron-70B-Instruct-HF - yaml_extra: ['dashboard_default.yaml', 'world_size_8.yaml', 'compile_backend_torch_cudagraph.yaml'] + yaml_extra: ['dashboard_default.yaml', 'world_size_8.yaml'] # DISABLED: Model loading failure - dynamic module registry issue # - name: nvidia/Llama-3_1-Nemotron-51B-Instruct # yaml_extra: ['dashboard_default.yaml', 'world_size_8.yaml', 'simple_shard_only.yaml'] @@ -198,18 +196,19 @@ models: yaml_extra: ['dashboard_default.yaml', 'world_size_8.yaml', 'simple_shard_only.yaml'] - name: deepseek-ai/DeepSeek-R1 yaml_extra: ['dashboard_default.yaml', 'world_size_8.yaml', 'deepseek_v3_lite.yaml'] -# DISABLED: Auto-deploy compilation error +# DISABLED: Auto-deploy compilation error - shape mismatch - https://github.com/NVIDIA/TensorRT-LLM/issues/10978 # - name: deepseek-ai/DeepSeek-V3 # yaml_extra: ['dashboard_default.yaml', 'world_size_8.yaml', 'deepseek_v3_lite.yaml'] -# DISABLED: Assertion failure in auto-deploy transform pipeline +# DISABLED: Auto-deploy compilation error - shape mismatch - https://github.com/NVIDIA/TensorRT-LLM/issues/10978 # - name: deepseek-ai/DeepSeek-Coder-V2-Instruct # yaml_extra: ['dashboard_default.yaml', 'world_size_8.yaml'] - name: Qwen/Qwen3-VL-8B-Instruct yaml_extra: ['dashboard_default.yaml', 'world_size_8.yaml'] -- name: Qwen/Qwen2-VL-72B-Instruct-GPTQ-Int4 - yaml_extra: ['dashboard_default.yaml', 'world_size_8.yaml', 'multimodal.yaml', 'compile_backend_torch_cudagraph.yaml'] +# DISABLED: NOT SUPPORTED - https://github.com/NVIDIA/TensorRT-LLM/issues/10363 +# - name: Qwen/Qwen2-VL-72B-Instruct-GPTQ-Int4 +# yaml_extra: ['dashboard_default.yaml', 'world_size_8.yaml', 'multimodal.yaml'] - name: codellama/CodeLlama-70b-Instruct-hf - yaml_extra: ['dashboard_default.yaml', 'world_size_8.yaml', 'compile_backend_torch_cudagraph.yaml'] + yaml_extra: ['dashboard_default.yaml', 'world_size_8.yaml'] - name: meta-llama/Llama-3.2-90B-Vision-Instruct yaml_extra: ['dashboard_default.yaml', 'world_size_8.yaml', 'multimodal.yaml'] - name: openai/gpt-oss-120b