[None][chore] Update AutoDeploy model list (#10505)

Signed-off-by: Tal Cherckez <127761168+tcherckez-nvidia@users.noreply.github.com>
This commit is contained in:
tcherckez-nvidia 2026-01-10 08:47:37 +02:00 committed by GitHub
parent 6ab996d635
commit f6c4dd885f
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 5 additions and 6 deletions

View File

@@ -0,0 +1 @@
attn_backend: triton

View File

@@ -65,7 +65,7 @@ models:
- name: bigcode/starcoder2-7b
yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml']
- name: bigcode/starcoder2-15b-instruct-v0.1
yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml']
yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml', 'attn_backend_triton.yaml']
- name: deepseek-ai/DeepSeek-Prover-V1.5-SFT
yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml', 'compile_backend_torch_cudagraph.yaml']
- name: deepseek-ai/DeepSeek-Prover-V2-7B
@@ -118,8 +118,6 @@ models:
yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml', 'multimodal.yaml']
- name: google/gemma-3-27b-it
yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml', 'multimodal.yaml']
- name: google/gemma-3-2b-it
yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml']
- name: deepseek-ai/DeepSeek-V2.5
yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml']
# DISABLED: Network timeout downloading from Hugging Face
@@ -145,8 +143,6 @@ models:
# DISABLED: Graph transformation error in auto-deploy
# - name: neuralmagic/Mixtral-8x7B-Instruct-v0.1-FP8
# yaml_extra: ['dashboard_default.yaml', 'world_size_4.yaml']
- name: TheBloke/falcon-40b-instruct-GPTQ
yaml_extra: ['dashboard_default.yaml', 'world_size_4.yaml']
- name: Qwen/QwQ-32B
yaml_extra: ['dashboard_default.yaml', 'world_size_4.yaml', 'compile_backend_torch_cudagraph.yaml']
- name: google/gemma-2-27b-it
@@ -159,7 +155,7 @@ models:
yaml_extra: ['dashboard_default.yaml', 'world_size_4.yaml']
- name: Qwen/QwQ-32B-Preview
yaml_extra: ['dashboard_default.yaml', 'world_size_4.yaml', 'compile_backend_torch_cudagraph.yaml']
- name: Qwen/Qwen3-Coder-32B-Instruct
- name: Qwen/Qwen3-Coder-30B-A3B-Instruct
yaml_extra: ['dashboard_default.yaml', 'world_size_4.yaml']
- name: Qwen/Qwen3-235B-A22B-Instruct-2507
yaml_extra: ['dashboard_default.yaml', 'world_size_4.yaml']
@@ -222,3 +218,5 @@ models:
yaml_extra: ['dashboard_default.yaml', 'world_size_8.yaml', 'multimodal.yaml', 'llama4_scout.yaml']
- name: meta-llama/Llama-4-Maverick-17B-128E-Instruct
yaml_extra: ['dashboard_default.yaml', 'world_size_8.yaml', 'multimodal.yaml', 'llama4_maverick_lite.yaml']
- name: nvidia/NVIDIA-Nemotron-3-Super-120B-BF16-BF16KV-010726
yaml_extra: ['dashboard_default.yaml', 'world_size_4.yaml','super_v3.yaml']