[None][chore] Update AD coverage to use torch-cudagraph (#10233)

Signed-off-by: Tal Cherckez <127761168+tcherckez-nvidia@users.noreply.github.com>
2026-01-13 22:18:36 +08:00 · 2025-12-23 14:20:32 +02:00 · 2025-12-23 14:20:32 +02:00 · 64bb1a5155
commit 64bb1a5155
parent 8408c40d8b
2 changed files with 64 additions and 84 deletions
--- a/examples/auto_deploy/model_registry/configs/compile_backend_torch_cudagraph.yaml
+++ b/examples/auto_deploy/model_registry/configs/compile_backend_torch_cudagraph.yaml
@@ -0,0 +1,4 @@
+# Compile backend selection for AutoDeploy dashboard tests
+# Sets compile_backend to torch-cudagraph for models that list this file in yaml_extra
+
+compile_backend: torch-cudagraph
--- a/examples/auto_deploy/model_registry/models.yaml
+++ b/examples/auto_deploy/model_registry/models.yaml
@@ -22,31 +22,26 @@ models:
  yaml_extra: ['dashboard_default.yaml', 'world_size_1.yaml']
 - name: google/gemma-3-1b-it
  yaml_extra: ['dashboard_default.yaml', 'world_size_1.yaml', 'gemma3_1b.yaml']
-# DISABLED: SLURM cluster cancellation - infrastructure issue
-# - name: meta-llama/Llama-3.1-8B-Instruct
-#   yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml']
-# DISABLED: SLURM cluster cancellation - infrastructure issue
-# - name: casperhansen/llama-3-8b-instruct-awq
-#   yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml']
-# DISABLED: SLURM cluster cancellation - infrastructure issue
-# - name: meta-llama/Llama-3.2-1B-Instruct
-#   yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml']
-# DISABLED: SLURM cluster cancellation - infrastructure issue
-# - name: meta-llama/Llama-3.2-3B-Instruct
-#   yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml']
+- name: meta-llama/Llama-3.1-8B-Instruct
+  yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml', 'compile_backend_torch_cudagraph.yaml']
+- name: casperhansen/llama-3-8b-instruct-awq
+  yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml', 'compile_backend_torch_cudagraph.yaml']
+- name: meta-llama/Llama-3.2-1B-Instruct
+  yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml', 'compile_backend_torch_cudagraph.yaml']
+- name: meta-llama/Llama-3.2-3B-Instruct
+  yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml', 'compile_backend_torch_cudagraph.yaml']
 - name: Qwen/Qwen2.5-1.5B-Instruct
  yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml']
 - name: Qwen/Qwen2.5-3B-Instruct
  yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml']
-# DISABLED: SLURM cluster cancellation - infrastructure issue
-# - name: Qwen/Qwen2.5-7B-Instruct
-#   yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml']
+- name: Qwen/Qwen2.5-7B-Instruct
+  yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml', 'compile_backend_torch_cudagraph.yaml']
 - name: Qwen/Qwen2.5-7B-Instruct-AWQ
-  yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml']
+  yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml', 'compile_backend_torch_cudagraph.yaml']
 - name: Qwen/Qwen3-4B
  yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml']
 - name: Qwen/Qwen3-8B
-  yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml']
+  yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml', 'compile_backend_torch_cudagraph.yaml']
 - name: microsoft/phi-4
  yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml']
 - name: microsoft/Phi-4-reasoning
@@ -55,40 +50,32 @@ models:
  yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml']
 - name: google/gemma-1.1-7b-it
  yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml']
-# DISABLED: SLURM cluster cancellation - infrastructure issue
-# - name: google/gemma-2-2b-it
-#   yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml']
-# DISABLED: SLURM cluster cancellation - infrastructure issue
-# - name: google/gemma-2-9b-it
-#   yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml']
+- name: google/gemma-2-2b-it
+  yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml', 'compile_backend_torch_cudagraph.yaml']
+- name: google/gemma-2-9b-it
+  yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml', 'compile_backend_torch_cudagraph.yaml']
 - name: google/codegemma-7b-it
  yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml']
-# DISABLED: SLURM cluster cancellation - infrastructure issue
-# - name: mistralai/Mistral-7B-Instruct-v0.2
-#   yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml']
-# DISABLED: SLURM cluster cancellation - infrastructure issue
-# - name: mistralai/Mistral-7B-Instruct-v0.3
-#   yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml']
+- name: mistralai/Mistral-7B-Instruct-v0.2
+  yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml', 'compile_backend_torch_cudagraph.yaml']
+- name: mistralai/Mistral-7B-Instruct-v0.3
+  yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml', 'compile_backend_torch_cudagraph.yaml']
 - name: TheBloke/Mistral-7B-Instruct-v0.2-GPTQ
-  yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml']
+  yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml', 'compile_backend_torch_cudagraph.yaml']
 - name: bigcode/starcoder2-7b
  yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml']
 - name: bigcode/starcoder2-15b-instruct-v0.1
  yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml']
-# DISABLED: SLURM cluster cancellation - infrastructure issue
-# - name: deepseek-ai/DeepSeek-Prover-V1.5-SFT
-#   yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml']
-# DISABLED: SLURM cluster cancellation - infrastructure issue
-# - name: deepseek-ai/DeepSeek-Prover-V2-7B
-#   yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml']
-# DISABLED: SLURM cluster cancellation - infrastructure issue
-# - name: deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B
-#   yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml']
+- name: deepseek-ai/DeepSeek-Prover-V1.5-SFT
+  yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml', 'compile_backend_torch_cudagraph.yaml']
+- name: deepseek-ai/DeepSeek-Prover-V2-7B
+  yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml', 'compile_backend_torch_cudagraph.yaml']
+- name: deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B
+  yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml', 'compile_backend_torch_cudagraph.yaml']
 - name: ibm-granite/granite-3.1-2b-instruct
  yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml']
-# DISABLED: SLURM cluster cancellation - infrastructure issue
-# - name: ibm-granite/granite-3.1-8b-instruct
-#   yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml']
+- name: ibm-granite/granite-3.1-8b-instruct
+  yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml', 'compile_backend_torch_cudagraph.yaml']
 - name: ibm-granite/granite-3.3-2b-instruct
  yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml']
 - name: ibm-granite/granite-3.3-8b-instruct
@@ -98,23 +85,20 @@ models:
 - name: ibm-granite/granite-guardian-3.2-5b
  yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml']
 - name: meta-llama/CodeLlama-7b-Instruct-hf
-  yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml']
+  yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml', 'compile_backend_torch_cudagraph.yaml']
 - name: meta-llama/CodeLlama-7b-Python-hf
-  yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml']
-# DISABLED: SLURM cluster cancellation - infrastructure issue
-# - name: meta-llama/Llama-2-7b-chat-hf
-#   yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml']
+  yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml', 'compile_backend_torch_cudagraph.yaml']
+- name: meta-llama/Llama-2-7b-chat-hf
+  yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml', 'compile_backend_torch_cudagraph.yaml']
 # DISABLED: FakeTensorMode error in unified_attn export
 # - name: nvidia/Llama-3.1-8B-Instruct-FP8
 #   yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml']
-# DISABLED: SLURM cluster cancellation - infrastructure issue
-# - name: nvidia/Llama-3.1-Minitron-4B-Depth-Base
-#   yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml']
-# DISABLED: SLURM cluster cancellation - infrastructure issue
-# - name: nvidia/Llama-3.1-Minitron-4B-Width-Base
-#   yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml']
+- name: nvidia/Llama-3.1-Minitron-4B-Depth-Base
+  yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml', 'compile_backend_torch_cudagraph.yaml']
+- name: nvidia/Llama-3.1-Minitron-4B-Width-Base
+  yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml', 'compile_backend_torch_cudagraph.yaml']
 - name: nvidia/Llama-3.1-Nemotron-Nano-8B-v1
-  yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml']
+  yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml', 'compile_backend_torch_cudagraph.yaml']
 - name: nvidia/Mistral-NeMo-Minitron-8B-Base
  yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml']
 - name: openai/gpt-oss-20b
@@ -122,12 +106,10 @@ models:
 # DISABLED: Custom op error - append_paged_kv_cache missing Float kernel
 # - name: bigcode/starcoder2-15b
 #   yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml']
-# DISABLED: SLURM cluster cancellation - infrastructure issue
-# - name: ibm-granite/granite-3.0-8b-instruct
-#   yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml']
-# DISABLED: SLURM cluster cancellation - infrastructure issue
-# - name: mistralai/Ministral-8B-Instruct-2410
-#   yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml']
+- name: ibm-granite/granite-3.0-8b-instruct
+  yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml', 'compile_backend_torch_cudagraph.yaml']
+- name: mistralai/Ministral-8B-Instruct-2410
+  yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml', 'compile_backend_torch_cudagraph.yaml']
 - name: nvidia/NVIDIA-Nemotron-Nano-9B-v2-FP8
  yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml']
 - name: nvidia/NVIDIA-Nemotron-Nano-9B-v2-NVFP4
@@ -143,36 +125,32 @@ models:
 # DISABLED: Network timeout downloading from Hugging Face
 # - name: ai21labs/AI21-Jamba-1.5-Mini
 #   yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml']
-# DISABLED: SLURM cluster cancellation - infrastructure issue
-# - name: THUDM/glm-4v-9b
-#   yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml', 'multimodal.yaml']
-# DISABLED: SLURM cluster cancellation - infrastructure issue
-# - name: meta-llama/Llama-3.2-11B-Vision-Instruct
-#   yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml', 'multimodal.yaml']
+- name: THUDM/glm-4v-9b
+  yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml', 'multimodal.yaml', 'compile_backend_torch_cudagraph.yaml']
+- name: meta-llama/Llama-3.2-11B-Vision-Instruct
+  yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml', 'multimodal.yaml', 'compile_backend_torch_cudagraph.yaml']
 # DISABLED: Auto-deploy compilation error
 # - name: meta-llama/Llama-3.3-70B-Instruct
 #   yaml_extra: ['dashboard_default.yaml', 'world_size_4.yaml', 'llama3_3_70b.yaml']
 - name: meta-llama/CodeLlama-34b-Instruct-hf
-  yaml_extra: ['dashboard_default.yaml', 'world_size_4.yaml']
+  yaml_extra: ['dashboard_default.yaml', 'world_size_4.yaml', 'compile_backend_torch_cudagraph.yaml']
 - name: meta-llama/Llama-2-13b-chat-hf
-  yaml_extra: ['dashboard_default.yaml', 'world_size_4.yaml']
+  yaml_extra: ['dashboard_default.yaml', 'world_size_4.yaml', 'compile_backend_torch_cudagraph.yaml']
 - name: microsoft/Phi-3-medium-128k-instruct
  yaml_extra: ['dashboard_default.yaml', 'world_size_4.yaml']
 - name: microsoft/Phi-3-medium-4k-instruct
  yaml_extra: ['dashboard_default.yaml', 'world_size_4.yaml']
-# DISABLED: SLURM cluster cancellation - infrastructure issue
-# - name: mistralai/Codestral-22B-v0.1
-#   yaml_extra: ['dashboard_default.yaml', 'world_size_4.yaml']
+- name: mistralai/Codestral-22B-v0.1
+  yaml_extra: ['dashboard_default.yaml', 'world_size_4.yaml', 'compile_backend_torch_cudagraph.yaml']
 # DISABLED: Graph transformation error in auto-deploy
 # - name: neuralmagic/Mixtral-8x7B-Instruct-v0.1-FP8
 #   yaml_extra: ['dashboard_default.yaml', 'world_size_4.yaml']
 - name: TheBloke/falcon-40b-instruct-GPTQ
  yaml_extra: ['dashboard_default.yaml', 'world_size_4.yaml']
 - name: Qwen/QwQ-32B
-  yaml_extra: ['dashboard_default.yaml', 'world_size_4.yaml']
-# DISABLED: SLURM cluster cancellation - infrastructure issue
-# - name: google/gemma-2-27b-it
-#   yaml_extra: ['dashboard_default.yaml', 'world_size_4.yaml']
+  yaml_extra: ['dashboard_default.yaml', 'world_size_4.yaml', 'compile_backend_torch_cudagraph.yaml']
+- name: google/gemma-2-27b-it
+  yaml_extra: ['dashboard_default.yaml', 'world_size_4.yaml', 'compile_backend_torch_cudagraph.yaml']
 - name: perplexity-ai/r1-1776-distill-llama-70b
  yaml_extra: ['dashboard_default.yaml', 'world_size_4.yaml']
 - name: nvidia/NVIDIA-Nemotron-Nano-31B-A3-v3
@@ -180,7 +158,7 @@ models:
 - name: nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-FP8
  yaml_extra: ['dashboard_default.yaml', 'world_size_4.yaml']
 - name: Qwen/QwQ-32B-Preview
-  yaml_extra: ['dashboard_default.yaml', 'world_size_4.yaml']
+  yaml_extra: ['dashboard_default.yaml', 'world_size_4.yaml', 'compile_backend_torch_cudagraph.yaml']
 - name: Qwen/Qwen3-Coder-32B-Instruct
  yaml_extra: ['dashboard_default.yaml', 'world_size_4.yaml']
 - name: Qwen/Qwen3-235B-A22B-Instruct-2507
@@ -193,9 +171,8 @@ models:
 # DISABLED: Auto-deploy compilation error
 # - name: mistralai/Mistral-Large-Instruct-v2.1
 #   yaml_extra: ['dashboard_default.yaml', 'world_size_4.yaml']
-# DISABLED: SLURM cluster cancellation - infrastructure issue
-# - name: deepseek-ai/DeepSeek-R1-Distill-Llama-70B
-#   yaml_extra: ['dashboard_default.yaml', 'world_size_8.yaml']
+- name: deepseek-ai/DeepSeek-R1-Distill-Llama-70B
+  yaml_extra: ['dashboard_default.yaml', 'world_size_8.yaml', 'compile_backend_torch_cudagraph.yaml']
 # DISABLED: Auto-deploy compilation error
 # - name: deepseek-ai/DeepSeek-R1-Distill-Qwen-32B
 #   yaml_extra: ['dashboard_default.yaml', 'world_size_8.yaml']
@@ -209,7 +186,7 @@ models:
 # - name: nvidia/Llama-3.1-405B-Instruct-FP8
 #   yaml_extra: ['dashboard_default.yaml', 'world_size_8.yaml']
 - name: nvidia/Llama-3.1-Nemotron-70B-Instruct-HF
-  yaml_extra: ['dashboard_default.yaml', 'world_size_8.yaml']
+  yaml_extra: ['dashboard_default.yaml', 'world_size_8.yaml', 'compile_backend_torch_cudagraph.yaml']
 # DISABLED: Model loading failure - dynamic module registry issue
 # - name: nvidia/Llama-3_1-Nemotron-51B-Instruct
 #   yaml_extra: ['dashboard_default.yaml', 'world_size_8.yaml', 'simple_shard_only.yaml']
@@ -234,10 +211,9 @@ models:
 - name: Qwen/Qwen3-VL-8B-Instruct
  yaml_extra: ['dashboard_default.yaml', 'world_size_8.yaml']
 - name: Qwen/Qwen2-VL-72B-Instruct-GPTQ-Int4
-  yaml_extra: ['dashboard_default.yaml', 'world_size_8.yaml', 'multimodal.yaml']
-# DISABLED: SLURM cluster cancellation - infrastructure issue
-# - name: codellama/CodeLlama-70b-Instruct-hf
-#   yaml_extra: ['dashboard_default.yaml', 'world_size_8.yaml']
+  yaml_extra: ['dashboard_default.yaml', 'world_size_8.yaml', 'multimodal.yaml', 'compile_backend_torch_cudagraph.yaml']
+- name: codellama/CodeLlama-70b-Instruct-hf
+  yaml_extra: ['dashboard_default.yaml', 'world_size_8.yaml', 'compile_backend_torch_cudagraph.yaml']
 - name: meta-llama/Llama-3.2-90B-Vision-Instruct
  yaml_extra: ['dashboard_default.yaml', 'world_size_8.yaml', 'multimodal.yaml']
 - name: openai/gpt-oss-120b