From 32ab809f36d7bc85df2e3a4b258dfd9b4f9f97ab Mon Sep 17 00:00:00 2001
From: Eran Geva <19514940+MrGeva@users.noreply.github.com>
Date: Mon, 19 Jan 2026 08:48:07 +0200
Subject: [PATCH] [#10607][chore] Add Nemotron Nano v3 FP8 autodeploy perf test
 (#10603)

Signed-off-by: Eran Geva <19514940+MrGeva@users.noreply.github.com>
Signed-off-by: Eran Geva <egeva@cw-dfw-cs-001-vscode-01.cm.cluster>
Co-authored-by: Eran Geva <egeva@cw-dfw-cs-001-vscode-01.cm.cluster>
---
 tests/integration/defs/perf/test_perf.py        | 17 ++++++++++++++++-
 .../integration/test_lists/test-db/l0_perf.yml  |  1 +
 2 files changed, 17 insertions(+), 1 deletion(-)

diff --git a/tests/integration/defs/perf/test_perf.py b/tests/integration/defs/perf/test_perf.py
index 44145728ae..a4ed2b60de 100644
--- a/tests/integration/defs/perf/test_perf.py
+++ b/tests/integration/defs/perf/test_perf.py
@@ -169,6 +169,7 @@ MODEL_PATH_DICT = {
     "mistral_small_v3.1_24b": "Mistral-Small-3.1-24B-Instruct-2503",
     "gpt_oss_120b_fp4": "gpt_oss/gpt-oss-120b",
     "gpt_oss_20b_fp4": "gpt_oss/gpt-oss-20b",
+    "nemotron_nano_3_30b_fp8": "Nemotron-Nano-3-30B-A3.5B-FP8-KVFP8-dev",
     "nemotron_nano_12b_v2": "NVIDIA-Nemotron-Nano-12B-v2",
     "nvidia_nemotron_nano_9b_v2_nvfp4": "NVIDIA-Nemotron-Nano-9B-v2-NVFP4",
     "starcoder2_7b": "starcoder2-7b",
@@ -238,6 +239,11 @@ TRUST_REMOTE_CODE_MODELS = {  # these models require explicit trust_remote_code=
     "llama_v3.1_nemotron_ultra_253b_fp8",
 }
 
+# Autodeploy model configs - maps model name to config file path (relative to TRT-LLM root)
+AUTODEPLOY_MODEL_CONFIGS = {
+    "nemotron_nano_3_30b_fp8": "examples/auto_deploy/nano_v3.yaml",
+}
+
 
 def get_model_dir(model_name: str):
     model_dir = ""
@@ -1408,7 +1414,7 @@ class MultiMetricPerfTest(AbstractPerfScriptTestClass):
                 os.makedirs(os.path.dirname(autodeploy_config_path),
                             exist_ok=True)
 
-            # Create _autodeploy specific configuration
+            # Default autodeploy config
             autodeploy_config = {
                 'transforms': {
                     'compile_model': {
@@ -1422,6 +1428,15 @@ class MultiMetricPerfTest(AbstractPerfScriptTestClass):
                 'skip_loading_weights': self._config.skip_loading_weights
             }
 
+            # Curated config, if the file exists, replaces the default entirely
+            if self._config.model_name in AUTODEPLOY_MODEL_CONFIGS:
+                config_file = os.path.join(
+                    self._llm_root,
+                    AUTODEPLOY_MODEL_CONFIGS[self._config.model_name])
+                if os.path.exists(config_file):
+                    with open(config_file, 'r') as f:
+                        autodeploy_config = yaml.safe_load(f)
+
             print_info(f"_autodeploy model config: {autodeploy_config}")
             with open(autodeploy_config_path, 'w') as f:
                 yaml.dump(autodeploy_config, f, default_flow_style=False)
diff --git a/tests/integration/test_lists/test-db/l0_perf.yml b/tests/integration/test_lists/test-db/l0_perf.yml
index a915f9a908..d850621ae5 100644
--- a/tests/integration/test_lists/test-db/l0_perf.yml
+++ b/tests/integration/test_lists/test-db/l0_perf.yml
@@ -43,3 +43,4 @@ l0_perf:
         backend: pytorch
     tests:
       - perf/test_perf.py::test_perf[deepseek_r1_distill_qwen_32b-bench-_autodeploy-float16-input_output_len:1024,1024-reqs:512]
+      - perf/test_perf.py::test_perf[nemotron_nano_3_30b_fp8-bench-_autodeploy-float16-input_output_len:1024,1024-reqs:512]