Mirror of https://github.com/NVIDIA/TensorRT-LLM.git
Signed-off-by: Eran Geva <19514940+MrGeva@users.noreply.github.com>
Signed-off-by: Eran Geva <egeva@cw-dfw-cs-001-vscode-01.cm.cluster>
Co-authored-by: Eran Geva <egeva@cw-dfw-cs-001-vscode-01.cm.cluster>
commit 32ab809f36
parent baa250d1d6
@@ -169,6 +169,7 @@ MODEL_PATH_DICT = {
     "mistral_small_v3.1_24b": "Mistral-Small-3.1-24B-Instruct-2503",
     "gpt_oss_120b_fp4": "gpt_oss/gpt-oss-120b",
     "gpt_oss_20b_fp4": "gpt_oss/gpt-oss-20b",
+    "nemotron_nano_3_30b_fp8": "Nemotron-Nano-3-30B-A3.5B-FP8-KVFP8-dev",
     "nemotron_nano_12b_v2": "NVIDIA-Nemotron-Nano-12B-v2",
     "nvidia_nemotron_nano_9b_v2_nvfp4": "NVIDIA-Nemotron-Nano-9B-v2-NVFP4",
     "starcoder2_7b": "starcoder2-7b",
@@ -238,6 +239,11 @@ TRUST_REMOTE_CODE_MODELS = { # these models require explicit trust_remote_code=
     "llama_v3.1_nemotron_ultra_253b_fp8",
 }
 
+# Autodeploy model configs - maps model name to config file path (relative to TRT-LLM root)
+AUTODEPLOY_MODEL_CONFIGS = {
+    "nemotron_nano_3_30b_fp8": "examples/auto_deploy/nano_v3.yaml",
+}
+
 
 def get_model_dir(model_name: str):
     model_dir = ""
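As a quick illustration of how an entry in the new mapping resolves, consider the sketch below; the llm_root value is a hypothetical stand-in for the real checkout root, which the test code takes from self._llm_root (see the MultiMetricPerfTest hunk further down for the actual consumption):

import os

AUTODEPLOY_MODEL_CONFIGS = {
    "nemotron_nano_3_30b_fp8": "examples/auto_deploy/nano_v3.yaml",
}

llm_root = "/workspace/TensorRT-LLM"  # hypothetical checkout root
config_file = os.path.join(
    llm_root, AUTODEPLOY_MODEL_CONFIGS["nemotron_nano_3_30b_fp8"])
# config_file == "/workspace/TensorRT-LLM/examples/auto_deploy/nano_v3.yaml"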
@@ -1408,7 +1414,7 @@ class MultiMetricPerfTest(AbstractPerfScriptTestClass):
         os.makedirs(os.path.dirname(autodeploy_config_path),
                     exist_ok=True)
 
-        # Create _autodeploy specific configuration
+        # Default autodeploy config
         autodeploy_config = {
             'transforms': {
                 'compile_model': {
@@ -1422,6 +1428,15 @@ class MultiMetricPerfTest(AbstractPerfScriptTestClass):
             'skip_loading_weights': self._config.skip_loading_weights
         }
 
+        # If model has a curated config, use it instead
+        if self._config.model_name in AUTODEPLOY_MODEL_CONFIGS:
+            config_file = os.path.join(
+                self._llm_root,
+                AUTODEPLOY_MODEL_CONFIGS[self._config.model_name])
+            if os.path.exists(config_file):
+                with open(config_file, 'r') as f:
+                    autodeploy_config = yaml.safe_load(f)
+
         print_info(f"_autodeploy model config: {autodeploy_config}")
         with open(autodeploy_config_path, 'w') as f:
             yaml.dump(autodeploy_config, f, default_flow_style=False)
 
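For review, here is a self-contained sketch of the fallback behavior this hunk introduces: build a default config dict, then replace it wholesale when a curated YAML exists for the model. All dict keys and values below are illustrative, and llm_root stands in for self._llm_root:

import os
import tempfile

import yaml

model_name = "nemotron_nano_3_30b_fp8"
llm_root = tempfile.mkdtemp()  # stand-in for the TRT-LLM checkout root

AUTODEPLOY_MODEL_CONFIGS = {
    "nemotron_nano_3_30b_fp8": "examples/auto_deploy/nano_v3.yaml",
}

# Default config; keys mirror the shape built in the test, values are illustrative.
autodeploy_config = {
    'transforms': {
        'compile_model': {},
    },
    'skip_loading_weights': False,
}

# Simulate a curated config on disk (contents illustrative).
curated_path = os.path.join(llm_root, AUTODEPLOY_MODEL_CONFIGS[model_name])
os.makedirs(os.path.dirname(curated_path), exist_ok=True)
with open(curated_path, 'w') as f:
    yaml.dump({'transforms': {'compile_model': {}}, 'curated': True}, f)

# The override flow from the diff: a curated file wins if it exists,
# otherwise the default dict above is kept.
if model_name in AUTODEPLOY_MODEL_CONFIGS:
    config_file = os.path.join(llm_root, AUTODEPLOY_MODEL_CONFIGS[model_name])
    if os.path.exists(config_file):
        with open(config_file, 'r') as f:
            autodeploy_config = yaml.safe_load(f)

print(autodeploy_config)  # the curated dict, not the default

Note that the curated file replaces the whole dict rather than being merged into it, so the YAML is authoritative: any key omitted there is absent from the final config.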
@@ -43,3 +43,4 @@ l0_perf:
       backend: pytorch
   tests:
   - perf/test_perf.py::test_perf[deepseek_r1_distill_qwen_32b-bench-_autodeploy-float16-input_output_len:1024,1024-reqs:512]
+  - perf/test_perf.py::test_perf[nemotron_nano_3_30b_fp8-bench-_autodeploy-float16-input_output_len:1024,1024-reqs:512]