From 32ab809f36d7bc85df2e3a4b258dfd9b4f9f97ab Mon Sep 17 00:00:00 2001 From: Eran Geva <19514940+MrGeva@users.noreply.github.com> Date: Mon, 19 Jan 2026 08:48:07 +0200 Subject: [PATCH] [#10607][chore] Add Nemotron Nano v3 FP8 autodeploy perf test (#10603) Signed-off-by: Eran Geva <19514940+MrGeva@users.noreply.github.com> Signed-off-by: Eran Geva Co-authored-by: Eran Geva --- tests/integration/defs/perf/test_perf.py | 17 ++++++++++++++++- .../integration/test_lists/test-db/l0_perf.yml | 1 + 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/tests/integration/defs/perf/test_perf.py b/tests/integration/defs/perf/test_perf.py index 44145728ae..a4ed2b60de 100644 --- a/tests/integration/defs/perf/test_perf.py +++ b/tests/integration/defs/perf/test_perf.py @@ -169,6 +169,7 @@ MODEL_PATH_DICT = { "mistral_small_v3.1_24b": "Mistral-Small-3.1-24B-Instruct-2503", "gpt_oss_120b_fp4": "gpt_oss/gpt-oss-120b", "gpt_oss_20b_fp4": "gpt_oss/gpt-oss-20b", + "nemotron_nano_3_30b_fp8": "Nemotron-Nano-3-30B-A3.5B-FP8-KVFP8-dev", "nemotron_nano_12b_v2": "NVIDIA-Nemotron-Nano-12B-v2", "nvidia_nemotron_nano_9b_v2_nvfp4": "NVIDIA-Nemotron-Nano-9B-v2-NVFP4", "starcoder2_7b": "starcoder2-7b", @@ -238,6 +239,11 @@ TRUST_REMOTE_CODE_MODELS = { # these models require explicit trust_remote_code= "llama_v3.1_nemotron_ultra_253b_fp8", } +# Autodeploy model configs - maps model name to config file path (relative to TRT-LLM root) +AUTODEPLOY_MODEL_CONFIGS = { + "nemotron_nano_3_30b_fp8": "examples/auto_deploy/nano_v3.yaml", +} + def get_model_dir(model_name: str): model_dir = "" @@ -1408,7 +1414,7 @@ class MultiMetricPerfTest(AbstractPerfScriptTestClass): os.makedirs(os.path.dirname(autodeploy_config_path), exist_ok=True) - # Create _autodeploy specific configuration + # Default autodeploy config autodeploy_config = { 'transforms': { 'compile_model': { @@ -1422,6 +1428,15 @@ class MultiMetricPerfTest(AbstractPerfScriptTestClass): 'skip_loading_weights': self._config.skip_loading_weights } + # If model has a curated config, use it instead + if self._config.model_name in AUTODEPLOY_MODEL_CONFIGS: + config_file = os.path.join( + self._llm_root, + AUTODEPLOY_MODEL_CONFIGS[self._config.model_name]) + if os.path.exists(config_file): + with open(config_file, 'r') as f: + autodeploy_config = yaml.safe_load(f) + print_info(f"_autodeploy model config: {autodeploy_config}") with open(autodeploy_config_path, 'w') as f: yaml.dump(autodeploy_config, f, default_flow_style=False) diff --git a/tests/integration/test_lists/test-db/l0_perf.yml b/tests/integration/test_lists/test-db/l0_perf.yml index a915f9a908..d850621ae5 100644 --- a/tests/integration/test_lists/test-db/l0_perf.yml +++ b/tests/integration/test_lists/test-db/l0_perf.yml @@ -43,3 +43,4 @@ l0_perf: backend: pytorch tests: - perf/test_perf.py::test_perf[deepseek_r1_distill_qwen_32b-bench-_autodeploy-float16-input_output_len:1024,1024-reqs:512] + - perf/test_perf.py::test_perf[nemotron_nano_3_30b_fp8-bench-_autodeploy-float16-input_output_len:1024,1024-reqs:512]