[#10607][chore] Add Nemotron Nano v3 FP8 autodeploy perf test (#10603)

Signed-off-by: Eran Geva <19514940+MrGeva@users.noreply.github.com>
Signed-off-by: Eran Geva <egeva@cw-dfw-cs-001-vscode-01.cm.cluster>
Co-authored-by: Eran Geva <egeva@cw-dfw-cs-001-vscode-01.cm.cluster>
This commit is contained in:
Eran Geva 2026-01-19 08:48:07 +02:00 committed by GitHub
parent baa250d1d6
commit 32ab809f36
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 17 additions and 1 deletion

View File

@ -169,6 +169,7 @@ MODEL_PATH_DICT = {
"mistral_small_v3.1_24b": "Mistral-Small-3.1-24B-Instruct-2503",
"gpt_oss_120b_fp4": "gpt_oss/gpt-oss-120b",
"gpt_oss_20b_fp4": "gpt_oss/gpt-oss-20b",
"nemotron_nano_3_30b_fp8": "Nemotron-Nano-3-30B-A3.5B-FP8-KVFP8-dev",
"nemotron_nano_12b_v2": "NVIDIA-Nemotron-Nano-12B-v2",
"nvidia_nemotron_nano_9b_v2_nvfp4": "NVIDIA-Nemotron-Nano-9B-v2-NVFP4",
"starcoder2_7b": "starcoder2-7b",
@ -238,6 +239,11 @@ TRUST_REMOTE_CODE_MODELS = { # these models require explicit trust_remote_code=
"llama_v3.1_nemotron_ultra_253b_fp8",
}
# Autodeploy model configs - maps a model name to a curated YAML config file
# path, relative to the TRT-LLM repository root. Models listed here have their
# curated config loaded and used in place of the default autodeploy config
# that the perf test would otherwise generate at runtime.
AUTODEPLOY_MODEL_CONFIGS = {
"nemotron_nano_3_30b_fp8": "examples/auto_deploy/nano_v3.yaml",
}
def get_model_dir(model_name: str):
model_dir = ""
@ -1408,7 +1414,7 @@ class MultiMetricPerfTest(AbstractPerfScriptTestClass):
os.makedirs(os.path.dirname(autodeploy_config_path),
exist_ok=True)
# Create _autodeploy specific configuration
# Default autodeploy config
autodeploy_config = {
'transforms': {
'compile_model': {
@ -1422,6 +1428,15 @@ class MultiMetricPerfTest(AbstractPerfScriptTestClass):
'skip_loading_weights': self._config.skip_loading_weights
}
# If model has a curated config, use it instead
if self._config.model_name in AUTODEPLOY_MODEL_CONFIGS:
config_file = os.path.join(
self._llm_root,
AUTODEPLOY_MODEL_CONFIGS[self._config.model_name])
if os.path.exists(config_file):
with open(config_file, 'r') as f:
autodeploy_config = yaml.safe_load(f)
print_info(f"_autodeploy model config: {autodeploy_config}")
with open(autodeploy_config_path, 'w') as f:
yaml.dump(autodeploy_config, f, default_flow_style=False)

View File

@ -43,3 +43,4 @@ l0_perf:
backend: pytorch
tests:
- perf/test_perf.py::test_perf[deepseek_r1_distill_qwen_32b-bench-_autodeploy-float16-input_output_len:1024,1024-reqs:512]
- perf/test_perf.py::test_perf[nemotron_nano_3_30b_fp8-bench-_autodeploy-float16-input_output_len:1024,1024-reqs:512]