[https://nvbugs/5698434][test] Add Qwen3-4B-Eagle3 One-model perf test (#10041)

Signed-off-by: yufeiwu-nv <230315618+yufeiwu-nv@users.noreply.github.com>
2026-01-13 22:18:36 +08:00 · 2025-12-17 13:37:25 +08:00 · 2025-12-17 13:37:25 +08:00 · 5d71f662c3
commit 5d71f662c3
parent 47404196fa
3 changed files with 19 additions and 0 deletions
--- a/tests/integration/defs/perf/pytorch_model_config.py
+++ b/tests/integration/defs/perf/pytorch_model_config.py
@ -215,6 +215,23 @@ def get_model_yaml_config(model_label: str,
                }
            }
        },
+        {
+            'patterns': [  # label prefix must equal the qwen3_4b_eagle3 test ID
+                'qwen3_4b_eagle3-bench-pytorch-streaming-bfloat16-maxbs:4-kv_frac:0.6-input_output_len:500,100-reqs:200-con:4',
+            ],
+            'config': {
+                'speculative_config': {
+                    'decoding_type': 'Eagle',
+                    'eagle3_one_model': True,
+                    'speculative_model_dir': 'Qwen3-4B_eagle3',
+                    'max_draft_len': 3,
+                },
+                'kv_cache_config': {
+                    'enable_block_reuse': False,
+                },
+                'enable_chunked_prefill': False,
+            }
+        },
        # Llama-v3.3 models with fp8 quantization
        {
            'patterns': [
--- a/tests/integration/defs/perf/test_perf.py
+++ b/tests/integration/defs/perf/test_perf.py
@ -110,6 +110,7 @@ MODEL_PATH_DICT = {
    "deepseek_v3_lite_nvfp4": "DeepSeek-V3-Lite/nvfp4_moe_only",
    "qwen2_7b_instruct": "Qwen2-7B-Instruct",
    "qwen_14b_chat": "Qwen-14B-Chat",
+    "qwen3_4b_eagle3": "Qwen3/Qwen3-4B",
    "qwen3_235b_a22b_fp8": "Qwen3/saved_models_Qwen3-235B-A22B_fp8_hf",
    "qwen3_235b_a22b_fp4": "Qwen3/saved_models_Qwen3-235B-A22B_nvfp4_hf",
    "starcoder2_3b": "starcoder2-3b",
--- a/tests/integration/test_lists/qa/llm_perf_sanity.yml
+++ b/tests/integration/test_lists/qa/llm_perf_sanity.yml
@ -27,6 +27,7 @@ llm_perf_sanity:
  - perf/test_perf.py::test_perf[ministral_8b-bench-pytorch-bfloat16-input_output_len:500,2000-reqs:500-con:250]
  - perf/test_perf.py::test_perf[phi_4_mini_instruct-bench-pytorch-bfloat16-input_output_len:500,2000]
  - perf/test_perf.py::test_perf[nemotron_nano_9b_v2-bench-pytorch-bfloat16-input_output_len:512,512]
+  - perf/test_perf.py::test_perf[qwen3_4b_eagle3-bench-pytorch-streaming-bfloat16-maxbs:4-kv_frac:0.6-input_output_len:500,100-reqs:200-con:4]


 # FP8 specific tests