From 11d79aa875e6ff964357b10fea3abb251f040292 Mon Sep 17 00:00:00 2001
From: JennyLiu <141791095+JennyLiu-nv@users.noreply.github.com>
Date: Thu, 12 Feb 2026 18:33:39 +0800
Subject: [PATCH] [https://nvbugs/5832481][test] Add
 gpt-oss-120b-Eagle3-throughput case on DGX-Spark (#11419)

Signed-off-by: Jenny Liu <JennyLiu-nv+JennyLiu@users.noreply.github.com>
Co-authored-by: Jenny Liu <JennyLiu-nv+JennyLiu@users.noreply.github.com>
---
 .../defs/perf/pytorch_model_config.py         | 33 ++++++++++++++++---
 tests/integration/defs/perf/test_perf.py      |  3 +-
 .../test_lists/qa/llm_spark_perf.yml          |  6 ++--
 3 files changed, 34 insertions(+), 8 deletions(-)

diff --git a/tests/integration/defs/perf/pytorch_model_config.py b/tests/integration/defs/perf/pytorch_model_config.py
index db2d6a12b5..25337faa8a 100644
--- a/tests/integration/defs/perf/pytorch_model_config.py
+++ b/tests/integration/defs/perf/pytorch_model_config.py
@@ -320,10 +320,10 @@ def get_model_yaml_config(model_label: str,
                 'num_postprocess_workers': 4
             }
         },
-        # GPT-OSS 120B speculative decoding (Eagle3 draft)
+        # GPT-OSS 120B speculative decoding with Eagle3
         {
             'patterns': [
-                'gpt_oss_120b_fp4-bench-pytorch-streaming-float4-maxbs:1-maxnt:4096-input_output_len:2048,128-reqs:1-con:1',
+                'gpt_oss_120b_eagle3-bench-pytorch',
             ],
             'config': {
                 'enable_attention_dp': False,
@@ -337,9 +337,34 @@ def get_model_yaml_config(model_label: str,
                     'decoding_type':
                     'Eagle',
                     'max_draft_len':
-                    5,
+                    3,
                     'speculative_model_dir':
-                    f"{llm_models_root()}/gpt_oss/gpt-oss-120b-Eagle3",
+                    f'{llm_models_root()}/gpt_oss/gpt-oss-120b-Eagle3',
+                },
+                'kv_cache_config': {
+                    'enable_block_reuse': False,
+                },
+            }
+        },
+        # GPT-OSS 120B speculative decoding with Eagle3-throughput (https://nvbugspro.nvidia.com/bug/5832481)
+        {
+            'patterns': [
+                'gpt_oss_120b_eagle3_throughput-bench-pytorch',
+            ],
+            'config': {
+                'enable_attention_dp': False,
+                'disable_overlap_scheduler': True,
+                'enable_autotuner': False,
+                'cuda_graph_config': {
+                    'enable_padding': True,
+                },
+                'speculative_config': {
+                    'decoding_type':
+                    'Eagle',
+                    'max_draft_len':
+                    3,
+                    'speculative_model_dir':
+                    f'{llm_models_root()}/gpt_oss/gpt-oss-120b-Eagle3-throughput',
                 },
                 'kv_cache_config': {
                     'enable_block_reuse': False,
diff --git a/tests/integration/defs/perf/test_perf.py b/tests/integration/defs/perf/test_perf.py
index 3695cf7e29..120df0f439 100644
--- a/tests/integration/defs/perf/test_perf.py
+++ b/tests/integration/defs/perf/test_perf.py
@@ -173,7 +173,8 @@ MODEL_PATH_DICT = {
     "mistral_small_v3.1_24b": "Mistral-Small-3.1-24B-Instruct-2503",
     "gpt_oss_120b_fp4": "gpt_oss/gpt-oss-120b",
     "gpt_oss_20b_fp4": "gpt_oss/gpt-oss-20b",
-    "gpt_oss_120b_eagle3": "gpt_oss/gpt-oss-120b-Eagle3",
+    "gpt_oss_120b_eagle3": "gpt_oss/gpt-oss-120b",
+    "gpt_oss_120b_eagle3_throughput": "gpt_oss/gpt-oss-120b",
     "nemotron_nano_3_30b_fp8": "Nemotron-Nano-3-30B-A3.5B-FP8-KVFP8-dev",
     "nemotron_nano_12b_v2": "NVIDIA-Nemotron-Nano-12B-v2",
     "nvidia_nemotron_nano_9b_v2_nvfp4": "NVIDIA-Nemotron-Nano-9B-v2-NVFP4",
diff --git a/tests/integration/test_lists/qa/llm_spark_perf.yml b/tests/integration/test_lists/qa/llm_spark_perf.yml
index 713192e93c..904bb30f17 100644
--- a/tests/integration/test_lists/qa/llm_spark_perf.yml
+++ b/tests/integration/test_lists/qa/llm_spark_perf.yml
@@ -10,10 +10,10 @@ llm_spark_perf:
         lte: 1
   tests:
   - perf/test_perf.py::test_perf[gpt_oss_20b_fp4-bench-pytorch-streaming-float4-maxbs:1-input_output_len:2048,128-reqs:1-con:1]
-  # GPT-OSS 120B normal case (no spec dec)
   - perf/test_perf.py::test_perf[gpt_oss_120b_fp4-bench-pytorch-streaming-float4-maxbs:1-input_output_len:2048,128-reqs:1-con:1]
-  # GPT-OSS 120B spec dec case (Eagle3)
-  - perf/test_perf.py::test_perf[gpt_oss_120b_fp4-bench-pytorch-streaming-float4-maxbs:1-maxnt:4096-input_output_len:2048,128-reqs:1-con:1]
+  - perf/test_perf.py::test_perf[gpt_oss_120b_eagle3-bench-pytorch-streaming-float4-maxbs:1-input_output_len:2048,128-reqs:1-con:1]
+  # GPT-OSS 120B spec dec case (Eagle3-throughput) - https://nvbugspro.nvidia.com/bug/5832481
+  - perf/test_perf.py::test_perf[gpt_oss_120b_eagle3_throughput-bench-pytorch-streaming-float4-maxbs:1-input_output_len:2048,128-reqs:1-con:1]
   - perf/test_perf.py::test_perf[nvidia_nemotron_nano_9b_v2_nvfp4-bench-pytorch-streaming-float4-maxbs:1-input_output_len:2048,128-reqs:1-con:1]
   - perf/test_perf.py::test_perf[llama_v3.1_8b_instruct_fp8-bench-pytorch-streaming-float8-maxbs:1-input_output_len:2048,128-reqs:1-con:1]
   - perf/test_perf.py::test_perf[llama_v3.1_8b_instruct_fp4-bench-pytorch-streaming-float4-maxbs:1-input_output_len:2048,128-reqs:1-con:1]