From 79e872de31db35217c74ea0a7f4ed74f0e74af65 Mon Sep 17 00:00:00 2001
From: Guoming Zhang <137257613+nv-guomingz@users.noreply.github.com>
Date: Wed, 3 Dec 2025 20:52:53 +0800
Subject: [PATCH] =?UTF-8?q?[None][test]=20Update=20Qwen3-next=20accuracy?=
 =?UTF-8?q?=20testing=20by=20setting=20the=20cuda=20=E2=80=A6=20(#9613)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: nv-guomingz <137257613+nv-guomingz@users.noreply.github.com>
---
 tests/integration/defs/accuracy/test_llm_api_pytorch.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/tests/integration/defs/accuracy/test_llm_api_pytorch.py b/tests/integration/defs/accuracy/test_llm_api_pytorch.py
index cad0007c8b..0d7e6d1748 100644
--- a/tests/integration/defs/accuracy/test_llm_api_pytorch.py
+++ b/tests/integration/defs/accuracy/test_llm_api_pytorch.py
@@ -4392,7 +4392,8 @@ class TestQwen3NextInstruct(LlmapiAccuracyTestHarness):
                                         enable_block_reuse=False)
         pytorch_config = dict(disable_overlap_scheduler=not overlap_scheduler,
                               cuda_graph_config=CudaGraphConfig(
-                                  max_batch_size=512) if cuda_graph else None)
+                                  max_batch_size=512, enable_padding=True)
+                              if cuda_graph else None)
 
         with LLM(
                 model_path,
@@ -4427,7 +4428,8 @@ class TestQwen3NextInstruct(LlmapiAccuracyTestHarness):
                                         enable_block_reuse=False)
         pytorch_config = dict(disable_overlap_scheduler=not overlap_scheduler,
                               cuda_graph_config=CudaGraphConfig(
-                                  max_batch_size=512) if cuda_graph else None)
+                                  max_batch_size=512, enable_padding=True)
+                              if cuda_graph else None)
         moe_config = MoeConfig(backend=moe_backend)
 
         with LLM(model_path,
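
For context, a minimal sketch of how the updated config would be consumed, assuming
TensorRT-LLM's LLM API as imported by this test file (CudaGraphConfig, KvCacheConfig,
LLM); the model path and the standalone cuda_graph flag below are placeholders for
the test's own fixtures, not values taken from the patch:

    # Minimal sketch, not the test itself: build the CUDA graph config the way
    # the patched test does and hand it to the LLM constructor.
    from tensorrt_llm import LLM
    from tensorrt_llm.llmapi import CudaGraphConfig, KvCacheConfig

    cuda_graph = True                      # placeholder for the test's parametrized flag
    model_path = "Qwen/Qwen3-Next-80B-A3B-Instruct"   # placeholder model path

    cuda_graph_config = CudaGraphConfig(
        max_batch_size=512,    # capture CUDA graphs for batch sizes up to 512
        enable_padding=True,   # pad smaller batches so captured graphs are reused
    ) if cuda_graph else None

    llm = LLM(
        model=model_path,
        kv_cache_config=KvCacheConfig(enable_block_reuse=False),
        cuda_graph_config=cuda_graph_config,
    )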