From 79e872de31db35217c74ea0a7f4ed74f0e74af65 Mon Sep 17 00:00:00 2001
From: Guoming Zhang <137257613+nv-guomingz@users.noreply.github.com>
Date: Wed, 3 Dec 2025 20:52:53 +0800
Subject: [PATCH] =?UTF-8?q?[None][test]=20Update=20Qwen3-next=20accuracy?=
 =?UTF-8?q?=20testing=20by=20setting=20the=20cuda=20=E2=80=A6=20(#9613)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: nv-guomingz <137257613+nv-guomingz@users.noreply.github.com>
---
 tests/integration/defs/accuracy/test_llm_api_pytorch.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/tests/integration/defs/accuracy/test_llm_api_pytorch.py b/tests/integration/defs/accuracy/test_llm_api_pytorch.py
index cad0007c8b..0d7e6d1748 100644
--- a/tests/integration/defs/accuracy/test_llm_api_pytorch.py
+++ b/tests/integration/defs/accuracy/test_llm_api_pytorch.py
@@ -4392,7 +4392,8 @@ class TestQwen3NextInstruct(LlmapiAccuracyTestHarness):
                                         enable_block_reuse=False)
         pytorch_config = dict(disable_overlap_scheduler=not overlap_scheduler,
                               cuda_graph_config=CudaGraphConfig(
-                                  max_batch_size=512) if cuda_graph else None)
+                                  max_batch_size=512, enable_padding=True)
+                              if cuda_graph else None)
 
         with LLM(
                 model_path,
@@ -4427,7 +4428,8 @@ class TestQwen3NextInstruct(LlmapiAccuracyTestHarness):
                                         enable_block_reuse=False)
         pytorch_config = dict(disable_overlap_scheduler=not overlap_scheduler,
                               cuda_graph_config=CudaGraphConfig(
-                                  max_batch_size=512) if cuda_graph else None)
+                                  max_batch_size=512, enable_padding=True)
+                              if cuda_graph else None)
         moe_config = MoeConfig(backend=moe_backend)
 
         with LLM(model_path,
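
For context, a minimal sketch of how the updated config would be consumed, assuming
TensorRT-LLM's LLM API as imported by this test file (CudaGraphConfig, KvCacheConfig,
LLM); the model path and the standalone cuda_graph flag below are placeholders for
the test's own fixtures, not values taken from the patch:

    # Minimal sketch, not the test itself: build the CUDA graph config the way
    # the patched test does and hand it to the LLM constructor.
    from tensorrt_llm import LLM
    from tensorrt_llm.llmapi import CudaGraphConfig, KvCacheConfig

    cuda_graph = True                      # placeholder for the test's parametrized flag
    model_path = "Qwen/Qwen3-Next-80B-A3B-Instruct"   # placeholder model path

    cuda_graph_config = CudaGraphConfig(
        max_batch_size=512,    # capture CUDA graphs for batch sizes up to 512
        enable_padding=True,   # pad smaller batches so captured graphs are reused
    ) if cuda_graph else None

    llm = LLM(
        model=model_path,
        kv_cache_config=KvCacheConfig(enable_block_reuse=False),
        cuda_graph_config=cuda_graph_config,
    )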