[None][test] Update Qwen3-next accuracy testing by setting the cuda graph enable_padding=True (#9613)

Signed-off-by: nv-guomingz <137257613+nv-guomingz@users.noreply.github.com>
2026-01-14 06:27:45 +08:00 · 2025-12-03 20:52:53 +08:00 · 2025-12-03 20:52:53 +08:00 · 79e872de31
commit 79e872de31
parent 743486b2ea
1 changed file with 4 additions and 2 deletions
--- a/tests/integration/defs/accuracy/test_llm_api_pytorch.py
+++ b/tests/integration/defs/accuracy/test_llm_api_pytorch.py
@@ -4392,7 +4392,8 @@ class TestQwen3NextInstruct(LlmapiAccuracyTestHarness):
                                        enable_block_reuse=False)
        pytorch_config = dict(disable_overlap_scheduler=not overlap_scheduler,
                              cuda_graph_config=CudaGraphConfig(
-                                  max_batch_size=512) if cuda_graph else None)
+                                  max_batch_size=512, enable_padding=True)
+                              if cuda_graph else None)

        with LLM(
                model_path,
@@ -4427,7 +4428,8 @@ class TestQwen3NextInstruct(LlmapiAccuracyTestHarness):
                                        enable_block_reuse=False)
        pytorch_config = dict(disable_overlap_scheduler=not overlap_scheduler,
                              cuda_graph_config=CudaGraphConfig(
-                                  max_batch_size=512) if cuda_graph else None)
+                                  max_batch_size=512, enable_padding=True)
+                              if cuda_graph else None)
        moe_config = MoeConfig(backend=moe_backend)

        with LLM(model_path,