[None][test] Update Qwen3-next accuracy testing by setting the cuda … (#9613)

Signed-off-by: nv-guomingz <137257613+nv-guomingz@users.noreply.github.com>
This commit is contained in:
Guoming Zhang 2025-12-03 20:52:53 +08:00 committed by GitHub
parent 743486b2ea
commit 79e872de31
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@@ -4392,7 +4392,8 @@ class TestQwen3NextInstruct(LlmapiAccuracyTestHarness):
enable_block_reuse=False)
pytorch_config = dict(disable_overlap_scheduler=not overlap_scheduler,
cuda_graph_config=CudaGraphConfig(
-max_batch_size=512) if cuda_graph else None)
+max_batch_size=512, enable_padding=True)
+if cuda_graph else None)
with LLM(
model_path,
@@ -4427,7 +4428,8 @@ class TestQwen3NextInstruct(LlmapiAccuracyTestHarness):
enable_block_reuse=False)
pytorch_config = dict(disable_overlap_scheduler=not overlap_scheduler,
cuda_graph_config=CudaGraphConfig(
-max_batch_size=512) if cuda_graph else None)
+max_batch_size=512, enable_padding=True)
+if cuda_graph else None)
moe_config = MoeConfig(backend=moe_backend)
with LLM(model_path,