[https://nvbugs/5575902][fix] set max_batch_size=1 to stabilize accuracy test result (#8609)

Signed-off-by: Lizhi Zhou <1432185+reasonsolo@users.noreply.github.com>
This commit is contained in:
Lizhi Zhou 2025-10-23 22:28:29 +08:00 committed by GitHub
parent 4e11e0bd20
commit 686298d2d5
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@@ -953,12 +953,15 @@ class TestQwen3_8B(LlmapiAccuracyTestHarness):
},
"enable_chunked_prefill": True,
"max_num_tokens": 256,
"max_batch_size":
1, # max_batch_size=1 will stabilize the accuracy test result at a cost of speed
}
gen_server_config = {
"cuda_graph_config": None,
"cache_transceiver_config": {
"backend": "DEFAULT"
}
},
"max_batch_size": 1,
}
disaggregated_server_config = {
"hostname": "localhost",