mirror of
https://github.com/NVIDIA/TensorRT-LLM.git
synced 2026-01-14 06:27:45 +08:00
[https://nvbugs/5575902][fix] set max_batch_size=1 to stabilize accuracy test result (#8609)
Signed-off-by: Lizhi Zhou <1432185+reasonsolo@users.noreply.github.com>
This commit is contained in:
parent
4e11e0bd20
commit
686298d2d5
@@ -953,12 +953,15 @@ class TestQwen3_8B(LlmapiAccuracyTestHarness):
|
||||
},
|
||||
"enable_chunked_prefill": True,
|
||||
"max_num_tokens": 256,
|
||||
"max_batch_size":
|
||||
1, # max_batch_size=1 will stabilize the accuracy test result at a cost of speed
|
||||
}
|
||||
gen_server_config = {
|
||||
"cuda_graph_config": None,
|
||||
"cache_transceiver_config": {
|
||||
"backend": "DEFAULT"
|
||||
}
|
||||
},
|
||||
"max_batch_size": 1,
|
||||
}
|
||||
disaggregated_server_config = {
|
||||
"hostname": "localhost",
|
||||
|
||||
Loading…
Reference in New Issue
Block a user