[https://nvbugs/5575902][fix] set max_batch_size=1 to stabilize accuracy test result (#8609)

Signed-off-by: Lizhi Zhou <1432185+reasonsolo@users.noreply.github.com>
2026-01-14 06:27:45 +08:00 · 2025-10-23 22:28:29 +08:00 · 2025-10-23 22:28:29 +08:00 · 686298d2d5
commit 686298d2d5
parent 4e11e0bd20
1 changed file with 4 additions and 1 deletion
--- a/tests/integration/defs/accuracy/test_disaggregated_serving.py
+++ b/tests/integration/defs/accuracy/test_disaggregated_serving.py
@@ -953,12 +953,15 @@ class TestQwen3_8B(LlmapiAccuracyTestHarness):
            },
            "enable_chunked_prefill": True,
            "max_num_tokens": 256,
+            "max_batch_size":
+            1,  # max_batch_size=1 will stabilize the accuracy test result at a cost of speed
        }
        gen_server_config = {
            "cuda_graph_config": None,
            "cache_transceiver_config": {
                "backend": "DEFAULT"
-            }
+            },
+            "max_batch_size": 1,
        }
        disaggregated_server_config = {
            "hostname": "localhost",