Signed-off-by: yewentao256 <zhyanwentao@126.com>
This commit is contained in:
yewentao256
2026-06-04 18:33:31 +00:00
parent 3b794074fc
commit 25ec32e1b9
2 changed files with 15 additions and 4 deletions
+8 -3
View File
@@ -283,9 +283,14 @@ def test_multiproc_executor_pipeline_parallel():
output_rank = executor._get_output_rank()
assert output_rank == 2, "Output rank should be 2 (first rank of last PP stage)"
# Verify max_concurrent_batches for pipeline parallel
assert vllm_config.max_concurrent_batches == 2, (
"Max concurrent batches should equal PP size"
# V2 model runner uses one extra batch to overlap async scheduling.
expected_concurrent_batches = 2 + int(
vllm_config.scheduler_config.async_scheduling
and vllm_config.use_v2_model_runner
)
assert vllm_config.max_concurrent_batches == expected_concurrent_batches, (
"Max concurrent batches should follow the configured PP/async "
"scheduling policy"
)
finally:
+7 -1
View File
@@ -83,7 +83,13 @@ def assert_executor(executor, tp_size, pp_size):
assert executor._get_output_rank() == expected_output_rank
if pp_size > 1:
assert executor.vllm_config.max_concurrent_batches == pp_size
expected_concurrent_batches = pp_size + int(
executor.vllm_config.scheduler_config.async_scheduling
and executor.vllm_config.use_v2_model_runner
)
assert (
executor.vllm_config.max_concurrent_batches == expected_concurrent_batches
)
executor.check_health()
assert not executor.is_failed