mirror of
https://github.com/vllm-project/vllm.git
synced 2026-06-06 00:16:14 +00:00
@@ -283,9 +283,14 @@ def test_multiproc_executor_pipeline_parallel():
|
||||
output_rank = executor._get_output_rank()
|
||||
assert output_rank == 2, "Output rank should be 2 (first rank of last PP stage)"
|
||||
|
||||
# Verify max_concurrent_batches for pipeline parallel
|
||||
assert vllm_config.max_concurrent_batches == 2, (
|
||||
"Max concurrent batches should equal PP size"
|
||||
# V2 model runner uses one extra batch to overlap async scheduling.
|
||||
expected_concurrent_batches = 2 + int(
|
||||
vllm_config.scheduler_config.async_scheduling
|
||||
and vllm_config.use_v2_model_runner
|
||||
)
|
||||
assert vllm_config.max_concurrent_batches == expected_concurrent_batches, (
|
||||
"Max concurrent batches should follow the configured PP/async "
|
||||
"scheduling policy"
|
||||
)
|
||||
|
||||
finally:
|
||||
|
||||
@@ -83,7 +83,13 @@ def assert_executor(executor, tp_size, pp_size):
|
||||
assert executor._get_output_rank() == expected_output_rank
|
||||
|
||||
if pp_size > 1:
|
||||
assert executor.vllm_config.max_concurrent_batches == pp_size
|
||||
expected_concurrent_batches = pp_size + int(
|
||||
executor.vllm_config.scheduler_config.async_scheduling
|
||||
and executor.vllm_config.use_v2_model_runner
|
||||
)
|
||||
assert (
|
||||
executor.vllm_config.max_concurrent_batches == expected_concurrent_batches
|
||||
)
|
||||
|
||||
executor.check_health()
|
||||
assert not executor.is_failed
|
||||
|
||||
Reference in New Issue
Block a user