diff --git a/tests/integration/defs/disaggregated/test_disaggregated_single_gpu.py b/tests/integration/defs/disaggregated/test_disaggregated_single_gpu.py index 570e499fb7..d6b63d3ab3 100644 --- a/tests/integration/defs/disaggregated/test_disaggregated_single_gpu.py +++ b/tests/integration/defs/disaggregated/test_disaggregated_single_gpu.py @@ -351,8 +351,8 @@ def test_disaggregated_llama_context_capacity(model, enable_cuda_graph, max_tokens = 25 requests = [] - # Send 256 requests to make sure the context worker is saturated - for _ in range(256): + # Send 32 requests to make sure the context worker is saturated + for _ in range(32): requests.append( (prompt, SamplingParams(max_tokens=1, ignore_eos=True), DisaggregatedParams(request_type="context_only")))