From 3991aa9c72145cf5908c574e60d200b788422519 Mon Sep 17 00:00:00 2001 From: Patrice Castonguay <55748270+pcastonguay@users.noreply.github.com> Date: Tue, 2 Dec 2025 12:48:53 -0500 Subject: [PATCH] [https://nvbugs/5688388][fix] fix: Reducing num request in disagg test to speed up (#9598) Signed-off-by: Patrice Castonguay <55748270+pcastonguay@users.noreply.github.com> --- .../defs/disaggregated/test_disaggregated_single_gpu.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/integration/defs/disaggregated/test_disaggregated_single_gpu.py b/tests/integration/defs/disaggregated/test_disaggregated_single_gpu.py index 570e499fb7..d6b63d3ab3 100644 --- a/tests/integration/defs/disaggregated/test_disaggregated_single_gpu.py +++ b/tests/integration/defs/disaggregated/test_disaggregated_single_gpu.py @@ -351,8 +351,8 @@ def test_disaggregated_llama_context_capacity(model, enable_cuda_graph, max_tokens = 25 requests = [] - # Send 256 requests to make sure the context worker is saturated - for _ in range(256): + # Send 32 requests to make sure the context worker is saturated + for _ in range(32): requests.append( (prompt, SamplingParams(max_tokens=1, ignore_eos=True), DisaggregatedParams(request_type="context_only")))