From 3991aa9c72145cf5908c574e60d200b788422519 Mon Sep 17 00:00:00 2001
From: Patrice Castonguay <55748270+pcastonguay@users.noreply.github.com>
Date: Tue, 2 Dec 2025 12:48:53 -0500
Subject: [PATCH] [https://nvbugs/5688388][fix] fix: Reduce number of requests in
 disagg test to speed it up (#9598)

Signed-off-by: Patrice Castonguay <55748270+pcastonguay@users.noreply.github.com>
---
 .../defs/disaggregated/test_disaggregated_single_gpu.py       | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/integration/defs/disaggregated/test_disaggregated_single_gpu.py b/tests/integration/defs/disaggregated/test_disaggregated_single_gpu.py
index 570e499fb7..d6b63d3ab3 100644
--- a/tests/integration/defs/disaggregated/test_disaggregated_single_gpu.py
+++ b/tests/integration/defs/disaggregated/test_disaggregated_single_gpu.py
@@ -351,8 +351,8 @@ def test_disaggregated_llama_context_capacity(model, enable_cuda_graph,
             max_tokens = 25
 
             requests = []
-            # Send 256 requests to make sure the context worker is saturated
-            for _ in range(256):
+            # Send 32 requests to make sure the context worker is saturated
+            for _ in range(32):
                 requests.append(
                     (prompt, SamplingParams(max_tokens=1, ignore_eos=True),
                      DisaggregatedParams(request_type="context_only")))