From 338d6e9f958e2ba06817187bbe36c83399fa1cf4 Mon Sep 17 00:00:00 2001 From: Dom Brown <3886319+DomBrown@users.noreply.github.com> Date: Sat, 31 May 2025 12:21:06 +0100 Subject: [PATCH] [nvbug 5305210] fix: Resolve nvbug 5305210 (#4759) Signed-off-by: Dom Brown <3886319+DomBrown@users.noreply.github.com> --- .../tensorrt_llm/batch_manager/llmRequest.h | 28 +++++++++---------- tests/integration/test_lists/waives.txt | 5 ---- 2 files changed, 13 insertions(+), 20 deletions(-) diff --git a/cpp/include/tensorrt_llm/batch_manager/llmRequest.h b/cpp/include/tensorrt_llm/batch_manager/llmRequest.h index cc95100764..647df2348b 100644 --- a/cpp/include/tensorrt_llm/batch_manager/llmRequest.h +++ b/cpp/include/tensorrt_llm/batch_manager/llmRequest.h @@ -41,26 +41,24 @@ namespace tensorrt_llm::batch_manager * @brief The state of the request. * * Enum order must follow chronological order for state dependency check, @see hasReachedState(). - * Enum starts with kDISAGG are for disaggregated serving only. */ enum class LlmRequestState : int32_t { kUNKNOWN = 0, ///< Unknown state kENCODER_INIT = 1, ///< Encoder phase starts (for encoder-decoder models) - - kCONTEXT_INIT = 10, ///< Context phase starts - kDISAGG_CONTEXT_INIT_AND_TRANS = 11, ///< Context phase starts and cache transmission is in progress, - /// used in layer-wise transmission - kDISAGG_CONTEXT_TRANS_IN_PROGRESS = 12, ///< Waiting context-only request transmitting the kv cache, - /// after computation finished - kDISAGG_CONTEXT_COMPLETE = 13, ///< Context-only request finished kv cache transmission. 
- - kDISAGG_GENERATION_INIT = 20, ///< New Generation request arrived at generation model - kDISAGG_GENERATION_TRANS_IN_PROGRESS = 21, ///< Transmitting the kv cache - kDISAGG_GENERATION_TRANS_COMPLETE = 22, ///< Kv cache transmission are finished - kGENERATION_IN_PROGRESS = 23, ///< Generation phase is in progress - kGENERATION_TO_COMPLETE = 24, ///< Generation phase is to be completed - kGENERATION_COMPLETE = 25, ///< Generation phase completed + kCONTEXT_INIT = 2, ///< Context phase starts + kDISAGG_GENERATION_TRANS_COMPLETE = 3, ///< For disaggregated + kGENERATION_IN_PROGRESS = 4, ///< Generation phase is in progress + kGENERATION_TO_COMPLETE = 5, ///< Generation phase is to be completed + kGENERATION_COMPLETE = 6, ///< Generation phase completed + kDISAGG_GENERATION_INIT = 7, ///< For disaggregated serving only: + /// new Generation request arrived at generation model + kDISAGG_CONTEXT_TRANS_IN_PROGRESS = 8, ///< For disaggregated serving only: + /// Waiting context-only request transmitting the kv cache + kDISAGG_CONTEXT_COMPLETE = 9, ///< Context-only request finished kv cache transmission. 
+ kDISAGG_GENERATION_TRANS_IN_PROGRESS = 10, ///< For disaggregated serving only: transmitting the kv cache + kDISAGG_CONTEXT_INIT_AND_TRANS = 11, ///< For disaggregated serving only: + /// Context phase starts and cache transmission is in progress }; enum LlmRequestType diff --git a/tests/integration/test_lists/waives.txt b/tests/integration/test_lists/waives.txt index dddf29f645..84b546cbeb 100644 --- a/tests/integration/test_lists/waives.txt +++ b/tests/integration/test_lists/waives.txt @@ -507,11 +507,6 @@ perf/test_perf.py::test_perf[starcoder2_3b-bench-pytorch-float16-input_output_le accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_fp8_block_scales_cuda_graph_padding SKIP (https://nvbugs/5303555) test_e2e.py::test_trtllm_bench_llmapi_launch[pytorch_backend-llama-v3-llama3-8b] SKIP (https://nvbugs/5302895) test_e2e.py::test_trtllm_bench_llmapi_launch[trt_backend-llama-v3-llama3-8b] SKIP (https://nvbugs/5302895) -cpp/test_multi_gpu.py::TestDisagg::test_orchestrator_params[llama-mpi_kvcache-90] SKIP (https://nvbugs/5305210) -cpp/test_multi_gpu.py::TestDisagg::test_orchestrator_params[llama-nixl_kvcache-90] SKIP (https://nvbugs/5305210) -cpp/test_multi_gpu.py::TestDisagg::test_orchestrator_params[llama-ucx_kvcache-90] SKIP (https://nvbugs/5305210) -cpp/test_multi_gpu.py::TestDisagg::test_spawn_orchestrator[llama-nixl_kvcache-90] SKIP (https://nvbugs/5305210) -cpp/test_multi_gpu.py::TestDisagg::test_spawn_orchestrator[llama-ucx_kvcache-90] SKIP (https://nvbugs/5305210) accuracy/test_llm_api_pytorch.py::TestLlama3_1_8BInstruct::test_fp8_llm_decoder SKIP (https://nvbugs/5292517) accuracy/test_disaggregated_serving.py::TestLlama3_1_8BInstruct::test_auto_dtype[False] SKIP (https://nvbugs/5303573) accuracy/test_disaggregated_serving.py::TestLlama3_1_8BInstruct::test_auto_dtype[True] SKIP (https://nvbugs/5303573)