[nvbug 5305210] fix: Resolve nvbug 5305210 (#4759)

Signed-off-by: Dom Brown <3886319+DomBrown@users.noreply.github.com>
This commit is contained in:
Dom Brown 2025-05-31 12:21:06 +01:00 committed by GitHub
parent a02df6aa4b
commit 338d6e9f95
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 13 additions and 20 deletions

View File

@@ -41,26 +41,24 @@ namespace tensorrt_llm::batch_manager
* @brief The state of the request.
*
* Enum order must follow chronological order for state dependency check, @see hasReachedState().
 * Enumerators starting with kDISAGG are for disaggregated serving only.
*/
/// State machine for an LLM request's lifecycle.
///
/// Numeric values are spaced (0/1, 10-13, 20-25) so new states can be inserted
/// without renumbering, and they strictly increase in chronological order —
/// the state-dependency check (hasReachedState()) relies on this ordering.
enum class LlmRequestState : int32_t
{
    kUNKNOWN = 0,                              ///< Unknown state
    kENCODER_INIT = 1,                         ///< Encoder phase starts (for encoder-decoder models)
    kCONTEXT_INIT = 10,                        ///< Context phase starts
    kDISAGG_CONTEXT_INIT_AND_TRANS = 11,       ///< Context phase starts and cache transmission is in progress,
                                               /// used in layer-wise transmission
    kDISAGG_CONTEXT_TRANS_IN_PROGRESS = 12,    ///< Waiting context-only request transmitting the kv cache,
                                               /// after computation finished
    kDISAGG_CONTEXT_COMPLETE = 13,             ///< Context-only request finished kv cache transmission.
    kDISAGG_GENERATION_INIT = 20,              ///< New generation request arrived at generation model
    kDISAGG_GENERATION_TRANS_IN_PROGRESS = 21, ///< Transmitting the kv cache
    kDISAGG_GENERATION_TRANS_COMPLETE = 22,    ///< Kv cache transmission is finished
    kGENERATION_IN_PROGRESS = 23,              ///< Generation phase is in progress
    kGENERATION_TO_COMPLETE = 24,              ///< Generation phase is to be completed
    kGENERATION_COMPLETE = 25,                 ///< Generation phase completed
};
enum LlmRequestType

View File

@@ -507,11 +507,6 @@ perf/test_perf.py::test_perf[starcoder2_3b-bench-pytorch-float16-input_output_le
accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_fp8_block_scales_cuda_graph_padding SKIP (https://nvbugs/5303555)
test_e2e.py::test_trtllm_bench_llmapi_launch[pytorch_backend-llama-v3-llama3-8b] SKIP (https://nvbugs/5302895)
test_e2e.py::test_trtllm_bench_llmapi_launch[trt_backend-llama-v3-llama3-8b] SKIP (https://nvbugs/5302895)
cpp/test_multi_gpu.py::TestDisagg::test_orchestrator_params[llama-mpi_kvcache-90] SKIP (https://nvbugs/5305210)
cpp/test_multi_gpu.py::TestDisagg::test_orchestrator_params[llama-nixl_kvcache-90] SKIP (https://nvbugs/5305210)
cpp/test_multi_gpu.py::TestDisagg::test_orchestrator_params[llama-ucx_kvcache-90] SKIP (https://nvbugs/5305210)
cpp/test_multi_gpu.py::TestDisagg::test_spawn_orchestrator[llama-nixl_kvcache-90] SKIP (https://nvbugs/5305210)
cpp/test_multi_gpu.py::TestDisagg::test_spawn_orchestrator[llama-ucx_kvcache-90] SKIP (https://nvbugs/5305210)
accuracy/test_llm_api_pytorch.py::TestLlama3_1_8BInstruct::test_fp8_llm_decoder SKIP (https://nvbugs/5292517)
accuracy/test_disaggregated_serving.py::TestLlama3_1_8BInstruct::test_auto_dtype[False] SKIP (https://nvbugs/5303573)
accuracy/test_disaggregated_serving.py::TestLlama3_1_8BInstruct::test_auto_dtype[True] SKIP (https://nvbugs/5303573)