From 338d6e9f958e2ba06817187bbe36c83399fa1cf4 Mon Sep 17 00:00:00 2001 From: Dom Brown <3886319+DomBrown@users.noreply.github.com> Date: Sat, 31 May 2025 12:21:06 +0100 Subject: [PATCH] [nvbug 5305210] fix: Resolve nvbug 5305210 (#4759) Signed-off-by: Dom Brown <3886319+DomBrown@users.noreply.github.com> --- .../tensorrt_llm/batch_manager/llmRequest.h | 28 +++++++++---------- tests/integration/test_lists/waives.txt | 5 ---- 2 files changed, 13 insertions(+), 20 deletions(-) diff --git a/cpp/include/tensorrt_llm/batch_manager/llmRequest.h b/cpp/include/tensorrt_llm/batch_manager/llmRequest.h index cc95100764..647df2348b 100644 --- a/cpp/include/tensorrt_llm/batch_manager/llmRequest.h +++ b/cpp/include/tensorrt_llm/batch_manager/llmRequest.h @@ -41,26 +41,24 @@ namespace tensorrt_llm::batch_manager * @brief The state of the request. * * Enum order must follow chronological order for state dependency check, @see hasReachedState(). - * Enum starts with kDISAGG are for disaggregated serving only. */ enum class LlmRequestState : int32_t { kUNKNOWN = 0, ///< Unknown state kENCODER_INIT = 1, ///< Encoder phase starts (for encoder-decoder models) - - kCONTEXT_INIT = 10, ///< Context phase starts - kDISAGG_CONTEXT_INIT_AND_TRANS = 11, ///< Context phase starts and cache transmission is in progress, - /// used in layer-wise transmission - kDISAGG_CONTEXT_TRANS_IN_PROGRESS = 12, ///< Waiting context-only request transmitting the kv cache, - /// after computation finished - kDISAGG_CONTEXT_COMPLETE = 13, ///< Context-only request finished kv cache transmission. 
- - kDISAGG_GENERATION_INIT = 20, ///< New Generation request arrived at generation model - kDISAGG_GENERATION_TRANS_IN_PROGRESS = 21, ///< Transmitting the kv cache - kDISAGG_GENERATION_TRANS_COMPLETE = 22, ///< Kv cache transmission are finished - kGENERATION_IN_PROGRESS = 23, ///< Generation phase is in progress - kGENERATION_TO_COMPLETE = 24, ///< Generation phase is to be completed - kGENERATION_COMPLETE = 25, ///< Generation phase completed + kCONTEXT_INIT = 2, ///< Context phase starts + kDISAGG_GENERATION_TRANS_COMPLETE = 3, ///< For disaggregated + kGENERATION_IN_PROGRESS = 4, ///< Generation phase is in progress + kGENERATION_TO_COMPLETE = 5, ///< Generation phase is to be completed + kGENERATION_COMPLETE = 6, ///< Generation phase completed + kDISAGG_GENERATION_INIT = 7, ///< For disaggregated serving only: + /// new Generation request arrived at generation model + kDISAGG_CONTEXT_TRANS_IN_PROGRESS = 8, ///< For disaggregated serving only: + /// Waiting context-only request transmitting the kv cache + kDISAGG_CONTEXT_COMPLETE = 9, ///< Context-only request finished kv cache transmission. 
+ kDISAGG_GENERATION_TRANS_IN_PROGRESS = 10, ///< For disaggregated serving only: transmitting the kv cache + kDISAGG_CONTEXT_INIT_AND_TRANS = 11, ///< For disaggregated serving only: + /// Context phase starts and cache transmission is in progress }; enum LlmRequestType diff --git a/tests/integration/test_lists/waives.txt b/tests/integration/test_lists/waives.txt index dddf29f645..84b546cbeb 100644 --- a/tests/integration/test_lists/waives.txt +++ b/tests/integration/test_lists/waives.txt @@ -507,11 +507,6 @@ perf/test_perf.py::test_perf[starcoder2_3b-bench-pytorch-float16-input_output_le accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_fp8_block_scales_cuda_graph_padding SKIP (https://nvbugs/5303555) test_e2e.py::test_trtllm_bench_llmapi_launch[pytorch_backend-llama-v3-llama3-8b] SKIP (https://nvbugs/5302895) test_e2e.py::test_trtllm_bench_llmapi_launch[trt_backend-llama-v3-llama3-8b] SKIP (https://nvbugs/5302895) -cpp/test_multi_gpu.py::TestDisagg::test_orchestrator_params[llama-mpi_kvcache-90] SKIP (https://nvbugs/5305210) -cpp/test_multi_gpu.py::TestDisagg::test_orchestrator_params[llama-nixl_kvcache-90] SKIP (https://nvbugs/5305210) -cpp/test_multi_gpu.py::TestDisagg::test_orchestrator_params[llama-ucx_kvcache-90] SKIP (https://nvbugs/5305210) -cpp/test_multi_gpu.py::TestDisagg::test_spawn_orchestrator[llama-nixl_kvcache-90] SKIP (https://nvbugs/5305210) -cpp/test_multi_gpu.py::TestDisagg::test_spawn_orchestrator[llama-ucx_kvcache-90] SKIP (https://nvbugs/5305210) accuracy/test_llm_api_pytorch.py::TestLlama3_1_8BInstruct::test_fp8_llm_decoder SKIP (https://nvbugs/5292517) accuracy/test_disaggregated_serving.py::TestLlama3_1_8BInstruct::test_auto_dtype[False] SKIP (https://nvbugs/5303573) accuracy/test_disaggregated_serving.py::TestLlama3_1_8BInstruct::test_auto_dtype[True] SKIP (https://nvbugs/5303573)