[nvbug 5305210] fix: Resolve nvbug 5305210 (#4759)

Signed-off-by: Dom Brown <3886319+DomBrown@users.noreply.github.com>
This commit is contained in:
Dom Brown 2025-05-31 12:21:06 +01:00 committed by GitHub
parent a02df6aa4b
commit 338d6e9f95
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 13 additions and 20 deletions

View File

@@ -41,26 +41,24 @@ namespace tensorrt_llm::batch_manager
* @brief The state of the request.
*
* Enum order must follow chronological order for state dependency check, @see hasReachedState().
 * Enumerators starting with kDISAGG are for disaggregated serving only.
*/
/// State machine for an LLM request's lifecycle.
///
/// Numeric values are spaced (0/1, 10-13, 20-25) so new states can be inserted
/// without renumbering, and they strictly increase in chronological order —
/// the state-dependency check (hasReachedState()) relies on this ordering.
enum class LlmRequestState : int32_t
{
    kUNKNOWN = 0,                              ///< Unknown state
    kENCODER_INIT = 1,                         ///< Encoder phase starts (for encoder-decoder models)
    kCONTEXT_INIT = 10,                        ///< Context phase starts
    kDISAGG_CONTEXT_INIT_AND_TRANS = 11,       ///< Context phase starts and cache transmission is in progress,
                                               /// used in layer-wise transmission
    kDISAGG_CONTEXT_TRANS_IN_PROGRESS = 12,    ///< Waiting context-only request transmitting the kv cache,
                                               /// after computation finished
    kDISAGG_CONTEXT_COMPLETE = 13,             ///< Context-only request finished kv cache transmission.
    kDISAGG_GENERATION_INIT = 20,              ///< New generation request arrived at generation model
    kDISAGG_GENERATION_TRANS_IN_PROGRESS = 21, ///< Transmitting the kv cache
    kDISAGG_GENERATION_TRANS_COMPLETE = 22,    ///< Kv cache transmission is finished
    kGENERATION_IN_PROGRESS = 23,              ///< Generation phase is in progress
    kGENERATION_TO_COMPLETE = 24,              ///< Generation phase is to be completed
    kGENERATION_COMPLETE = 25,                 ///< Generation phase completed
};
enum LlmRequestType

View File

@@ -507,11 +507,6 @@ perf/test_perf.py::test_perf[starcoder2_3b-bench-pytorch-float16-input_output_le
accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_fp8_block_scales_cuda_graph_padding SKIP (https://nvbugs/5303555)
test_e2e.py::test_trtllm_bench_llmapi_launch[pytorch_backend-llama-v3-llama3-8b] SKIP (https://nvbugs/5302895)
test_e2e.py::test_trtllm_bench_llmapi_launch[trt_backend-llama-v3-llama3-8b] SKIP (https://nvbugs/5302895)
cpp/test_multi_gpu.py::TestDisagg::test_orchestrator_params[llama-mpi_kvcache-90] SKIP (https://nvbugs/5305210)
cpp/test_multi_gpu.py::TestDisagg::test_orchestrator_params[llama-nixl_kvcache-90] SKIP (https://nvbugs/5305210)
cpp/test_multi_gpu.py::TestDisagg::test_orchestrator_params[llama-ucx_kvcache-90] SKIP (https://nvbugs/5305210)
cpp/test_multi_gpu.py::TestDisagg::test_spawn_orchestrator[llama-nixl_kvcache-90] SKIP (https://nvbugs/5305210)
cpp/test_multi_gpu.py::TestDisagg::test_spawn_orchestrator[llama-ucx_kvcache-90] SKIP (https://nvbugs/5305210)
accuracy/test_llm_api_pytorch.py::TestLlama3_1_8BInstruct::test_fp8_llm_decoder SKIP (https://nvbugs/5292517)
accuracy/test_disaggregated_serving.py::TestLlama3_1_8BInstruct::test_auto_dtype[False] SKIP (https://nvbugs/5303573)
accuracy/test_disaggregated_serving.py::TestLlama3_1_8BInstruct::test_auto_dtype[True] SKIP (https://nvbugs/5303573)