mirror of
https://github.com/NVIDIA/TensorRT-LLM.git
synced 2026-01-14 06:27:45 +08:00
[nvbug 5305210] fix: Resolve nvbug 5305210 (#4759)
Signed-off-by: Dom Brown <3886319+DomBrown@users.noreply.github.com>
This commit is contained in:
parent
a02df6aa4b
commit
338d6e9f95
@ -41,26 +41,24 @@ namespace tensorrt_llm::batch_manager
|
||||
* @brief The state of the request.
|
||||
*
|
||||
* Enum order must follow chronological order for state dependency check, @see hasReachedState().
|
||||
* Enumerators whose names start with kDISAGG are for disaggregated serving only.
|
||||
*/
|
||||
enum class LlmRequestState : int32_t
|
||||
{
|
||||
kUNKNOWN = 0, ///< Unknown state
|
||||
kENCODER_INIT = 1, ///< Encoder phase starts (for encoder-decoder models)
|
||||
|
||||
kCONTEXT_INIT = 10, ///< Context phase starts
|
||||
kDISAGG_CONTEXT_INIT_AND_TRANS = 11, ///< Context phase starts and cache transmission is in progress,
|
||||
/// used in layer-wise transmission
|
||||
kDISAGG_CONTEXT_TRANS_IN_PROGRESS = 12, ///< Waiting context-only request transmitting the kv cache,
|
||||
/// after computation finished
|
||||
kDISAGG_CONTEXT_COMPLETE = 13, ///< Context-only request finished kv cache transmission.
|
||||
|
||||
kDISAGG_GENERATION_INIT = 20, ///< New Generation request arrived at generation model
|
||||
kDISAGG_GENERATION_TRANS_IN_PROGRESS = 21, ///< Transmitting the kv cache
|
||||
kDISAGG_GENERATION_TRANS_COMPLETE = 22, ///< Kv cache transmission is finished
|
||||
kGENERATION_IN_PROGRESS = 23, ///< Generation phase is in progress
|
||||
kGENERATION_TO_COMPLETE = 24, ///< Generation phase is to be completed
|
||||
kGENERATION_COMPLETE = 25, ///< Generation phase completed
|
||||
kCONTEXT_INIT = 2, ///< Context phase starts
|
||||
kDISAGG_GENERATION_TRANS_COMPLETE = 3, ///< For disaggregated serving only
|
||||
kGENERATION_IN_PROGRESS = 4, ///< Generation phase is in progress
|
||||
kGENERATION_TO_COMPLETE = 5, ///< Generation phase is to be completed
|
||||
kGENERATION_COMPLETE = 6, ///< Generation phase completed
|
||||
kDISAGG_GENERATION_INIT = 7, ///< For disaggregated serving only:
|
||||
/// new Generation request arrived at generation model
|
||||
kDISAGG_CONTEXT_TRANS_IN_PROGRESS = 8, ///< For disaggregated serving only:
|
||||
/// Waiting context-only request transmitting the kv cache
|
||||
kDISAGG_CONTEXT_COMPLETE = 9, ///< Context-only request finished kv cache transmission.
|
||||
kDISAGG_GENERATION_TRANS_IN_PROGRESS = 10, ///< For disaggregated serving only: transmitting the kv cache
|
||||
kDISAGG_CONTEXT_INIT_AND_TRANS = 11, ///< For disaggregated serving only:
|
||||
/// Context phase starts and cache transmission is in progress
|
||||
};
|
||||
|
||||
enum LlmRequestType
|
||||
|
||||
@ -507,11 +507,6 @@ perf/test_perf.py::test_perf[starcoder2_3b-bench-pytorch-float16-input_output_le
|
||||
accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_fp8_block_scales_cuda_graph_padding SKIP (https://nvbugs/5303555)
|
||||
test_e2e.py::test_trtllm_bench_llmapi_launch[pytorch_backend-llama-v3-llama3-8b] SKIP (https://nvbugs/5302895)
|
||||
test_e2e.py::test_trtllm_bench_llmapi_launch[trt_backend-llama-v3-llama3-8b] SKIP (https://nvbugs/5302895)
|
||||
cpp/test_multi_gpu.py::TestDisagg::test_orchestrator_params[llama-mpi_kvcache-90] SKIP (https://nvbugs/5305210)
|
||||
cpp/test_multi_gpu.py::TestDisagg::test_orchestrator_params[llama-nixl_kvcache-90] SKIP (https://nvbugs/5305210)
|
||||
cpp/test_multi_gpu.py::TestDisagg::test_orchestrator_params[llama-ucx_kvcache-90] SKIP (https://nvbugs/5305210)
|
||||
cpp/test_multi_gpu.py::TestDisagg::test_spawn_orchestrator[llama-nixl_kvcache-90] SKIP (https://nvbugs/5305210)
|
||||
cpp/test_multi_gpu.py::TestDisagg::test_spawn_orchestrator[llama-ucx_kvcache-90] SKIP (https://nvbugs/5305210)
|
||||
accuracy/test_llm_api_pytorch.py::TestLlama3_1_8BInstruct::test_fp8_llm_decoder SKIP (https://nvbugs/5292517)
|
||||
accuracy/test_disaggregated_serving.py::TestLlama3_1_8BInstruct::test_auto_dtype[False] SKIP (https://nvbugs/5303573)
|
||||
accuracy/test_disaggregated_serving.py::TestLlama3_1_8BInstruct::test_auto_dtype[True] SKIP (https://nvbugs/5303573)
|
||||
|
||||
Loading…
Reference in New Issue
Block a user