diff --git a/cpp/tensorrt_llm/executor/cache_transmission/mooncake_utils/transferAgent.cpp b/cpp/tensorrt_llm/executor/cache_transmission/mooncake_utils/transferAgent.cpp index ce46e10351..bd956862ec 100644 --- a/cpp/tensorrt_llm/executor/cache_transmission/mooncake_utils/transferAgent.cpp +++ b/cpp/tensorrt_llm/executor/cache_transmission/mooncake_utils/transferAgent.cpp @@ -98,13 +98,14 @@ TransferState MooncakeTransferStatus::wait(int64_t timeout_ms) const mBatchFreed = true; TLLM_LOG_DEBUG("Batch ID %lu freed in wait()", mBatchId); syncSegmentCache(mEngine); + std::this_thread::sleep_for(std::chrono::milliseconds(1)); return TransferState::kSUCCESS; } // If timeout_ms < 0, wait indefinitely if (timeout_ms < 0) { - std::this_thread::yield(); + std::this_thread::sleep_for(std::chrono::milliseconds(1)); continue; } @@ -117,7 +118,7 @@ TransferState MooncakeTransferStatus::wait(int64_t timeout_ms) const return TransferState::kIN_PROGRESS; } - std::this_thread::yield(); + std::this_thread::sleep_for(std::chrono::milliseconds(1)); } } diff --git a/cpp/tests/unit_tests/multi_gpu/cacheTransceiverTest.cpp b/cpp/tests/unit_tests/multi_gpu/cacheTransceiverTest.cpp index 5e7528b8dd..8732d46cae 100644 --- a/cpp/tests/unit_tests/multi_gpu/cacheTransceiverTest.cpp +++ b/cpp/tests/unit_tests/multi_gpu/cacheTransceiverTest.cpp @@ -1304,6 +1304,11 @@ TEST_P(AsymmetricalCacheTest, TestCase) { GTEST_SKIP() << "Temporarily skipping cache transceiver tests with NIXL and MOONCAKE backend for CP."; } + if (isIndexerKCache && tensorrt_llm::common::getEnvUseMooncakeKvCache()) + { + // https://nvbugs/5760737 + GTEST_SKIP() << "Temporarily skipping cache transceiver tests with Mooncake backend for Indexer KCache."; + } std::vector lenList = {30, 10, 60, 80}; if (genCp > 1) { diff --git a/tests/integration/test_lists/waives.txt b/tests/integration/test_lists/waives.txt index ddde25ba75..f907e8961f 100644 --- a/tests/integration/test_lists/waives.txt +++ b/tests/integration/test_lists/waives.txt @@ -252,7 +252,6 @@ examples/test_granite.py::test_llm_granite[granite-3.0-1b-a400m-instruct-bfloat1 examples/test_granite.py::test_llm_granite[granite-3.0-2b-instruct-bfloat16] SKIP (https://nvbugs/5608979) unittest/executor/test_base_worker.py::TestWorkerBase SKIP (https://nvbugs/5759698) triton_server/test_triton.py::test_gpt_disaggregated_serving_bls[gpt-disaggregated-serving-bls] SKIP (https://nvbugs/5582118) -cpp/test_multi_gpu.py::test_cache_transceiver[8proc-mooncake_kvcache-90] SKIP (https://nvbugs/5760737) accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_eagle3_4gpus[trtllm-two_model-no_overlap_scheduler] SKIP (https://nvbugs/5760747) unittest/_torch/multi_gpu/test_mnnvl_allreduce.py::test_row_linear_residual_norm_fusion[no_fusion-strategy:8-dtype:bfloat16-hidden:8192-seqlen:[15]] SKIP (https://nvbugs/5761364) accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_bfloat16[mtp_nextn=2-attention_dp=False-cuda_graph=False-overlap_scheduler=False-torch_compile=False-enable_chunked_prefill=False] SKIP (https://nvbugs/5759338)