[https://nvbugs/5740377][fix] Lock resource to fix potential access to released data (#10827)

Signed-off-by: Hui Gao <huig@nvidia.com>
This commit is contained in:
HuiGao-NV 2026-01-21 14:17:29 +08:00 committed by GitHub
parent d60d6ff6fd
commit 1592dfab6d
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 10 additions and 4 deletions

View File

@ -368,6 +368,9 @@ private:
std::optional<std::chrono::steady_clock::time_point::duration> mExpirationTime;
// Hash for the event manager
size_t mHash;
// Mutex guarding access to mNextBlocks
mutable std::mutex mNextBlocksMutex;
};
class GenerationRequest
@ -1021,7 +1024,7 @@ private:
std::shared_ptr<kv_connector::KvCacheConnectorManager> mKvCacheConnectorManager;
// Mutex guarding access to mCachedBlocksRoot
std::mutex mCachedBlocksRootMutex;
mutable std::mutex mCachedBlocksRootMutex;
// Record which sequence is using the block
std::map<KVCacheBlock::IdType, LlmRequest::RequestIdType> mBlockToSequence;

View File

@ -416,6 +416,7 @@ void KVCacheBlock::setPrevBlockInSeq(BlockPtr prevBlock)
void KVCacheBlock::addNextBlock(BlockKey const& blockKey, BlockPtr block)
{
std::lock_guard<std::mutex> lock(mNextBlocksMutex);
if (mNextBlocks.find(blockKey) == mNextBlocks.end())
{
mNextBlocks[blockKey] = std::move(block);
@ -425,6 +426,8 @@ void KVCacheBlock::addNextBlock(BlockKey const& blockKey, BlockPtr block)
std::tuple<bool, SizeType32, BlockPtr> KVCacheBlock::findMatchingBlock(
BlockKey const& blockKey, bool enablePartialReuse, bool copyOnPartialReuse) const
{
std::lock_guard<std::mutex> lock(mNextBlocksMutex);
if (blockKey.uniqueTokens.size() == 0 || mNextBlocks.size() == 0)
{
return {false, 0, nullptr};
@ -474,11 +477,13 @@ void KVCacheBlock::freeLeafBlock()
// Erases the entry keyed by blockKey from mNextBlocks.
// mNextBlocksMutex is held for the duration of the erase.
void KVCacheBlock::removeNextBlock(BlockKey const& blockKey)
{
    std::lock_guard<std::mutex> const guard{mNextBlocksMutex};
    mNextBlocks.erase(blockKey);
}
void KVCacheBlock::freeDescendantsRecursively()
{
std::lock_guard<std::mutex> lock(mNextBlocksMutex);
bool hasChildren = !mNextBlocks.empty();
if (hasChildren)
{
@ -1176,6 +1181,7 @@ std::optional<BlockKey> WindowBlockManager::findNewContextBlock(
auto blockKeys = buildBlockKeys(blockedUniqueTokens, llmRequest);
BlockKey ret;
ret.loraTaskId = llmRequest.getLoraTaskId();
std::lock_guard<std::mutex> lock(mCachedBlocksRootMutex);
auto searchRoot = mCachedBlocksRoot;
for (auto const& blockKey : blockKeys)
{

View File

@ -239,13 +239,10 @@ full:sm89/accuracy/test_disaggregated_serving.py::TestLlama3_1_8BInstruct::test_
accuracy/test_llm_api_pytorch.py::TestQwen3_235B_A22B::test_nvfp4[latency_moe_trtllm] SKIP (https://nvbugs/5721672)
accuracy/test_llm_api_pytorch.py::TestLlama3_1_8BInstruct::test_fp8_4gpus[tp4-fp8kv=True-attn_backend=FLASHINFER-torch_compile=True] SKIP (https://nvbugs/5741304)
unittest/executor/test_rpc.py::TestRpcCorrectness::test_incremental_task_async SKIP (https://nvbugs/5741476)
accuracy/test_llm_api_pytorch.py::TestLlama3_1_8BInstruct::test_fp8_4gpus[pp4-fp8kv=True-attn_backend=TRTLLM-torch_compile=False] SKIP (https://nvbugs/5740377)
accuracy/test_llm_api_pytorch.py::TestQwen3_30B_A3B::test_nvfp4[dep4_latency_moe_cutlass-torch_compile=False] SKIP (https://nvbugs/5740377)
test_e2e.py::test_trtllm_bench_llmapi_launch[pytorch_backend-llama-v3-llama3-8b] SKIP (https://nvbugs/5744432)
test_e2e.py::test_trtllm_serve_multimodal_example SKIP (https://nvbugs/5747920)
test_e2e.py::test_trtllm_serve_example SKIP (https://nvbugs/5747938)
triton_server/test_triton.py::test_opt[opt] SKIP (https://nvbugs/5739981)
accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_nvfp4_4gpus_online_eplb[fp8kv=True-moe_backend=TRTLLM] SKIP (https://nvbugs/5740377)
cpp/test_multi_gpu.py::TestDisagg::test_symmetric_executor[gpt-2proc-mpi_kvcache-90] SKIP (https://nvbugs/5755941)
examples/test_granite.py::test_llm_granite[granite-3.0-1b-a400m-instruct-bfloat16] SKIP (https://nvbugs/5608979)
examples/test_granite.py::test_llm_granite[granite-3.0-2b-instruct-bfloat16] SKIP (https://nvbugs/5608979)