diff --git a/cpp/tensorrt_llm/batch_manager/kvCacheManager.cpp b/cpp/tensorrt_llm/batch_manager/kvCacheManager.cpp index aaae98ff2a..b69db6d1bc 100644 --- a/cpp/tensorrt_llm/batch_manager/kvCacheManager.cpp +++ b/cpp/tensorrt_llm/batch_manager/kvCacheManager.cpp @@ -2145,7 +2145,7 @@ SizeType32 KVCacheManager::getNeededBlocksOneStep( return 0; } - auto const numCurrTokens = mSequences.at(req.mRequestId).getNumTokens(); + auto const numCurrTokens = getSequence(req.mRequestId).getNumTokens(); auto const generatedTokens = numCurrTokens - req.getPromptLen(); auto const maxTokensToAddToKVCache = req.mMaxNewTokens - generatedTokens; auto const tokensPerStep = req.getNumDraftTokens() + 1; @@ -2409,7 +2409,13 @@ void KVCacheManager::addSequence( void KVCacheManager::storeContextBlocks(LlmRequest const& llmRequest) { auto const requestId = llmRequest.mRequestId; - if (mSequences.find(requestId) != mSequences.end()) + bool found = false; + { + // protect the mSequences + std::scoped_lock lock(mSequencesMtx); + found = mSequences.find(requestId) != mSequences.end(); + } + if (found) { auto& sequence = getSequence(requestId); if (mEnableBlockReuse && !llmRequest.isDummyRequest())