[https://nvbugs/5578175][fix] Fix block range index (#8470)

Signed-off-by: Chuang Zhu <111838961+chuangz0@users.noreply.github.com>
Signed-off-by: Mike Iovine <6158008+mikeiovine@users.noreply.github.com>
Signed-off-by: Mike Iovine <miovine@nvidia.com>
This commit is contained in:
Chuang Zhu 2025-10-29 02:42:23 +08:00 committed by Mike Iovine
parent eca68e4465
commit 8846dac9b4

View File

@ -2145,7 +2145,7 @@ SizeType32 KVCacheManager::getNeededBlocksOneStep(
return 0;
}
auto const numCurrTokens = mSequences.at(req.mRequestId).getNumTokens();
auto const numCurrTokens = getSequence(req.mRequestId).getNumTokens();
auto const generatedTokens = numCurrTokens - req.getPromptLen();
auto const maxTokensToAddToKVCache = req.mMaxNewTokens - generatedTokens;
auto const tokensPerStep = req.getNumDraftTokens() + 1;
@ -2409,7 +2409,13 @@ void KVCacheManager::addSequence(
void KVCacheManager::storeContextBlocks(LlmRequest const& llmRequest)
{
auto const requestId = llmRequest.mRequestId;
if (mSequences.find(requestId) != mSequences.end())
bool found = false;
{
// protect the mSequences
std::scoped_lock lock(mSequencesMtx);
found = mSequences.find(requestId) != mSequences.end();
}
if (found)
{
auto& sequence = getSequence(requestId);
if (mEnableBlockReuse && !llmRequest.isDummyRequest())