mirror of
https://github.com/NVIDIA/TensorRT-LLM.git
synced 2026-01-14 06:27:45 +08:00
[https://nvbugs/5578175][fix] Fix block range index (#8470)
Signed-off-by: Chuang Zhu <111838961+chuangz0@users.noreply.github.com> Signed-off-by: Mike Iovine <6158008+mikeiovine@users.noreply.github.com> Signed-off-by: Mike Iovine <miovine@nvidia.com>
This commit is contained in:
parent
eca68e4465
commit
8846dac9b4
@ -2145,7 +2145,7 @@ SizeType32 KVCacheManager::getNeededBlocksOneStep(
|
||||
return 0;
|
||||
}
|
||||
|
||||
auto const numCurrTokens = mSequences.at(req.mRequestId).getNumTokens();
|
||||
auto const numCurrTokens = getSequence(req.mRequestId).getNumTokens();
|
||||
auto const generatedTokens = numCurrTokens - req.getPromptLen();
|
||||
auto const maxTokensToAddToKVCache = req.mMaxNewTokens - generatedTokens;
|
||||
auto const tokensPerStep = req.getNumDraftTokens() + 1;
|
||||
@ -2409,7 +2409,13 @@ void KVCacheManager::addSequence(
|
||||
void KVCacheManager::storeContextBlocks(LlmRequest const& llmRequest)
|
||||
{
|
||||
auto const requestId = llmRequest.mRequestId;
|
||||
if (mSequences.find(requestId) != mSequences.end())
|
||||
bool found = false;
|
||||
{
|
||||
// protect the mSequences
|
||||
std::scoped_lock lock(mSequencesMtx);
|
||||
found = mSequences.find(requestId) != mSequences.end();
|
||||
}
|
||||
if (found)
|
||||
{
|
||||
auto& sequence = getSequence(requestId);
|
||||
if (mEnableBlockReuse && !llmRequest.isDummyRequest())
|
||||
|
||||
Loading…
Reference in New Issue
Block a user