diff --git a/cpp/tensorrt_llm/batch_manager/kvCacheManager.cpp b/cpp/tensorrt_llm/batch_manager/kvCacheManager.cpp index f20019a0d1..e533c00476 100644 --- a/cpp/tensorrt_llm/batch_manager/kvCacheManager.cpp +++ b/cpp/tensorrt_llm/batch_manager/kvCacheManager.cpp @@ -1636,6 +1636,11 @@ std::pair> WindowBlockManager::sto TLLM_CHECK_WITH_INFO(block->getBlockId() == bid, "Block id mismatch " + std::to_string(block->getBlockId()) + " != " + std::to_string(bid)); needMatch = false; // no matching needed for following blocks + + if (block->getPrevBlock() != nullptr) + { + block->getPrevBlock()->removeNextBlock(block->getBlockKey()); + } block->setBlockKey(blockKey, static_cast(blockKey.uniqueTokens.size()) == mTokensPerBlock); block->setPrevBlock(searchRoot); block->setPrevBlockInSeq(searchRoot);