diff --git a/cpp/tensorrt_llm/batch_manager/kvCacheManager.cpp b/cpp/tensorrt_llm/batch_manager/kvCacheManager.cpp index 4138e4c605..f20019a0d1 100644 --- a/cpp/tensorrt_llm/batch_manager/kvCacheManager.cpp +++ b/cpp/tensorrt_llm/batch_manager/kvCacheManager.cpp @@ -2915,6 +2915,18 @@ void KVCacheManager::removeToken(RequestIdType requestId) void KVCacheManager::rewindKVCache(RequestIdType requestId, SizeType32 rewindLengths) { + // Check if the sequence still exists before rewinding + // In overlap mode with MTP, the request may have been terminated and removed + // from mSequences before rewindKVCache is called + { + std::scoped_lock lck(mSequencesMtx); + if (mSequences.find(requestId) == mSequences.end()) + { + TLLM_LOG_DEBUG("Request %lu has already been removed from KV cache manager, skipping rewind", requestId); + return; + } + } + for (SizeType32 si = 0; si < rewindLengths; ++si) { removeToken(requestId);