From d548b29a4110d94b35e8fe1b60c1832818f71885 Mon Sep 17 00:00:00 2001 From: Patrice Castonguay <55748270+pcastonguay@users.noreply.github.com> Date: Sat, 24 Jan 2026 07:19:54 -0500 Subject: [PATCH] [None][fix] Bugfix/mtp with async scheduler (#10941) Signed-off-by: Patrice Castonguay <55748270+pcastonguay@users.noreply.github.com> Co-authored-by: rongwei --- cpp/tensorrt_llm/batch_manager/kvCacheManager.cpp | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/cpp/tensorrt_llm/batch_manager/kvCacheManager.cpp b/cpp/tensorrt_llm/batch_manager/kvCacheManager.cpp index 4138e4c605..f20019a0d1 100644 --- a/cpp/tensorrt_llm/batch_manager/kvCacheManager.cpp +++ b/cpp/tensorrt_llm/batch_manager/kvCacheManager.cpp @@ -2915,6 +2915,18 @@ void KVCacheManager::removeToken(RequestIdType requestId) void KVCacheManager::rewindKVCache(RequestIdType requestId, SizeType32 rewindLengths) { + // Check if the sequence still exists before rewinding + // In overlap mode with MTP, the request may have been terminated and removed + // from mSequences before rewindKVCache is called + { + std::scoped_lock lck(mSequencesMtx); + if (mSequences.find(requestId) == mSequences.end()) + { + TLLM_LOG_DEBUG("Request %lu has already been removed from KV cache manager, skipping rewind", requestId); + return; + } + } + for (SizeType32 si = 0; si < rewindLengths; ++si) { removeToken(requestId);