[None][fix] Bugfix/mtp with async scheduler (#10941)

Signed-off-by: Patrice Castonguay <55748270+pcastonguay@users.noreply.github.com>
Co-authored-by: rongwei <scutizhang@tencent.com>
2026-02-05 02:31:33 +08:00 · 2026-01-24 07:19:54 -05:00 · 2026-01-24 07:19:54 -05:00 · d548b29a41
commit d548b29a41
parent 6f07fa81d7
1 changed file with 12 additions and 0 deletions
--- a/cpp/tensorrt_llm/batch_manager/kvCacheManager.cpp
+++ b/cpp/tensorrt_llm/batch_manager/kvCacheManager.cpp
@@ -2915,6 +2915,18 @@ void KVCacheManager::removeToken(RequestIdType requestId)

 void KVCacheManager::rewindKVCache(RequestIdType requestId, SizeType32 rewindLengths)
 {
+    // Check if the sequence still exists before rewinding
+    // In overlap mode with MTP, the request may have been terminated and removed
+    // from mSequences before rewindKVCache is called
+    {
+        std::scoped_lock lck(mSequencesMtx);
+        if (mSequences.find(requestId) == mSequences.end())
+        {
+            TLLM_LOG_DEBUG("Request %lu has already been removed from KV cache manager, skipping rewind", requestId);
+            return;
+        }
+    }
+
    for (SizeType32 si = 0; si < rewindLengths; ++si)
    {
        removeToken(requestId);