mirror of
https://github.com/NVIDIA/TensorRT-LLM.git
synced 2026-01-13 22:18:36 +08:00
fix mtp rewind corner case
This commit is contained in:
parent
09d9878385
commit
526b3ca235
@ -2861,6 +2861,18 @@ void KVCacheManager::removeToken(RequestIdType requestId)
|
||||
|
||||
void KVCacheManager::rewindKVCache(RequestIdType requestId, SizeType32 rewindLengths)
|
||||
{
|
||||
// Check if the sequence still exists before rewinding
|
||||
// In overlap mode with MTP, the request may have been terminated and removed
|
||||
// from mSequences before rewindKVCache is called
|
||||
{
|
||||
std::scoped_lock lck(mSequencesMtx);
|
||||
if (mSequences.find(requestId) == mSequences.end())
|
||||
{
|
||||
TLLM_LOG_DEBUG("Request %lu has already been removed from KV cache manager, skipping rewind", requestId);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
for (SizeType32 si = 0; si < rewindLengths; ++si)
|
||||
{
|
||||
removeToken(requestId);
|
||||
|
||||
Loading…
Reference in New Issue
Block a user