mirror of
https://github.com/NVIDIA/TensorRT-LLM.git
synced 2026-02-05 02:31:33 +08:00
[None][fix] Bugfix/mtp with async scheduler (#10941)
Signed-off-by: Patrice Castonguay <55748270+pcastonguay@users.noreply.github.com> Co-authored-by: rongwei <scutizhang@tencent.com>
This commit is contained in:
parent
6f07fa81d7
commit
d548b29a41
@ -2915,6 +2915,18 @@ void KVCacheManager::removeToken(RequestIdType requestId)
|
||||
|
||||
void KVCacheManager::rewindKVCache(RequestIdType requestId, SizeType32 rewindLengths)
|
||||
{
|
||||
// Check if the sequence still exists before rewinding
|
||||
// In overlap mode with MTP, the request may have been terminated and removed
|
||||
// from mSequences before rewindKVCache is called
|
||||
{
|
||||
std::scoped_lock lck(mSequencesMtx);
|
||||
if (mSequences.find(requestId) == mSequences.end())
|
||||
{
|
||||
TLLM_LOG_DEBUG("Request %lu has already been removed from KV cache manager, skipping rewind", requestId);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
for (SizeType32 si = 0; si < rewindLengths; ++si)
|
||||
{
|
||||
removeToken(requestId);
|
||||
|
||||
Loading…
Reference in New Issue
Block a user