TensorRT-LLMs/cpp/tensorrt_llm/batch_manager/sequenceSlotManager.cpp
Robin Kobus 6d4b045d1f
refactor: Remove enforced sorted order of batch slots (#3502)
Signed-off-by: Robin Kobus <19427718+Funatiq@users.noreply.github.com>
2025-07-14 17:23:02 +02:00

111 lines
3.4 KiB
C++

/*
* SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "tensorrt_llm/batch_manager/sequenceSlotManager.h"
#include "tensorrt_llm/common/logger.h"
namespace tensorrt_llm::batch_manager
{
// Build a manager that hands out slot ids in the range [0, maxNumSlots).
//
// maxNumSlots:                 number of slots; every one of them starts out available.
// maxSequenceIdleMicroseconds: idle threshold, stored as a std::chrono::microseconds duration.
SequenceSlotManager::SequenceSlotManager(SlotIdType maxNumSlots, uint64_t maxSequenceIdleMicroseconds)
    : mMaxNumSlots(maxNumSlots)
    , mMaxSequenceIdleMicroseconds{std::chrono::microseconds(maxSequenceIdleMicroseconds)}
{
    // One last-use timepoint per slot, indexed by slot id.
    mLastTimepoint.resize(mMaxNumSlots);

    // Pre-size the id -> slot map for the maximum number of simultaneously mapped sequences.
    mSequenceIdToSlot.reserve(maxNumSlots);

    // Queue the slot ids in ascending order so that they are initially handed out as 0, 1, 2, ...
    SlotIdType nextSlot = 0;
    while (nextSlot < mMaxNumSlots)
    {
        mAvailableSlots.push(nextSlot);
        ++nextSlot;
    }
}
// Return the slot assigned to a sequence, allocating one when the sequence starts.
//
// startFlag:  when true, a free slot is taken from mAvailableSlots and bound to sequenceId;
//             when false, the slot previously bound to sequenceId is looked up.
// sequenceId: identifier of the sequence.
//
// Returns the slot on success. Returns an empty optional (after logging an error) when
//   - startFlag is set but sequenceId already owns a slot,
//   - startFlag is set but no free slot is left, or
//   - startFlag is not set and sequenceId has no slot.
// On success the slot's last-use timepoint is refreshed to the current steady_clock time.
std::optional<SequenceSlotManager::SlotIdType> SequenceSlotManager::getSequenceSlot(
    bool const& startFlag, SequenceIdType const& sequenceId)
{
    std::optional<SlotIdType> slot;
    if (startFlag)
    {
        if (mSequenceIdToSlot.find(sequenceId) != mSequenceIdToSlot.end())
        {
            // Duplicate start: do NOT take another slot. emplace() would not overwrite the existing
            // mapping, so a freshly popped slot would be tracked nowhere and never be released.
            TLLM_LOG_ERROR("Already specified start flag for sequence id: %lu", sequenceId);
        }
        else if (!mAvailableSlots.empty())
        {
            slot = mAvailableSlots.front();
            mAvailableSlots.pop();
            mSequenceIdToSlot.emplace(sequenceId, slot.value());
        }
        else
        {
            TLLM_LOG_ERROR("All available sequence slots are used");
        }
    }
    else
    {
        auto const it = mSequenceIdToSlot.find(sequenceId);
        if (it == mSequenceIdToSlot.end())
        {
            TLLM_LOG_ERROR("Could not find sequence id %lu in allocated sequence slots", sequenceId);
        }
        else
        {
            slot = it->second;
        }
    }

    // Record activity so that freeIdleSequenceSlots() measures idle time from this call.
    if (slot)
    {
        mLastTimepoint[slot.value()] = std::chrono::steady_clock::now();
    }
    return slot;
}
// Release the slot bound to sequenceId and make it available again.
// Does nothing when sequenceId has no slot.
void SequenceSlotManager::freeSequenceSlot(SequenceIdType sequenceId)
{
    auto const entry = mSequenceIdToSlot.find(sequenceId);
    if (entry == mSequenceIdToSlot.end())
    {
        return;
    }

    // Read the slot id before the map entry is removed.
    auto const releasedSlot = entry->second;
    mSequenceIdToSlot.erase(entry);
    mAvailableSlots.push(releasedSlot);
}
// Release the slot of every sequence that has been idle for longer than mMaxSequenceIdleMicroseconds.
//
// Idle time is the span between the slot's entry in mLastTimepoint and the steady_clock time sampled
// once at the start of this call. Each released slot is pushed back onto mAvailableSlots and its
// sequence id is removed from mSequenceIdToSlot.
void SequenceSlotManager::freeIdleSequenceSlots()
{
    auto const now = std::chrono::steady_clock::now();
    for (auto it = mSequenceIdToSlot.begin(); it != mSequenceIdToSlot.end();)
    {
        // Copy the entry by value: erase() below destroys the map element, so references bound to
        // it would dangle by the time the slot is pushed back to the free queue.
        auto const [sequenceId, slot] = *it;
        auto const idleMicroseconds
            = std::chrono::duration_cast<std::chrono::microseconds>(now - mLastTimepoint[slot]);
        if (idleMicroseconds > mMaxSequenceIdleMicroseconds)
        {
            TLLM_LOG_INFO("Releasing idle sequence with correlation id %lu idle time %li us", sequenceId,
                idleMicroseconds.count());
            // erase() returns the iterator following the removed element, keeping the loop valid.
            it = mSequenceIdToSlot.erase(it);
            mAvailableSlots.push(slot);
        }
        else
        {
            ++it;
        }
    }
}
} // namespace tensorrt_llm::batch_manager