/*
 * SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: Apache-2.0
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "tensorrt_llm/batch_manager/sequenceSlotManager.h"
#include "tensorrt_llm/common/logger.h"

namespace tensorrt_llm::batch_manager
{

/// @brief Build a manager in which every slot id in [0, maxNumSlots) starts out available.
/// @param maxNumSlots Number of slots handed out by this manager.
/// @param maxSequenceIdleMicroseconds Idle threshold, in microseconds, used by freeIdleSequenceSlots().
SequenceSlotManager::SequenceSlotManager(SlotIdType maxNumSlots, uint64_t maxSequenceIdleMicroseconds)
    : mMaxNumSlots(maxNumSlots)
    , mMaxSequenceIdleMicroseconds{std::chrono::microseconds(maxSequenceIdleMicroseconds)}
{
    mSequenceIdToSlot.reserve(maxNumSlots);
    for (SlotIdType slot = 0; slot < mMaxNumSlots; ++slot)
    {
        mAvailableSlots.emplace(slot);
    }
    // One last-use timestamp per slot, indexed by slot id.
    mLastTimepoint.resize(mMaxNumSlots);
}

/// @brief Return the slot bound to @p sequenceId, binding a free slot first when @p startFlag is set.
///
/// - startFlag set, id not yet bound: the slot at the front of the available queue is bound and returned;
///   an error is logged and an empty optional is returned when no slot is available.
/// - startFlag set, id already bound: an error is logged and the slot that is already bound is returned.
/// - startFlag not set: the bound slot is returned; an error is logged and an empty optional is returned
///   when the id is unknown.
///
/// Whenever a slot is returned, its last-use timestamp is refreshed.
///
/// @param startFlag Whether the request starts a new sequence.
/// @param sequenceId Identifier of the sequence.
/// @return The slot id, or an empty optional on failure.
// The trailing return type lets SlotIdType be looked up in the scope of the class.
auto SequenceSlotManager::getSequenceSlot(bool const& startFlag, SequenceIdType const& sequenceId)
    -> std::optional<SlotIdType>
{
    std::optional<SlotIdType> slot;
    auto const existing = mSequenceIdToSlot.find(sequenceId);

    if (startFlag)
    {
        if (existing != mSequenceIdToSlot.end())
        {
            TLLM_LOG_ERROR("Already specified start flag for sequence id: %lu", sequenceId);
            // Hand back the slot that is already bound. Taking a fresh slot here would leak it: emplace()
            // does not overwrite an existing key, so the new slot would be neither mapped nor available.
            slot = existing->second;
        }
        else if (!mAvailableSlots.empty())
        {
            slot = mAvailableSlots.front();
            mAvailableSlots.pop();
            mSequenceIdToSlot.emplace(sequenceId, slot.value());
        }
        else
        {
            TLLM_LOG_ERROR("All available sequence slots are used");
        }
    }
    else if (existing == mSequenceIdToSlot.end())
    {
        TLLM_LOG_ERROR("Could not find sequence id %lu in allocated sequence slots", sequenceId);
    }
    else
    {
        slot = existing->second;
    }

    if (slot)
    {
        mLastTimepoint[slot.value()] = std::chrono::steady_clock::now();
    }
    return slot;
}

/// @brief Unbind @p sequenceId and put its slot back into the available queue.
///
/// Does nothing when the id has no slot bound to it.
void SequenceSlotManager::freeSequenceSlot(SequenceIdType sequenceId)
{
    auto const it = mSequenceIdToSlot.find(sequenceId);
    if (it != mSequenceIdToSlot.end())
    {
        // Copy the slot id out before erase() destroys the map element.
        auto const slot = it->second;
        mSequenceIdToSlot.erase(it);
        mAvailableSlots.push(slot);
    }
}

/// @brief Release every bound slot whose last-use timestamp is older than the configured idle threshold.
///
/// A single steady_clock reading taken on entry is compared against all slots.
void SequenceSlotManager::freeIdleSequenceSlots()
{
    auto const now = std::chrono::steady_clock::now();
    for (auto it = mSequenceIdToSlot.begin(); it != mSequenceIdToSlot.end();)
    {
        auto const& sequenceId = it->first;
        // Taken by value: the slot id is still needed after the element has been erased below.
        auto const slot = it->second;
        auto const idleMicroseconds
            = std::chrono::duration_cast<std::chrono::microseconds>(now - mLastTimepoint[slot]);
        if (idleMicroseconds > mMaxSequenceIdleMicroseconds)
        {
            TLLM_LOG_INFO("Releasing idle sequence with correlation id %lu idle time %li us", sequenceId,
                idleMicroseconds.count());
            // erase() invalidates sequenceId; only the copied slot id is used from here on.
            it = mSequenceIdToSlot.erase(it);
            mAvailableSlots.push(slot);
        }
        else
        {
            ++it;
        }
    }
}

} // namespace tensorrt_llm::batch_manager