/*
 * Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#include "userbuffersManager.h"
|
|
#include "tensorrt_llm/common/logger.h"
|
|
|
|
namespace tensorrt_llm::runtime::ub
{

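// Custom deleter for UBBufferPtr: instead of freeing the memory, hand the
// buffer back to the manager so a later allocation can reuse it.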
void UserBufferDeleter::operator()(void* ptr)
{
    UserBuffersManager::get_instance().release_buffer(ptr);
}

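// Process-wide singleton accessor (function-local static, thread-safe since C++11).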
UserBuffersManager& UserBuffersManager::get_instance()
|
|
{
|
|
static UserBuffersManager allocator;
|
|
return allocator;
|
|
}
|
|
|
|
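// Initializes the UserBuffers allocator for this rank's world configuration and
// records the fixed size used for every buffer in the pool.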
void UserBuffersManager::initialize(
    int64_t tp_size, int64_t pp_size, int64_t cp_size, int64_t rank, int64_t gpus_per_node, int64_t buffer_size)
{
    std::lock_guard<std::mutex> lock(mutex_);
    tensorrt_llm::runtime::WorldConfig world_config(tp_size, pp_size, cp_size, rank, gpus_per_node);
    tensorrt_llm::runtime::ub::ub_initialize(world_config);
    TLLM_CHECK(tensorrt_llm::runtime::ub::ub_is_initialized());
    buffer_size_ = buffer_size;
}

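// Returns an owning pointer plus the UBBuffer metadata. Buffers are pooled:
// a previously released buffer is handed out again before any new one is
// allocated, and every pooled buffer has the fixed size configured in
// initialize(). The boolean in each buffers_ entry tracks whether that
// buffer is currently in use.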
std::pair<UBBufferPtr, UBBuffer> UserBuffersManager::allocate_userbuffers(int64_t buffer_size)
|
|
{
|
|
std::lock_guard<std::mutex> lock(mutex_);
|
|
TLLM_CHECK(buffer_size <= buffer_size_);
|
|
|
|
// Check for all unused buffers
|
|
int i = 0;
|
|
for (auto& buffer : buffers_)
|
|
{
|
|
if (buffer.second)
|
|
{
|
|
i++;
|
|
continue;
|
|
}
|
|
buffer.second = true;
|
|
TLLM_LOG_DEBUG("Reusing buffer %d", i);
|
|
return std::make_pair(std::unique_ptr<void, UserBufferDeleter>(buffer.first.addr), buffer.first);
|
|
}
|
|
|
|
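    // No free buffer in the pool: allocate a new one of the configured size
    // (buffer_size_, not the requested buffer_size) and mark it as in use.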
    auto new_ub = tensorrt_llm::runtime::ub::ub_allocate(buffer_size_);
    TLLM_CHECK(!new_ub.invalid());
    buffers_.push_back({new_ub, true});
    TLLM_LOG_DEBUG("Creating new buffer %d", static_cast<int>(buffers_.size() - 1));
    return std::make_pair(std::unique_ptr<void, UserBufferDeleter>(new_ub.addr), new_ub);
}

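// Returns the buffer starting at `addr` to the pool. Invoked by
// UserBufferDeleter when the owning UBBufferPtr is destroyed.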
void UserBuffersManager::release_buffer(void* addr)
{
    std::lock_guard<std::mutex> lock(mutex_);
    auto buffer_iter = std::find_if(
        buffers_.begin(), buffers_.end(), [addr](auto const& buffer) { return buffer.first.addr == addr; });
    // The buffer must be registered and currently assigned to a tensor.
    TLLM_CHECK(buffer_iter != buffers_.end());
    TLLM_CHECK(buffer_iter->second);
    TLLM_CHECK(!buffer_iter->first.invalid());
    TLLM_LOG_DEBUG("Releasing buffer %d", static_cast<int>(std::distance(buffers_.begin(), buffer_iter)));
    buffer_iter->second = false;
}

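// Looks up the pooled buffer whose base address is `addr`; returns an empty
// (invalid) UBBuffer when the address is not managed by this pool.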
tensorrt_llm::runtime::ub::UBBuffer UserBuffersManager::search_buffer(void* addr)
{
    std::lock_guard<std::mutex> lock(mutex_);
    auto buffer_iter = std::find_if(
        buffers_.begin(), buffers_.end(), [addr](auto const& buffer) { return buffer.first.addr == addr; });
    if (buffer_iter == buffers_.end())
    {
        return tensorrt_llm::runtime::ub::UBBuffer();
    }
    return buffer_iter->first;
}

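// Exposes the underlying UserBuffers communicator handle.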
tensorrt_llm::runtime::ub::communicator* UserBuffersManager::comm()
{
    return tensorrt_llm::runtime::ub::ub_comm();
}

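// Convenience free function mirroring UserBuffersManager::initialize(),
// presumably for callers (e.g. bindings) that should not touch the singleton directly.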
void initialize_userbuffers_manager(
    int64_t tp_size, int64_t pp_size, int64_t cp_size, int64_t rank, int64_t gpus_per_node, int64_t buffer_size)
{
    UserBuffersManager::get_instance().initialize(tp_size, pp_size, cp_size, rank, gpus_per_node, buffer_size);
}

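// Usage sketch (illustrative, not part of the original file): initialize once
// per process, then allocate; the unique_ptr hands the buffer back to the pool
// on destruction. The concrete argument values below are assumptions.
//
//     initialize_userbuffers_manager(/*tp_size=*/2, /*pp_size=*/1, /*cp_size=*/1,
//         /*rank=*/0, /*gpus_per_node=*/8, /*buffer_size=*/1 << 20);
//     auto [ptr, buffer] = UserBuffersManager::get_instance().allocate_userbuffers(1 << 20);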
} // namespace tensorrt_llm::runtime::ub