TensorRT-LLM/cpp/tensorrt_llm/executor/requestWithId.cpp

/*
* SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "requestWithId.h"
#include "tensorrt_llm/executor/serializeUtils.h"
#include <istream>
#include <ostream>
#include <sstream>
using namespace tensorrt_llm::executor;
namespace su = tensorrt_llm::executor::serialize_utils;
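// Both helpers below share one wire layout: a size_t element count, then one record per
// request holding its id, the serialized Request, its child request ids, and queuedStart
// encoded as a uint64_t steady_clock tick count.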
std::vector<char> tensorrt_llm::executor::RequestWithId::serializeReqWithIds(
    std::vector<RequestWithId> const& reqWithIds)
{
    // Compute the size of the serialized buffer
    size_t totalSize = 0;
    totalSize += sizeof(size_t);
    for (auto const& reqWithId : reqWithIds)
    {
        totalSize += su::serializedSize(reqWithId.id);
        totalSize += su::serializedSize(reqWithId.req);
        totalSize += su::serializedSize(reqWithId.childReqIds);
        totalSize += su::serializedSize(static_cast<uint64_t>(reqWithId.queuedStart.time_since_epoch().count()));
    }

    // Write directly into the preallocated buffer by pointing a stringbuf at it
    std::vector<char> buffer(totalSize);
    std::stringbuf strbuf{std::ios_base::out | std::ios_base::in};
    strbuf.pubsetbuf(buffer.data(), static_cast<std::streamsize>(buffer.size()));
    std::ostream ostream{&strbuf};

    // Serialize the count, then each request's fields in the same order used above
    su::serialize(reqWithIds.size(), ostream);
    for (auto const& reqWithId : reqWithIds)
    {
        su::serialize(reqWithId.id, ostream);
        su::serialize(reqWithId.req, ostream);
        su::serialize(reqWithId.childReqIds, ostream);
        // queuedStart is stored as a raw uint64_t tick count
        su::serialize(static_cast<uint64_t>(reqWithId.queuedStart.time_since_epoch().count()), ostream);
    }

    return buffer;
}

std::vector<RequestWithId> tensorrt_llm::executor::RequestWithId::deserializeReqWithIds(std::vector<char>& buffer)
{
    std::vector<RequestWithId> reqWithIds;
    su::VectorWrapBuf<char> strbuf{buffer};
    std::istream istream{&strbuf};

    // Read the fields back in exactly the order serializeReqWithIds wrote them
    auto numReq = su::deserialize<size_t>(istream);
    for (size_t req = 0; req < numReq; ++req)
    {
        auto const id = su::deserialize<IdType>(istream);
        auto const request = Serialization::deserializeRequest(istream);
        auto const childReqIds = su::deserialize<std::vector<IdType>>(istream);
        // Reconstruct the queuedStart time point from its serialized tick count
        auto const queuedStart = std::chrono::steady_clock::time_point{
            std::chrono::steady_clock::duration{su::deserialize<uint64_t>(istream)}};
        reqWithIds.emplace_back(RequestWithId{request, id, childReqIds, queuedStart});
    }

    return reqWithIds;
}
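
// Illustrative round trip (a sketch, not part of the executor API surface): it assumes the
// caller has already collected a std::vector<RequestWithId> named `pending`, e.g. requests
// pulled from the executor's queue, and that the byte buffer reaches the receiver intact.
//
//     std::vector<char> bytes = RequestWithId::serializeReqWithIds(pending);
//     // ... ship `bytes` to the receiving process ...
//     std::vector<RequestWithId> restored = RequestWithId::deserializeReqWithIds(bytes);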