TensorRT-LLM/cpp/tests/utils/common.h

/*
* SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: NVIDIA TensorRT Source Code License Agreement
*
* NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
* property and proprietary rights in and to this material, related
* documentation and any modifications thereto. Any use, reproduction,
* disclosure or distribution of this material and related documentation
* without an express license agreement from NVIDIA CORPORATION or
* its affiliates is strictly prohibited.
*/
#pragma once

#include "tensorrt_llm/executor/types.h"
#include "tensorrt_llm/runtime/bufferManager.h"
#include "tensorrt_llm/runtime/common.h"
#include "tensorrt_llm/runtime/iBuffer.h"
#include "tensorrt_llm/runtime/iTensor.h"

#include <cmath>
#include <cstdint>
#include <filesystem>
#include <tuple>
#include <utility>
#include <vector>

namespace tensorrt_llm::testing
{

namespace fs = std::filesystem;
namespace tr = tensorrt_llm::runtime;

using tr::SizeType32;
using tr::TokenIdType;
using tr::ITensor;
using tr::MemoryType;
using executor::VecTokens;

class ModelIds
{
public:
    ModelIds() = default;

    constexpr ModelIds(TokenIdType endId, TokenIdType padId)
        : endId{endId}
        , padId{padId}
    {
    }

    TokenIdType endId{};
    TokenIdType padId{};
};

class BeamResult
{
public:
    explicit BeamResult(SizeType32 beamWidth)
        : beamWidth{beamWidth}
    {
    }

    BeamResult(SizeType32 beamWidth, fs::path resultsFile, fs::path contextLogitsFile, fs::path genLogitsFile,
        fs::path cumLogProbsFile, fs::path logProbsFile)
        : beamWidth{beamWidth}
        , resultsFile{std::move(resultsFile)}
        , contextLogitsFile{std::move(contextLogitsFile)}
        , genLogitsFile{std::move(genLogitsFile)}
        , cumLogProbsFile{std::move(cumLogProbsFile)}
        , logProbsFile{std::move(logProbsFile)}
    {
    }

    SizeType32 beamWidth;
    fs::path resultsFile;
    fs::path contextLogitsFile;
    fs::path genLogitsFile;
    fs::path cumLogProbsFile;
    fs::path logProbsFile;
};

using BeamResults = std::vector<BeamResult>;
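// Illustrative usage sketch; the data directory and file names are hypothetical
// placeholders chosen for this example, not something this header prescribes:
//
//   fs::path const dataDir{"data/gpt2"};
//   BeamResults results;
//   // Beam width 1, no reference files attached.
//   results.emplace_back(1);
//   // Beam width 2, with reference data on disk to compare against.
//   results.emplace_back(2, dataDir / "output_tokens.npy", dataDir / "context_logits.npy",
//       dataDir / "generation_logits.npy", dataDir / "cum_log_probs.npy", dataDir / "log_probs.npy");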

class TestData
{
public:
    explicit TestData(SizeType32 nbGivenInputs, SizeType32 beamWidth)
        : nbGivenInputs{nbGivenInputs}
        , beamWidth{beamWidth}
    {
        expectedOutputLengths.resize(nbGivenInputs * beamWidth);
        draftTokens.resize(nbGivenInputs);
        draftLogits.resize(nbGivenInputs);
        acceptedDraftTokensLengths.resize(nbGivenInputs);
        expectedGenerationLogits.resize(nbGivenInputs);
        expectedContextLogits.resize(nbGivenInputs);
        expectedCumLogProbs.resize(nbGivenInputs);
        expectedLogProbs.resize(nbGivenInputs);
    }

    void loadLogProbs(fs::path const& cumLogProbsFile, fs::path const& logProbsFile, tr::BufferManager const& manager);

    void loadContextLogits(fs::path const& contextLogitsFile, std::vector<SizeType32> const& givenInputLengths,
        tr::BufferManager const& manager);

    void loadGenerationLogits(fs::path const& genLogitsFile, tr::BufferManager const& manager);

    void makeDraft(SizeType32 maxDraftTokens, bool acceptDraftByLogits, fs::path const& genLogitsFile,
        std::vector<SizeType32> const& givenInputLengths, tr::BufferManager const& manager);

    SizeType32 nbGivenInputs{};
    SizeType32 beamWidth{};
    SizeType32 maxSeqLen{};
    ITensor::SharedPtr expectedOutputIds;
    std::vector<SizeType32> expectedOutputLengths;
    std::vector<TokenIdType> endIds;
    std::vector<VecTokens> draftTokens;
    std::vector<ITensor::SharedPtr> draftLogits;
    std::vector<SizeType32> acceptedDraftTokensLengths;
    std::vector<ITensor::SharedPtr> expectedGenerationLogits;
    std::vector<ITensor::SharedPtr> expectedContextLogits;
    std::vector<ITensor::SharedPtr> expectedCumLogProbs;
    std::vector<ITensor::SharedPtr> expectedLogProbs;
};
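
// Illustrative usage sketch. The stream/manager construction, data directory, file names,
// and givenInputLengths below are assumptions made for this example, not something this
// header prescribes:
//
//   auto const streamPtr = std::make_shared<tr::CudaStream>();
//   tr::BufferManager manager{streamPtr};
//   TestData testData{/*nbGivenInputs=*/8, /*beamWidth=*/2};
//   testData.loadLogProbs(dataDir / "cum_log_probs.npy", dataDir / "log_probs.npy", manager);
//   testData.loadContextLogits(dataDir / "context_logits.npy", givenInputLengths, manager);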

inline bool almostEqual(float a, float b, float atol = 1e-2, float rtol = 1e-3)
{
    // Params: a = value to compare, b = reference value.
    // This function follows the implementation of numpy.isclose(), which checks
    // abs(a - b) <= (atol + rtol * abs(b)).
    // Note that the inequality is asymmetric: b is treated as the reference value.
    // It applies an absolute tolerance and a relative tolerance at the same time to
    // account for both absolute and relative errors. The default values of atol and
    // rtol are borrowed from numpy.isclose(). If both a and b are NaN, the result is true.
    if (std::isnan(a) && std::isnan(b))
    {
        return true;
    }
    return std::fabs(a - b) <= (atol + rtol * std::fabs(b));
}
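
// Worked example of the tolerance check above: with the defaults atol = 1e-2 and rtol = 1e-3
// and reference value b = 100.0f, the allowed deviation is 0.01 + 0.001 * 100.0 = 0.11, so
//   almostEqual(100.1f, 100.0f);  // true:  |a - b| ~= 0.1 <= 0.11
//   almostEqual(100.2f, 100.0f);  // false: |a - b| ~= 0.2 >  0.11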

bool compareLogits(ITensor const& groundTruthLogits, ITensor const& outputLogits, float atol = 1e-2, float rtol = 1e-3);

std::tuple<SizeType32, SizeType32> getRequestGivenInputIdxLength(
    std::uint64_t requestId, SizeType32 nbGivenInputs, std::vector<SizeType32> const& givenInputLengths);

std::tuple<std::vector<SizeType32>, SizeType32, SizeType32> getGivenInputLengths(
    ITensor const& givenInput, SizeType32 padId);
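
// Illustrative usage sketch; givenInput, padId, and requestId are hypothetical inputs, and the
// meaning assigned to the tuple elements is an assumption based on the parameter names:
//
//   auto const [givenInputLengths, nbGivenInputs, maxInputLength] = getGivenInputLengths(*givenInput, padId);
//   auto const [inputIdx, inputLength] = getRequestGivenInputIdxLength(requestId, nbGivenInputs, givenInputLengths);
//   bool const logitsMatch = compareLogits(*testData.expectedContextLogits.at(inputIdx), *outputContextLogits);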
} // namespace tensorrt_llm::testing