TensorRT-LLMs/cpp/tests/layers/randomLlm.cpp
Kaiyu Xie bf0a5afc92
Update TensorRT-LLM (#1598)
* Update TensorRT-LLM
2024-05-14 16:43:41 +08:00

236 lines
7.5 KiB
C++

#include "tests/layers/randomLlm.h"
namespace tensorrt_llm::tests::layers
{
// Builds a CPU INT32 tensor holding the characters of `str`, one element per character.
// If `shape` is given, its volume must equal str.size() and it becomes the tensor's shape;
// otherwise the tensor is one-dimensional with str.size() elements.
TensorPtr initTensor(std::string str, std::optional<ITensor::Shape> shape)
{
    auto const flatShape = ITensor::makeShape({static_cast<SizeType32>(str.size())});
    if (shape.has_value())
    {
        // A caller-supplied shape may rearrange the elements but must not change their count.
        TLLM_CHECK(ITensor::volume(flatShape) == ITensor::volume(*shape));
    }
    TensorPtr tensor = BufferManager::cpu(shape.has_value() ? *shape : flatShape, nvinfer1::DataType::kINT32);
    auto elements = BufferRange<TokenIdType>(*tensor);
    auto dst = elements.begin();
    for (auto src = str.begin(); src != str.end(); ++src)
    {
        // Implicit char -> TokenIdType conversion, exactly as an element-wise copy performs it.
        *dst = *src;
        ++dst;
    }
    return tensor;
}
// Allocates a new CPU float tensor with the same shape as the vocabulary tensor and fills it
// with the logits for `token` via the in-place overload.
TensorPtr RandomTokenLogits::tokenToLogits(TokenIdType token) const
{
    auto const vocabShape = mVocabulary->getShape();
    TensorPtr freshLogits = BufferManager::cpu(vocabShape, nvinfer1::DataType::kFLOAT);
    tokenToLogits(freshLogits, token);
    return freshLogits;
}
// Writes one logit per vocabulary entry into `logits`.
// The entry equal to `token` gets a base score of 1, every other entry 0; a token of -1 selects
// the vocabulary's invalid token instead. Each score then receives rand()-based noise in
// [0, 0.255], which is smaller than the gap between the selected entry and the rest.
// `logits` must have exactly getVocabSize() elements.
void RandomTokenLogits::tokenToLogits(TensorPtr logits, TokenIdType token) const
{
    TLLM_CHECK(ITensor::volume(logits->getShape()) == getVocabSize());
    auto scores = BufferRange<float>(*logits);
    auto vocab = BufferRange<TokenIdType>(*mVocabulary);
    bool const wantsInvalid = (token == -1);
    auto scoreIt = scores.begin();
    auto vocabIt = vocab.begin();
    while (scoreIt != scores.end() && vocabIt != vocab.end())
    {
        bool hit = (*vocabIt == token);
        if (!hit && wantsInvalid)
        {
            hit = (*vocabIt == getInvalidToken());
        }
        // Keep the arithmetic in double and narrow once on assignment.
        double const base = hit ? 1.0 : 0.0;
        double const noise = static_cast<float>(rand() % 256) / 1000.0;
        *scoreIt = base + noise;
        ++scoreIt;
        ++vocabIt;
    }
}
// Returns the vocabulary entry whose logit is largest (arg-max over exp(logit)).
// `logits` must have exactly getVocabSize() elements. Ties go to the earliest entry because
// the comparison is strict.
// Returns -1 when no entry is selected (empty vocabulary, or every logit is NaN); -1 is the
// same value tokenToLogits() accepts as "invalid token". Previously the result was left
// uninitialized in that case.
TokenIdType RandomTokenLogits::logitsToToken(TensorPtr logits) const
{
    TLLM_CHECK(ITensor::volume(logits->getShape()) == getVocabSize());
    auto logitsRange = BufferRange<float>(*logits);
    auto vocabRange = BufferRange<TokenIdType>(*mVocabulary);
    float max = -FLT_MAX;
    TokenIdType result{-1};
    auto itl = logitsRange.begin();
    auto itv = vocabRange.begin();
    for (; itl != logitsRange.end() && itv != vocabRange.end(); ++itl, ++itv)
    {
        // exp() of a non-NaN logit is never negative, so the first such entry always beats
        // the -FLT_MAX seed.
        float cur = exp(*itl);
        if (cur > max)
        {
            max = cur;
            result = *itv;
        }
    }
    return result;
}
std::list<TensorPtr> RandomTokenLogits::stringToLogits(std::string tokens) const
{
std::list<TensorPtr> result;
for (auto& token : tokens)
{
result.push_back(tokenToLogits(static_cast<TokenIdType>(token)));
}
return result;
}
// Fills `logits` in place: slice k along the first dimension receives the logits for the k-th
// character of `tokens`. `logits` must hold tokens.size() * getVocabSize() elements.
void RandomTokenLogits::stringToLogits(TensorPtr logits, std::string tokens) const
{
    TLLM_CHECK(ITensor::volume(logits->getShape()) == tokens.size() * getVocabSize());
    int row = 0;
    for (char const ch : tokens)
    {
        TensorPtr rowLogits = ITensor::slice(logits, row, 1);
        tokenToLogits(rowLogits, static_cast<TokenIdType>(ch));
        ++row;
    }
}
// Fills `logits` in place from a tensor of token ids: slice k along the first dimension of
// `logits` receives the logits for the k-th element of `tokens`.
// `logits` must hold volume(tokens) * getVocabSize() elements.
void RandomTokenLogits::tensorToLogits(TensorPtr logits, TensorPtr tokens) const
{
    TLLM_CHECK(ITensor::volume(logits->getShape()) == ITensor::volume(tokens->getShape()) * getVocabSize());
    auto tokenIds = BufferRange<TokenIdType>(*tokens);
    int row = 0;
    for (auto const tokenId : tokenIds)
    {
        TensorPtr rowLogits = ITensor::slice(logits, row, 1);
        tokenToLogits(rowLogits, tokenId);
        ++row;
    }
}
std::string RandomTokenLogits::logitsToString(std::list<TensorPtr> logits) const
{
std::string result;
for (auto& token : logits)
{
result.push_back(logitsToToken(token));
}
return result;
}
// Decodes a batched logits tensor into a string: each slice along the first dimension is
// converted to a token and appended as one character.
std::string RandomTokenLogits::logitsToString(TensorPtr logits) const
{
    auto const rowCount = logits->getShape().d[0];
    std::string decoded;
    for (auto row = 0; row < rowCount; ++row)
    {
        TensorPtr rowLogits = ITensor::slice(logits, row, 1);
        decoded.push_back(logitsToToken(rowLogits));
    }
    return decoded;
}
// Decodes a batched logits tensor into a new one-dimensional CPU INT32 tensor: element k is the
// token decoded from slice k along the first dimension of `logits`.
TensorPtr RandomTokenLogits::logitsToTensor(TensorPtr logits) const
{
    auto rowCount = logits->getShape().d[0];
    TensorPtr tokens = BufferManager::cpu(ITensor::makeShape({rowCount}), nvinfer1::DataType::kINT32);
    auto tokenIds = BufferRange<TokenIdType>(*tokens);
    for (auto row = 0; row < rowCount; ++row)
    {
        TensorPtr rowLogits = ITensor::slice(logits, row, 1);
        tokenIds[row] = logitsToToken(rowLogits);
    }
    return tokens;
}
// Number of entries in the vocabulary, i.e. the total element count of the vocabulary tensor.
SizeType32 RandomTokenLogits::getVocabSize() const
{
    auto const elementCount = ITensor::volume(mVocabulary->getShape());
    // Conversion to the declared return type, made explicit.
    return static_cast<SizeType32>(elementCount);
}
// Returns the last entry of the vocabulary, which this class uses as the "invalid" token.
// NOTE(review): reads before the end of the range without a size check — assumes the
// vocabulary is non-empty.
TokenIdType RandomTokenLogits::getInvalidToken() const
{
    auto vocab = BufferRange<TokenIdType>(*mVocabulary);
    auto last = vocab.end();
    --last;
    return *last;
}
// Returns the second-to-last entry of the vocabulary, which this class uses as the "end" token.
// NOTE(review): no size check — assumes the vocabulary has at least two entries.
TokenIdType RandomTokenLogits::getEndToken() const
{
    auto vocab = BufferRange<TokenIdType>(*mVocabulary);
    auto secondToLast = vocab.end();
    secondToLast -= 2;
    return *secondToLast;
}
// Overwrites every element of `inout` whose corresponding `mask` element is zero with the
// table's invalid token; elements with a non-zero mask are left untouched.
// `inout` must hold at least as many elements as `mask`.
void RandomLlm::sampleByMask(TensorPtr inout, TensorPtr mask) const
{
    auto len = ITensor::volume(mask->getShape());
    // The original check compared the mask volume with itself and so never validated `inout`.
    // The loop below indexes `inout` for every mask element, so that is the bound to enforce.
    TLLM_CHECK(ITensor::volume(inout->getShape()) >= len);
    auto inoutRange = BufferRange<TokenIdType>(*inout);
    auto maskRange = BufferRange<TokenIdType>(*mask);
    auto invalid = mTable->getInvalidToken();
    for (SizeType32 i = 0; i < len; i++)
    {
        if (!maskRange[i])
        {
            inoutRange[i] = invalid;
        }
    }
}
// Returns true when `script` matches the oracle element-for-element starting at oracle index
// `offset`.
// NOTE(review): no bounds check — assumes offset + volume(script) does not exceed the oracle
// length.
bool RandomLlm::verify(SizeType32 const offset, TensorPtr const script) const
{
    auto oracle = BufferRange<TokenIdType>(*mOracle);
    auto candidate = BufferRange<TokenIdType>(*script);
    auto const count = ITensor::volume(script->getShape());
    auto const first = oracle.begin() + offset;
    auto const last = oracle.begin() + offset + count;
    return std::equal(first, last, candidate.begin());
}
// Runs one simulated model step: foretell() predicts a token for every input element into a
// temporary INT32 tensor shaped like `input`, and the table then expands those tokens into
// `output` as logits. `input` and `position` must have the same number of elements.
void RandomLlm::forward(TensorPtr output, TensorPtr const input, TensorPtr const position) const
{
    auto const inputShape = input->getShape();
    TLLM_CHECK(ITensor::volume(inputShape) == ITensor::volume(position->getShape()));
    TensorPtr predicted = BufferManager::cpu(inputShape, nvinfer1::DataType::kINT32);
    foretell(predicted, input, position);
    mTable->tensorToLogits(output, predicted);
}
// Predicts, for every input element i, the token that follows it and writes it to output[i].
//
// `input` and `position` must have the same number of elements and `output` at least as many.
// position[i] is the oracle index that input[i] claims to occupy. Walking the elements in order,
// each position must be exactly one more than the position on top of a stack of earlier
// elements (after popping entries whose position is not smaller); the elements left on that
// stack form the context of element i.
//
// Element i is "right" when every context element matches the oracle at its position, and
// "legal" when position[i] + 1 is a valid oracle index. The result is
// oracle[position[i] + 1] or the table's invalid token, chosen as described at the bottom.
//
// Changes from the previous version, none of which affect well-formed inputs:
//  * empty input returns immediately instead of reading position[0] / mask[0][0];
//  * a position that empties the stack now fails a TLLM_CHECK instead of calling back() on an
//    empty vector;
//  * oracle reads are bounds-checked: an out-of-range context position counts as a mismatch.
void LookaheadRandomLlm::foretell(TensorPtr output, TensorPtr const input, TensorPtr const position) const
{
    TLLM_CHECK(ITensor::volume(input->getShape()) == ITensor::volume(position->getShape()));
    TLLM_CHECK(ITensor::volume(output->getShape()) >= ITensor::volume(input->getShape()));
    auto outputRange = BufferRange<TokenIdType>(*output);
    auto inputRange = BufferRange<TokenIdType>(*input);
    auto positionRange = BufferRange<TokenIdType>(*position);
    auto oracleRange = BufferRange<TokenIdType>(*mOracle);
    auto len = ITensor::volume(input->getShape());
    auto olen = ITensor::volume(mOracle->getShape());
    if (len == 0)
    {
        // Nothing to predict; the code below unconditionally touches element 0.
        return;
    }

    // mask[i][j] is true when element j belongs to the context of element i.
    std::vector<std::vector<bool>> mask(len, std::vector<bool>(len, false));
    // Stack of (element index, position) for the chain ending at the current element.
    std::vector<std::pair<SizeType32, SizeType32>> stack;
    stack.push_back(std::make_pair(0, positionRange[0]));
    mask[0][0] = true;
    for (auto i = 1; i < len; i++)
    {
        auto cur = positionRange[i];
        // Drop chain entries that are not strictly before the current position.
        while (!stack.empty() && cur <= stack.back().second)
        {
            stack.pop_back();
        }
        // An empty stack means position[i] <= position[0], which has no predecessor.
        TLLM_CHECK(!stack.empty());
        TLLM_CHECK(cur == stack.back().second + 1);
        stack.push_back(std::make_pair(i, cur));
        for (auto prev : stack)
        {
            mask[i][prev.first] = true;
        }
    }

    // First index >= 2 (and < len - 1) whose position is position[0] + 1; if none is found the
    // scan stops at the loop bound. Elements before this index take the lookahead branch below,
    // the rest take the verify branch.
    auto verifyStart = 2;
    for (; verifyStart < len - 1; verifyStart++)
    {
        if (positionRange[verifyStart] == positionRange[0] + 1)
        {
            break;
        }
    }

    auto invalid = mTable->getInvalidToken();
    for (auto i = 0; i < len; i++)
    {
        auto const next = positionRange[i] + 1;
        bool legal = next >= 0 && next < olen;
        bool right = true;
        for (auto j = 0; j < len; j++)
        {
            if (!mask[i][j])
            {
                continue;
            }
            auto const pos = positionRange[j];
            bool const inOracle = pos >= 0 && pos < olen;
            right = right && inOracle && oracleRange[pos] == inputRange[j];
        }
        if (i < verifyStart)
        { // lookahead might be right
            // With a wrong context the oracle token is still produced when rand() % 5 != 0.
            outputRange[i] = ((right || rand() % 5) && legal) ? oracleRange[positionRange[i] + 1] : invalid;
        }
        else
        { // verify should be wrong.
            outputRange[i] = (right && legal) ? oracleRange[positionRange[i] + 1] : invalid;
        }
    }
}
} // namespace tensorrt_llm::tests::layers