TensorRT-LLMs/cpp/tensorrt_llm/layers/lookaheadDecodingUtils.cpp
Kaiyu Xie 9bd15f1937
TensorRT-LLM v0.10 update
* TensorRT-LLM Release 0.10.0

---------

Co-authored-by: Loki <lokravi@amazon.com>
Co-authored-by: meghagarwal <16129366+megha95@users.noreply.github.com>
2024-06-05 20:43:25 +08:00

76 lines
1.9 KiB
C++

#include <sstream>
#include "tensorrt_llm/common/logger.h"
#include "tensorrt_llm/layers/lookaheadDecodingUtils.h"
namespace tensorrt_llm::layers
{
using namespace tensorrt_llm::runtime;
using TensorPtr = ITensor::SharedPtr;
//! Formats the contents of \p tensor as a grid of tokens and passes the text to TLLM_LOG_DEBUG.
//!
//! The output starts with "<name>: <shape>(" followed by one text line per row. Within a row,
//! entries are separated by ',' and the last entry is followed by ';'. A token whose value lies
//! in [0, 255] is written as a single-quoted character; any other value is written as a bare
//! number.
//!
//! \param name   Label written in front of the dump.
//! \param tensor Tensor to dump. Its elements are read as TokenIdType.
//!               NOTE(review): only shape dimensions 0 and 1 are read, so this presumably expects
//!               a rank-2 tensor -- confirm against callers.
void printTokens2d(char const* name, TensorPtr const& tensor)
{
    auto M = tensor->getShape().d[0];
    auto N = tensor->getShape().d[1];
    auto tr = BufferRange<TokenIdType>(*tensor);
    std::ostringstream buf;
    buf << name << ": " << tensor->getShape() << "(\n";
    for (SizeType32 mi = 0; mi < M; mi++)
    {
        for (SizeType32 ni = 0; ni < N; ni++)
        {
            // Element (mi, ni) is fetched from the flat range at linear index mi * N + ni.
            auto token = tr[mi * N + ni];
            if (token >= 0 && token <= 255)
            {
                buf << "'" << static_cast<char>(token) << "'";
            }
            else
            {
                // Numeric ids are written without quotes; a lone trailing quote here would leave
                // the output unbalanced and disagree with printTokens.
                buf << token;
            }
            buf << (ni == (N - 1) ? ';' : ',');
        }
        // No line break after the final row so the closing ')' follows it directly.
        if (mi != M - 1)
        {
            buf << '\n';
        }
    }
    buf << ")" << '\n';
    TLLM_LOG_DEBUG(buf.str());
}
//! Writes every element of \p tensor on a single line and passes the text to TLLM_LOG_DEBUG.
//!
//! The output has the form "<name>: <shape>(e0,e1,...,)". Each element is followed by a comma,
//! including the last one. Values in [0, 255] are shown as a single-quoted character; all other
//! values are shown as numbers.
//!
//! \param name   Label written in front of the dump.
//! \param tensor Tensor to dump. Its elements are read as TokenIdType.
void printTokens(char const* name, TensorPtr const& tensor)
{
    std::ostringstream out;
    out << name << ": " << tensor->getShape() << "(";

    auto ids = BufferRange<TokenIdType>(*tensor);
    for (auto const& id : ids)
    {
        bool const outsideByteRange = id < 0 || id > 255;
        if (outsideByteRange)
        {
            out << id << ",";
        }
        else
        {
            out << "'" << static_cast<char>(id) << "',";
        }
    }

    // A plain newline yields the same text as std::endl; flushing a string stream has no effect.
    out << ")" << '\n';
    TLLM_LOG_DEBUG(out.str());
}
//! Writes every element of \p tensor as a number on a single line and passes the text to
//! TLLM_LOG_DEBUG.
//!
//! The output has the form "<name>: <shape>(e0,e1,...,)"; each element is followed by a comma,
//! including the last one.
//!
//! \param name   Label written in front of the dump.
//! \param tensor Tensor to dump. Its elements are read as TokenIdType.
void printTensor(char const* name, TensorPtr const& tensor)
{
    std::ostringstream out;
    out << name << ": " << tensor->getShape() << "(";

    auto values = BufferRange<TokenIdType>(*tensor);
    for (auto const& value : values)
    {
        out << value << ",";
    }

    // A plain newline yields the same text as std::endl; flushing a string stream has no effect.
    out << ")" << '\n';
    TLLM_LOG_DEBUG(out.str());
}
} // namespace tensorrt_llm::layers