/*
 * Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#pragma once

#include "tensorrt_llm/common/assert.h"
#include "tensorrt_llm/common/logger.h"
#include "tensorrt_llm/runtime/bufferManager.h"
#include "tensorrt_llm/runtime/cudaStream.h"
#include "tensorrt_llm/runtime/iTensor.h"

#include <algorithm>
#include <cstdint>
#include <initializer_list>
#include <memory>
#include <sstream>
#include <string>
#include <type_traits>

namespace tensorrt_llm::layers
{

//! A BufferRange that also records the tensor's strides, adding
//! multi-dimensional indexing on top of the flat range interface.
template <typename T>
class BufferLocation : public runtime::BufferRange<T>
{
public:
    using typename runtime::BufferRange<T>::size_type;
    using runtime::BufferRange<T>::begin;
    using runtime::BufferRange<T>::operator[];

    BufferLocation(T* data, size_type size)
        : runtime::BufferRange<T>{data, size}
    {
    }

    //! Construct from a mutable tensor; participates in overload resolution
    //! only when T is non-const.
    template <typename U = T, std::enable_if_t<!std::is_const_v<U>, bool> = true>
    explicit BufferLocation(runtime::ITensor& tensor)
        : BufferLocation(runtime::bufferCast<T>(tensor), tensor.getSize())
    {
        mStrides = runtime::ITensor::strides(tensor.getShape());
    }

    //! Construct from a const tensor; participates in overload resolution
    //! only when T is const.
    template <typename U = T, std::enable_if_t<std::is_const_v<U>, bool> = true>
    explicit BufferLocation(runtime::ITensor const& tensor)
        : BufferLocation(runtime::bufferCast<T>(tensor), tensor.getSize())
    {
        mStrides = runtime::ITensor::strides(tensor.getShape());
    }

    inline T& at(runtime::ITensor::Shape const& dims)
    {
        return *ptr(dims);
    }

    inline T& at(std::initializer_list<runtime::ITensor::DimType64> const& dims)
    {
        return *ptr(dims);
    }

    template <typename... Args>
    inline T& at(Args... args)
    {
        runtime::ITensor::DimType64 offset = 0;
        runtime::ITensor::DimType64 dims = 0;
        atHelper(offset, dims, args...);
        return *(begin() + offset);
    }

    inline T& operator[](runtime::ITensor::Shape const& dims)
    {
        return *ptr(dims);
    }

    inline T& operator[](std::initializer_list<runtime::ITensor::DimType64> const& dims)
    {
        return *ptr(dims);
    }

    inline T* ptr(runtime::ITensor::Shape const& dims)
    {
        return begin() + offset(dims);
    }

    inline T* ptr(std::initializer_list<runtime::ITensor::DimType64> const& dims)
    {
        return ptr(runtime::ITensor::makeShape(dims));
    }

    //! Linear offset of an index vector: the dot product with the strides.
    runtime::ITensor::DimType64 offset(runtime::ITensor::Shape const& dims)
    {
        TLLM_CHECK(mStrides.nbDims == dims.nbDims);
        runtime::ITensor::DimType64 result = 0;
        for (runtime::ITensor::DimType64 di = 0; di < mStrides.nbDims; di++)
        {
            result += dims.d[di] * mStrides.d[di];
        }
        return result;
    }

    runtime::ITensor::DimType64 offset(std::initializer_list<runtime::ITensor::DimType64> const& dims)
    {
        return offset(runtime::ITensor::makeShape(dims));
    }

private:
    //! Recursion terminator for the variadic at().
    inline void atHelper(runtime::ITensor::DimType64& offset, runtime::ITensor::DimType64& dims) {}

    template <typename... Args>
    inline void atHelper(
        runtime::ITensor::DimType64& offset, runtime::ITensor::DimType64& dims, int dim, Args... args)
    {
        offset += dim * mStrides.d[dims++];
        atHelper(offset, dims, args...);
    }

private:
    runtime::ITensor::Shape mStrides;
};
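// Usage sketch (illustrative only, not part of the production API): for a
// row-major [B, N] tensor, strides are {N, 1}, so all of the following
// address the element at row b, column n:
//     BufferLocation<int32_t const> loc(tensor);
//     loc.at(b, n);                            // variadic indices
//     loc.at({b, n});                          // initializer list
//     loc[{b, n}];                             // operator[]
//     *(loc.begin() + loc.offset({b, n}));     // explicit offset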
//! Wraps a tensor together with a name for ad-hoc debug printing;
//! see the D() macro at the bottom of this file.
class DebugTensor
{
public:
    DebugTensor(runtime::ITensor const& tensor, char const* name)
        : mTensor(tensor)
        , mName(name)
    {
    }

    DebugTensor(runtime::ITensor::SharedConstPtr tensor, char const* name)
        : DebugTensor(*tensor, name)
    {
    }

    uint8_t const& u8(std::initializer_list<runtime::ITensor::DimType64> const& dims)
    {
        return (BufferLocation<uint8_t const>(mTensor))[dims];
    }

    uint8_t const& u8(int32_t idx)
    {
        return (BufferLocation<uint8_t const>(mTensor))[idx];
    }

    int8_t const& i8(std::initializer_list<runtime::ITensor::DimType64> const& dims)
    {
        return (BufferLocation<int8_t const>(mTensor))[dims];
    }

    int8_t const& i8(int32_t idx)
    {
        return (BufferLocation<int8_t const>(mTensor))[idx];
    }

    int32_t const& i32(std::initializer_list<runtime::ITensor::DimType64> const& dims)
    {
        return (BufferLocation<int32_t const>(mTensor))[dims];
    }

    int32_t const& i32(int32_t idx)
    {
        return (BufferLocation<int32_t const>(mTensor))[idx];
    }

    int64_t const& i64(std::initializer_list<runtime::ITensor::DimType64> const& dims)
    {
        return (BufferLocation<int64_t const>(mTensor))[dims];
    }

    int64_t const& i64(int32_t idx)
    {
        return (BufferLocation<int64_t const>(mTensor))[idx];
    }

    float const& f(std::initializer_list<runtime::ITensor::DimType64> const& dims)
    {
        return (BufferLocation<float const>(mTensor))[dims];
    }

    float const& f(int32_t idx)
    {
        return (BufferLocation<float const>(mTensor))[idx];
    }

    //! Copy the tensor to host if it lives on the GPU; return nullptr otherwise.
    runtime::BufferManager::ITensorPtr copyToHostOptional()
    {
        runtime::BufferManager::ITensorPtr hostPtr{nullptr};
        if (mTensor.getMemoryType() == runtime::MemoryType::kGPU)
        {
            runtime::BufferManager manager{std::make_shared<runtime::CudaStream>()};
            hostPtr = manager.copyFrom(mTensor, runtime::MemoryType::kCPU);
            manager.getStream().synchronize();
        }
        return hostPtr;
    }

    std::string string(void)
    {
        runtime::BufferManager::ITensorPtr hostPtr = copyToHostOptional();
        runtime::BufferRange<char const> range(hostPtr ? (*hostPtr) : mTensor);
        std::string result(range.size(), '\0');
        std::copy(range.begin(), range.end(), result.begin());
        return result;
    }

    std::string tokens(void)
    {
        using namespace tensorrt_llm::runtime;
        std::ostringstream buf;
        auto shape = mTensor.getShape();
        runtime::BufferManager::ITensorPtr hostPtr = copyToHostOptional();
        runtime::BufferRange<TokenIdType const> tensorRange(hostPtr ? (*hostPtr) : mTensor);
        buf << mName << ": " << mTensor.getMemoryTypeName() << ',' << mTensor.getDataTypeName() << ',' << shape;
        auto line = [&buf](TokenIdType const* array, SizeType32 size)
        {
            buf << '[';
            for (SizeType32 i = 0; i < size; i++)
            {
                auto token = array[i];
                if (token >= ' ' && token <= '~')
                {
                    // Printable-ASCII token ids are rendered as quoted characters.
                    buf << '\'' << static_cast<char>(token) << '\'';
                }
                else
                {
                    buf << token;
                }
                if (i != size - 1)
                {
                    buf << ',';
                }
            }
            buf << ']';
        };
        if (shape.nbDims == 0)
        {
            buf << "[]";
        }
        else if (shape.nbDims == 1)
        {
            line(tensorRange.begin(), shape.d[0]);
        }
        else if (shape.nbDims == 2)
        {
            buf << '[';
            for (runtime::SizeType32 i = 0; i < shape.d[0]; i++)
            {
                buf << "\n " << i << ": ";
                line(tensorRange.begin() + i * shape.d[1], shape.d[1]);
            }
            buf << ']';
        }
        else
        {
            buf << "Too Large to be printed";
        }
        return buf.str();
    }
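    // Output sketch (illustrative; the exact shape formatting comes from the
    // ITensor::Shape stream operator): a [2, 3] token tensor holding
    // {72, 105, 33, 1, 2, 3} would print roughly as
    //     ids: CPU,INT32,(2, 3)[
    //      0: ['H','i','!']
    //      1: [1,2,3]]
    // where 'H', 'i', '!' are the printable-ASCII renderings of 72, 105, 33.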
    template <typename T>
    std::string values(void)
    {
        using namespace tensorrt_llm::runtime;
        std::ostringstream buf;
        auto shape = mTensor.getShape();
        runtime::BufferManager::ITensorPtr hostPtr = copyToHostOptional();
        runtime::BufferRange<T const> tensorRange(hostPtr ? (*hostPtr) : mTensor);
        buf << mName << ": " << mTensor.getMemoryTypeName() << ',' << mTensor.getDataTypeName() << ',' << shape;
        auto line = [&buf](T const* array, SizeType32 size)
        {
            buf << '[';
            for (SizeType32 i = 0; i < size; i++)
            {
                buf << static_cast<T>(array[i]);
                if (i != size - 1)
                {
                    buf << ',';
                }
            }
            buf << ']';
        };
        if (shape.nbDims == 0)
        {
            buf << "[]";
        }
        else if (shape.nbDims == 1)
        {
            line(tensorRange.begin(), shape.d[0]);
        }
        else if (shape.nbDims == 2)
        {
            buf << '[';
            for (runtime::SizeType32 i = 0; i < shape.d[0]; i++)
            {
                buf << "\n " << i << ": ";
                line(tensorRange.begin() + i * shape.d[1], shape.d[1]);
            }
            buf << ']';
        }
        else
        {
            buf << "Too Large to be printed";
        }
        return buf.str();
    }

    //! Dispatch on the runtime data type to the typed values<T>() above.
    std::string values(void)
    {
        switch (mTensor.getDataType())
        {
        case nvinfer1::DataType::kBOOL: return values<bool>();
        case nvinfer1::DataType::kFLOAT: return values<float>();
        case nvinfer1::DataType::kINT8: return values<int8_t>();
        case nvinfer1::DataType::kINT32: return values<int32_t>();
        case nvinfer1::DataType::kINT64: return values<int64_t>();
        case nvinfer1::DataType::kUINT8: return values<uint8_t>();
        default: return mName + ": Unsupported data type";
        }
    }

    std::string shape(void)
    {
        using namespace tensorrt_llm::runtime;
        std::ostringstream buf;
        buf << mName << ": " << mTensor.getShape();
        return buf.str();
    }

    void print_tokens(void)
    {
        TLLM_LOG_DEBUG(tokens());
    }

    void print_values(void)
    {
        TLLM_LOG_DEBUG(values());
    }

    void print_shape(void)
    {
        TLLM_LOG_DEBUG(shape());
    }

private:
    runtime::ITensor const& mTensor;
    std::string mName;
};

#define D(x) tensorrt_llm::layers::DebugTensor(x, #x)
#define PRINT_TOKENS(x) D(x).print_tokens()
#define PRINT_VALUES(x) D(x).print_values()
#define PRINT_SHAPE(x) D(x).print_shape()

} // namespace tensorrt_llm::layers
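// Usage sketch (illustrative; assumes a tensor variable is in scope and the
// logger runs at debug level, since all output goes through TLLM_LOG_DEBUG):
//     runtime::ITensor::SharedConstPtr outputIds = /* ... */;
//     PRINT_SHAPE(outputIds);   // logs "outputIds: <shape>"
//     PRINT_TOKENS(outputIds);  // logs token ids, printable ASCII as chars
//     PRINT_VALUES(outputIds);  // logs raw values, dispatched on data type
// D(x) stringizes the variable name, so every log line is self-labeling.
// GPU tensors are copied to host first via copyToHostOptional(), which
// synchronizes its stream, so these macros are for debugging only.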