TensorRT-LLMs/cpp/include/tensorrt_llm/runtime/iTensor.h
2023-10-15 21:26:20 +08:00

239 lines
7.6 KiB
C++

/*
* Copyright (c) 2022-2023, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once
#include "tensorrt_llm/common/assert.h"
#include "tensorrt_llm/runtime/common.h"
#include "tensorrt_llm/runtime/iBuffer.h"
#include <NvInferRuntime.h>
#include <cstdint>
#include <functional>
#include <initializer_list>
#include <memory>
#include <numeric>
#include <ostream>
#include <stdexcept>
#include <string>
#include <type_traits>
namespace nvinfer1
{
class IExecutionContext;
}
namespace tensorrt_llm::runtime
{
class ITensor : virtual public IBuffer
{
public:
using UniquePtr = std::unique_ptr<ITensor>;
using SharedPtr = std::shared_ptr<ITensor>;
using UniqueConstPtr = std::unique_ptr<ITensor const>;
using SharedConstPtr = std::shared_ptr<ITensor const>;
using Shape = nvinfer1::Dims;
//!
//! \brief Returns the tensor dimensions.
//!
[[nodiscard]] virtual Shape const& getShape() const = 0;
//!
//! \brief Sets the tensor dimensions. The new size of the tensor will be `volume(dims)`
//!
virtual void reshape(Shape const& dims) = 0;
~ITensor() override = default;
//!
//! \brief Not allowed to copy.
//!
ITensor(ITensor const&) = delete;
//!
//! \brief Not allowed to copy.
//!
ITensor& operator=(ITensor const&) = delete;
//!
//! \brief Returns the volume of the dimensions. Returns -1 if `d.nbDims < 0`.
//!
static std::int64_t volume(Shape const& dims)
{
{
return dims.nbDims < 0 ? -1
: dims.nbDims == 0
? 0
: std::accumulate(dims.d, dims.d + dims.nbDims, std::int64_t{1}, std::multiplies<>{});
}
}
//!
//! \brief Returns the volume of the dimensions. Throws if `d.nbDims < 0`.
//!
static std::size_t volumeNonNegative(Shape const& shape)
{
auto const vol = volume(shape);
TLLM_CHECK_WITH_INFO(0 <= vol, "Invalid tensor shape");
return static_cast<std::size_t>(vol);
}
//!
//! \brief Removes the given *unit* dimension from `shape`.
//!
//! \param shape The shape to squeeze.
//! \param dim The dimension that should be removed ("squeezed").
//! \return A new shape without the unit dimension.
//!
static Shape squeeze(Shape const& shape, SizeType dim)
{
TLLM_CHECK_WITH_INFO(shape.nbDims > 0, "Cannot squeeze 1-dimensional tensor");
TLLM_CHECK_WITH_INFO(
dim < shape.nbDims, common::fmtstr("Invalid index %d, tensor has %d dimensions", dim, shape.nbDims));
TLLM_CHECK_WITH_INFO(shape.d[dim] == 1, "Can only squeeze dimension of size 1");
Shape newDims{shape.nbDims - 1};
std::copy(shape.d, shape.d + dim, newDims.d);
std::copy(shape.d + dim + 1, shape.d + shape.nbDims, newDims.d + dim);
return newDims;
}
//!
//! \brief Removes the given *unit* dimensions from this tensor.
//!
void squeeze(SizeType dim)
{
reshape(squeeze(getShape(), dim));
}
//!
//! \brief Creates a sliced view on the underlying `tensor`. The view will have the same data type as `tensor`.
//!
//! \param tensor The tensor to view.
//! \param offset The offset of the view w.r.t. dimension 0 of the tensor.
//! \param size The size of the view w.r.t. dimension 0 of the tensor.
//! \return A view on the `buffer`.
//!
static UniquePtr slice(SharedPtr tensor, std::size_t offset, std::size_t size);
template <typename TConstPtr, std::enable_if_t<std::is_const_v<PointerElementType<TConstPtr>>, int> = 0>
static UniqueConstPtr slice(TConstPtr&& tensor, std::size_t offset, std::size_t size)
{
return ITensor::slice(constPointerCast(std::forward<TConstPtr>(tensor)), offset, size);
}
static UniquePtr slice(SharedPtr tensor, std::size_t offset)
{
auto const dims = tensor->getShape();
auto const size = (dims.nbDims > 0 ? dims.d[0] : 0) - offset;
return ITensor::slice(std::move(tensor), offset, size);
}
template <typename TConstPtr, std::enable_if_t<std::is_const_v<PointerElementType<TConstPtr>>, int> = 0>
static UniqueConstPtr slice(TConstPtr&& tensor, std::size_t offset)
{
return ITensor::slice(constPointerCast(std::forward<TConstPtr>(tensor)), offset);
}
//!
//! \brief Returns a view on the underlying `buffer` (or tensor) with the given shape.
//!
//! \param tensor The tensor to view.
//! \param shape The shape of the view.
//! \return A view on the `tensor`.
//!
static UniquePtr view(IBuffer::SharedPtr buffer, Shape const& dims);
template <typename TConstPtr, std::enable_if_t<std::is_const_v<PointerElementType<TConstPtr>>, int> = 0>
static UniqueConstPtr view(TConstPtr&& tensor, Shape const& dims)
{
return ITensor::view(constPointerCast(std::forward<TConstPtr>(tensor)), dims);
}
//!
//! \brief Returns a view on the underlying `tensor` which can be independently reshaped.
//!
//! \param tensor The tensor to view.
//! \return A view on the `tensor`.
//!
static UniquePtr view(SharedPtr tensor)
{
auto shapes = tensor->getShape();
return ITensor::view(std::move(tensor), shapes);
}
//!
//! \brief Wraps the given `data` in an `ITensor`. The `ITensor` will not own the underlying `data` and cannot
//! be reshaped beyond `capacity`.
//!
//! \param data The data to wrap.
//! \param type The data type of the `data`.
//! \param shape The shape of the tensor.
//! \param capacity The capacity of the buffer.
//! \return An `ITensor`.
static UniquePtr wrap(void* data, nvinfer1::DataType type, Shape const& shape, std::size_t capacity);
static UniquePtr wrap(void* data, nvinfer1::DataType type, Shape const& shape)
{
return wrap(data, type, shape, volumeNonNegative(shape));
}
template <typename T>
static UniquePtr wrap(T* data, Shape const& shape, std::size_t capacity)
{
return wrap(data, TRTDataType<T>::value, shape, capacity);
}
template <typename T>
static UniquePtr wrap(T* data, Shape const& shape)
{
return wrap<T>(data, shape, volumeNonNegative(shape));
}
template <typename T>
static UniquePtr wrap(std::vector<T>& v, Shape const& shape)
{
return wrap<T>(v.data(), shape, v.capacity());
}
//!
//! \brief A convenience function to create a tensor shape with the given dimensions.
//!
static Shape makeShape(std::initializer_list<SizeType> const& dims);
//!
//! \brief A convenience function for converting a tensor shape to a `string`.
//!
static std::string toString(Shape const& dims);
protected:
ITensor() = default;
};
//! \brief Utility function to print a shape.
inline std::ostream& operator<<(std::ostream& output, ITensor::Shape const& dims)
{
return output << ITensor::toString(dims);
}
//! \brief Utility function to print a tensor with its shape.
std::ostream& operator<<(std::ostream& output, ITensor const& tensor);
} // namespace tensorrt_llm::runtime