/*
 * Copyright (c) 2022-2024, NVIDIA CORPORATION. All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#pragma once

#include "tensorrt_llm/common/assert.h"
#include "tensorrt_llm/runtime/common.h"
#include "tensorrt_llm/runtime/iBuffer.h"

#include <NvInferRuntime.h>

#include <algorithm>
#include <cstdint>
#include <functional>
#include <initializer_list>
#include <limits>
#include <memory>
#include <numeric>
#include <optional>
#include <ostream>
#include <string>
#include <type_traits>
#include <vector>

namespace nvinfer1
{
class IExecutionContext;
} // namespace nvinfer1

namespace tensorrt_llm::runtime
{

class ITensor : virtual public IBuffer
{
public:
    using UniquePtr = std::unique_ptr<ITensor>;
    using SharedPtr = std::shared_ptr<ITensor>;
    using UniqueConstPtr = std::unique_ptr<ITensor const>;
    using SharedConstPtr = std::shared_ptr<ITensor const>;
    using Shape = nvinfer1::Dims;
    using DimType64 = std::remove_reference_t<decltype(Shape::d[0])>;
    using TensorMap = runtime::StringPtrMap<runtime::ITensor>;

    static_assert(std::is_same_v<DimType64, std::int64_t>, "This version of TRT-LLM requires TensorRT 10.0 or later.");

    ~ITensor() override = default;

    //!
    //! \brief Returns the tensor dimensions.
    //!
    [[nodiscard]] virtual Shape const& getShape() const = 0;

    //!
    //! \brief Returns the tensor's n-th dimension. If n is negative, returns the (nbDims + n)-th dimension.
    //! TODO: replace with a constexpr parameter when moving to C++20.
    //!
    template <SizeType32 n>
    [[nodiscard]] DimType64 getDimension() const
    {
        auto const shape = getShape();
        static_assert(n < shape.MAX_DIMS && n >= -shape.MAX_DIMS,
            "Trying to access a dimension of a tensor whose maximal shape cannot have that dimension.");
        if constexpr (n < 0)
        {
            return shape.d[shape.nbDims + n];
        }
        else
        {
            return shape.d[n];
        }
    }

    //!
    //! \brief Sets the tensor dimensions. The new size of the tensor will be `volume(dims)`.
    //!
    virtual void reshape(Shape const& dims) = 0;

    void resize(std::size_t newSize) override
    {
        if (newSize == getSize())
            return;

        reshape(makeShape({castSize(newSize)}));
    }

    //!
    //! \brief Not allowed to copy.
    //!
    ITensor(ITensor const&) = delete;

    //!
    //! \brief Not allowed to copy.
    //!
    ITensor& operator=(ITensor const&) = delete;

    //!
    //! \brief Returns the volume of the dimensions. Returns -1 if `dims.nbDims < 0`.
    //!
    static std::int64_t volume(Shape const& dims)
    {
        return dims.nbDims < 0 ? -1
            : dims.nbDims == 0
            ? 0
            : std::accumulate(dims.d, dims.d + dims.nbDims, std::int64_t{1}, std::multiplies<>{});
    }

    //!
    //! \brief Returns the volume of the dimensions. Throws if `shape.nbDims < 0`.
    //!
    static std::size_t volumeNonNegative(Shape const& shape)
    {
        auto const vol = volume(shape);
        TLLM_CHECK_WITH_INFO(0 <= vol, "Invalid tensor shape");
        return static_cast<std::size_t>(vol);
    }

    //!
    //! \brief Returns the strides of each dimension in a Shape.
    //!
    static Shape strides(Shape const& dims)
    {
        auto const nbDims = dims.nbDims;
        Shape strides{};
        strides.nbDims = nbDims;
        if (nbDims > 0)
        {
            strides.d[nbDims - 1] = 1;
        }
        for (int i = nbDims - 2; i >= 0; i--)
        {
            strides.d[i] = dims.d[i + 1] * strides.d[i + 1];
        }
        return strides;
    }
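
    //!
    //! \brief Illustrative sketch (not part of the API) of how the shape helpers above compose.
    //! Assumes `t` is any concrete ITensor instance; the numbers are placeholders.
    //!
    //! \code
    //! auto const shape   = ITensor::makeShape({2, 3, 4});
    //! auto const volume  = ITensor::volume(shape);  // 24
    //! auto const strides = ITensor::strides(shape); // {12, 4, 1}
    //! auto const lastDim = t->getDimension<-1>();   // last dimension of t's shape
    //! \endcode
    //!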

    //!
    //! \brief Removes the given *unit* dimension from `shape`.
    //!
    //! \param shape The shape to squeeze.
    //! \param dim The dimension that should be removed ("squeezed").
    //! \return A new shape without the unit dimension.
    //!
    static Shape squeeze(Shape const& shape, SizeType32 dim);

    //!
    //! \brief Adds a *unit* dimension to `shape` at the specified position.
    //!
    //! \param shape The shape to unsqueeze.
    //! \param dim The dimension where the unit dimension should be added.
    //! \return A new shape with the added unit dimension.
    //!
    static Shape unsqueeze(Shape const& shape, SizeType32 dim);

    //!
    //! \brief Removes the given *unit* dimension from this tensor.
    //!
    void squeeze(SizeType32 dim)
    {
        reshape(squeeze(getShape(), dim));
    }

    //!
    //! \brief Adds a *unit* dimension at the specified position.
    //!
    void unsqueeze(SizeType32 dim)
    {
        reshape(unsqueeze(getShape(), dim));
    }
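
    //!
    //! \brief Illustrative sketch (not part of the API): squeeze/unsqueeze only add or remove unit dimensions.
    //! Assumes `t` is a tensor of shape [1, 8, 1].
    //!
    //! \code
    //! t->squeeze(0);   // shape becomes [8, 1]
    //! t->unsqueeze(1); // shape becomes [8, 1, 1]
    //! \endcode
    //!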

    //!
    //! \brief Creates a sliced view on the underlying `tensor`. The view will have the same data type as `tensor`.
    //!
    //! \param tensor The tensor to view.
    //! \param offset The offset of the view w.r.t. dimension 0 of the tensor.
    //! \param size The size of the view w.r.t. dimension 0 of the tensor.
    //! \return A view on the `tensor`.
    //!
    static UniquePtr slice(SharedPtr tensor, std::size_t offset, std::size_t size);

    template <typename TConstPtr, std::enable_if_t<std::is_const_v<PointerElementType<TConstPtr>>, int> = 0>
    static UniqueConstPtr slice(TConstPtr&& tensor, std::size_t offset, std::size_t size)
    {
        return ITensor::slice(constPointerCast(std::forward<TConstPtr>(tensor)), offset, size);
    }

    static UniquePtr slice(SharedPtr tensor, std::size_t offset)
    {
        auto const dims = tensor->getShape();
        auto const size = (dims.nbDims > 0 ? dims.d[0] : 0) - offset;
        return ITensor::slice(std::move(tensor), offset, size);
    }

    template <typename TConstPtr, std::enable_if_t<std::is_const_v<PointerElementType<TConstPtr>>, int> = 0>
    static UniqueConstPtr slice(TConstPtr&& tensor, std::size_t offset)
    {
        return ITensor::slice(constPointerCast(std::forward<TConstPtr>(tensor)), offset);
    }

    //!
    //! \brief Creates a sliced view on the underlying `tensor` starting at a multi-dimensional offset.
    //!
    //! \param tensor The tensor to view.
    //! \param offsetDims The offset dimensions of the view.
    //! \param size The size of the view w.r.t. the last dimension in `offsetDims`.
    //! \return A view of shape [size, <remaining dimensions>], or [size] when `offsetDims` specifies all dimensions.
    //! \throw Whenever the offset overflows, or offset+size overflows in the last specified dimension.
    //!
    static UniquePtr slice(SharedPtr tensor, Shape const& offsetDims, DimType64 size);

    static UniquePtr slice(SharedPtr tensor, std::initializer_list<DimType64> const& offsetDims, DimType64 size)
    {
        return slice(std::move(tensor), makeShape(offsetDims), size);
    }

    template <typename TConstPtr, std::enable_if_t<std::is_const_v<PointerElementType<TConstPtr>>, int> = 0>
    static UniqueConstPtr slice(TConstPtr&& tensor, Shape const& offsetDims, std::size_t size)
    {
        return slice(constPointerCast(std::forward<TConstPtr>(tensor)), offsetDims, size);
    }

    template <typename TConstPtr, std::enable_if_t<std::is_const_v<PointerElementType<TConstPtr>>, int> = 0>
    static UniqueConstPtr slice(
        TConstPtr&& tensor, std::initializer_list<DimType64> const& offsetDims, std::size_t size)
    {
        return slice(constPointerCast(std::forward<TConstPtr>(tensor)), offsetDims, size);
    }

    //!
    //! \brief Returns the remaining slice along the last dimension of `offsetDims` when `size` is omitted.
    //!
    static UniquePtr slice(SharedPtr tensor, Shape const& offsetDims)
    {
        auto const dims = tensor->getShape();
        auto const nbDims = offsetDims.nbDims;
        auto const size = (dims.nbDims > 0 && nbDims > 0) ? dims.d[nbDims - 1] - offsetDims.d[nbDims - 1] : 0;
        return ITensor::slice(std::move(tensor), offsetDims, size);
    }

    static UniquePtr slice(SharedPtr tensor, std::initializer_list<DimType64> const& offsetDims)
    {
        return slice(std::move(tensor), makeShape(offsetDims));
    }

    template <typename TConstPtr, std::enable_if_t<std::is_const_v<PointerElementType<TConstPtr>>, int> = 0>
    static UniqueConstPtr slice(TConstPtr&& tensor, Shape const& offsetDims)
    {
        return slice(constPointerCast(std::forward<TConstPtr>(tensor)), offsetDims);
    }

    template <typename TConstPtr, std::enable_if_t<std::is_const_v<PointerElementType<TConstPtr>>, int> = 0>
    static UniqueConstPtr slice(TConstPtr&& tensor, std::initializer_list<DimType64> const& offsetDims)
    {
        return slice(constPointerCast(std::forward<TConstPtr>(tensor)), offsetDims);
    }

    //!
    //! \return Just the block at the given point, with shape [<remaining dimensions>],
    //!         or [1] when `offsetDims` specifies all dimensions.
    //!
    static UniquePtr at(SharedPtr tensor, Shape const& offsetDims)
    {
        auto result = slice(std::move(tensor), offsetDims, 1);
        if (result->getShape().nbDims > 1)
        {
            result->squeeze(0);
        }
        return result;
    }

    static UniquePtr at(SharedPtr tensor, std::initializer_list<DimType64> const& offsetDims)
    {
        return at(std::move(tensor), makeShape(offsetDims));
    }

    template <typename TConstPtr, std::enable_if_t<std::is_const_v<PointerElementType<TConstPtr>>, int> = 0>
    static UniqueConstPtr at(TConstPtr&& tensor, Shape const& offsetDims)
    {
        return at(constPointerCast(std::forward<TConstPtr>(tensor)), offsetDims);
    }

    template <typename TConstPtr, std::enable_if_t<std::is_const_v<PointerElementType<TConstPtr>>, int> = 0>
    static ITensor::UniqueConstPtr at(TConstPtr&& tensor, std::initializer_list<DimType64> const& offsetDims)
    {
        return at(constPointerCast(std::forward<TConstPtr>(tensor)), offsetDims);
    }

    //!
    //! \brief Returns a view on the underlying `buffer` (or tensor) with the given shape.
    //!
    //! \param buffer The buffer (or tensor) to view.
    //! \param dims The shape of the view.
    //! \return A view on the `buffer`.
    //!
    static UniquePtr view(IBuffer::SharedPtr buffer, Shape const& dims);

    template <typename TConstPtr, std::enable_if_t<std::is_const_v<PointerElementType<TConstPtr>>, int> = 0>
    static UniqueConstPtr view(TConstPtr&& tensor, Shape const& dims)
    {
        return ITensor::view(constPointerCast(std::forward<TConstPtr>(tensor)), dims);
    }

    //!
    //! \brief Returns a view on the underlying `tensor` which can be independently reshaped.
    //!
    //! \param tensor The tensor to view.
    //! \return A view on the `tensor`.
    //!
    static UniquePtr view(SharedPtr tensor)
    {
        auto shapes = tensor->getShape();
        return ITensor::view(std::move(tensor), shapes);
    }

    //!
    //! \brief Returns a flattened view on the underlying `tensor` which can be independently reshaped.
    //!
    //! \param tensor The tensor to flatten.
    //! \param sliceN Slice the first N elements after flattening. -1 means take the whole flattened tensor.
    //! \return A flattened view on the `tensor`.
    //!
    static UniquePtr flattenN(SharedPtr tensor, std::int64_t sliceN = -1)
    {
        UniquePtr flatten = ITensor::view(tensor, ITensor::makeShape({ITensor::volume(tensor->getShape()), 1}));
        if (sliceN > 0)
        {
            flatten = ITensor::slice(std::move(flatten), 0, sliceN);
        }
        return flatten;
    }
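
    //!
    //! \brief Illustrative sketch (not part of the API): composing the view helpers above.
    //! Assumes `t` is a SharedPtr to a tensor of shape [4, 8, 16]; the comments give the resulting view shapes.
    //!
    //! \code
    //! auto rows  = ITensor::slice(t, 1, 2);  // view of shape [2, 8, 16], starting at row 1
    //! auto block = ITensor::at(t, {1, 2});   // view of shape [16]
    //! auto flat  = ITensor::flattenN(t, 32); // view of shape [32, 1]
    //! auto whole = ITensor::view(t);         // independently reshapeable view of shape [4, 8, 16]
    //! \endcode
    //!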

    //!
    //! \brief Wraps the given `data` in an `ITensor`. The `ITensor` will not own the underlying `data` and cannot
    //! be reshaped beyond `capacity`.
    //!
    //! \param data The data to wrap.
    //! \param type The data type of the `data`.
    //! \param shape The shape of the tensor.
    //! \param capacity The capacity of the buffer.
    //! \return An `ITensor`.
    //!
    static UniquePtr wrap(void* data, nvinfer1::DataType type, Shape const& shape, std::size_t capacity);

    static UniquePtr wrap(void* data, nvinfer1::DataType type, Shape const& shape)
    {
        return wrap(data, type, shape, volumeNonNegative(shape));
    }

    template <typename T>
    static UniquePtr wrap(T* data, Shape const& shape, std::size_t capacity)
    {
        return wrap(data, TRTDataType<T>::value, shape, capacity);
    }

    template <typename T>
    static UniquePtr wrap(T* data, Shape const& shape)
    {
        return wrap(data, shape, volumeNonNegative(shape));
    }

    template <typename T>
    static UniquePtr wrap(std::vector<T>& v, Shape const& shape)
    {
        return wrap(v.data(), shape, v.capacity());
    }
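
    //!
    //! \brief Illustrative sketch (not part of the API): wrapping externally owned memory.
    //! Assumes a host std::vector<float> whose lifetime outlives the wrapped tensor; `batchSize` and
    //! `vocabSize` are placeholders.
    //!
    //! \code
    //! std::vector<float> logits(batchSize * vocabSize);
    //! auto view = ITensor::wrap(logits, ITensor::makeShape({batchSize, vocabSize}));
    //! // `view` does not own the memory and cannot be reshaped beyond logits.capacity() elements.
    //! \endcode
    //!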

    //!
    //! \brief A convenience function to create a tensor shape with the given dimensions.
    //!
    static Shape makeShape(std::initializer_list<DimType64> const& dims);

    //!
    //! \brief A convenience function for converting a tensor shape to a `string`.
    //!
    static std::string toString(Shape const& dims);

    //!
    //! \brief A convenience function to compare shapes.
    //!
    static bool shapeEquals(Shape const& lhs, Shape const& rhs)
    {
        return shapeEquals(lhs, rhs.d, rhs.nbDims);
    }

    //!
    //! \brief A convenience function to compare shapes.
    //!
    template <typename T>
    static bool shapeEquals(Shape const& lhs, T const* dims, SizeType32 count)
    {
        return lhs.nbDims == count && std::equal(lhs.d, lhs.d + lhs.nbDims, dims);
    }

    [[nodiscard]] bool shapeEquals(Shape const& other) const
    {
        return shapeEquals(getShape(), other);
    }

    [[nodiscard]] bool shapeEquals(std::initializer_list<SizeType32> const& other) const
    {
        return shapeEquals(getShape(), other.begin(), other.size());
    }

    template <typename T>
    bool shapeEquals(T const* dims, SizeType32 count) const
    {
        return shapeEquals(getShape(), dims, count);
    }

protected:
    ITensor() = default;

    static DimType64 castSize(size_t newSize)
    {
        TLLM_CHECK_WITH_INFO(
            newSize <= std::numeric_limits<DimType64>::max(), "New size is too large. Use reshape() instead.");
        return static_cast<DimType64>(newSize);
    }
};

//! \brief Utility function to print a shape.
inline std::ostream& operator<<(std::ostream& output, ITensor::Shape const& dims)
{
    return output << ITensor::toString(dims);
}

//! \brief Utility function to print a tensor with its shape.
std::ostream& operator<<(std::ostream& output, ITensor const& tensor);

/// @brief Retrieves a T const typed pointer to the underlying data of the tensor pointed to by the tensorPtr, or
/// nullptr if the tensorPtr is null.
/// @details This overload has to be declared to avoid ambiguity when an implicit conversion to IBuffer is involved.
/// @tparam T The type of the underlying data.
/// @param tensorPtr A possibly null shared ptr.
/// @return A pointer to T const, possibly nullptr.
template <typename T>
T const* bufferCastOrNull(ITensor::SharedConstPtr const& tensorPtr)
{
    return bufferCastOrNull<T>(static_cast<IBuffer::SharedConstPtr>(tensorPtr));
}

/// @brief Retrieves a T typed pointer to the underlying data of the buffer pointed to by the tensorPtr, or nullptr if
/// the tensorPtr is null.
/// @details This overload has to be declared to avoid ambiguity when an implicit conversion to IBuffer is involved.
/// @tparam T The type of the underlying data.
/// @param tensorPtr A possibly null shared ptr.
/// @return A pointer to T, possibly nullptr.
template <typename T>
T* bufferCastOrNull(ITensor::SharedPtr const& tensorPtr)
{
    return bufferCastOrNull<T>(static_cast<IBuffer::SharedPtr>(tensorPtr));
}

/// @brief Retrieves a T typed pointer to the underlying data of the tensor pointed to by the tensor pointer
/// contained in the optionalTensorPtr, or nullptr if the optional doesn't have a value.
/// @details This overload has to be declared to avoid ambiguity when an implicit conversion to IBuffer is involved.
/// @tparam T The type of the underlying data.
/// @param optionalTensorPtr A possibly empty optional.
/// @return A pointer to T, possibly nullptr.
template <typename T>
T* bufferCastOrNull(std::optional<ITensor::SharedPtr> const& optionalTensorPtr)
{
    return bufferCastOrNull<T>(static_cast<std::optional<IBuffer::SharedPtr>>(optionalTensorPtr));
}

/// @brief Retrieves a T const typed pointer to the underlying data of the tensor pointed to by the tensor pointer
/// contained in the optionalTensorPtr, or nullptr if the optional doesn't have a value.
/// @details This overload has to be declared to avoid ambiguity when an implicit conversion to IBuffer is involved.
/// @tparam T The type of the underlying data.
/// @param optionalTensorPtr A possibly empty optional.
/// @return A pointer to T const, possibly nullptr.
template <typename T>
T const* bufferCastOrNull(std::optional<ITensor::SharedConstPtr> const& optionalTensorPtr)
{
    return bufferCastOrNull<T>(static_cast<std::optional<IBuffer::SharedConstPtr>>(optionalTensorPtr));
}

} // namespace tensorrt_llm::runtime
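
//! \brief Illustrative sketch (not part of the API): typed access through bufferCastOrNull.
//! Assumes `maybeLogits` is a std::optional<ITensor::SharedPtr> produced elsewhere; `float` is only an example type.
//!
//! \code
//! std::optional<tensorrt_llm::runtime::ITensor::SharedPtr> maybeLogits = std::nullopt;
//! float* data = tensorrt_llm::runtime::bufferCastOrNull<float>(maybeLogits); // nullptr while the optional is empty
//! \endcode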