/*
 * Copyright (c) 2019-2023, NVIDIA CORPORATION. All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#pragma once

#include "tensorrt_llm/common/assert.h"
#include "tensorrt_llm/common/cudaBf16Wrapper.h"
#include "tensorrt_llm/common/cudaFp8Utils.h"
#include "tensorrt_llm/common/cudaUtils.h"
#include "tensorrt_llm/common/logger.h"
#include "tensorrt_llm/common/stringUtils.h"

#include <cuda_fp16.h>

#include <cstddef>
#include <cstdint>
#include <initializer_list>
#include <optional>
#include <string>
#include <type_traits>
#include <unordered_map>
#include <utility>
#include <vector>

namespace tensorrt_llm
{
namespace common
{

typedef enum datatype_enum
{
    TYPE_INVALID,
    TYPE_BOOL,
    TYPE_UINT8,
    TYPE_UINT16,
    TYPE_UINT32,
    TYPE_UINT64,
    TYPE_INT8,
    TYPE_INT16,
    TYPE_INT32,
    TYPE_INT64,
    TYPE_FP16,
    TYPE_FP32,
    TYPE_FP64,
    TYPE_BYTES,
    TYPE_BF16,
    TYPE_FP8_E4M3,
    TYPE_STR,
    TYPE_VOID,
    TYPE_INT32_PTR,
} DataType;

template <typename T>
struct TensorDataType
{
};

template <> struct TensorDataType<bool>          { static constexpr DataType value = TYPE_BOOL; };
template <> struct TensorDataType<std::uint8_t>  { static constexpr DataType value = TYPE_UINT8; };
template <> struct TensorDataType<std::uint16_t> { static constexpr DataType value = TYPE_UINT16; };
template <> struct TensorDataType<std::uint32_t> { static constexpr DataType value = TYPE_UINT32; };
template <> struct TensorDataType<std::uint64_t> { static constexpr DataType value = TYPE_UINT64; };

#if !defined(_WIN32)
// On non-Windows platforms, unsigned long long is a distinct type from std::uint64_t,
// so it needs its own specialization; on Windows the two coincide.
template <> struct TensorDataType<unsigned long long> { static constexpr DataType value = TYPE_UINT64; };
#endif // !defined(_WIN32)

static_assert(sizeof(std::uint64_t) == sizeof(unsigned long long), "");

template <> struct TensorDataType<std::int8_t>  { static constexpr DataType value = TYPE_INT8; };
template <> struct TensorDataType<std::int16_t> { static constexpr DataType value = TYPE_INT16; };
template <> struct TensorDataType<std::int32_t> { static constexpr DataType value = TYPE_INT32; };
template <> struct TensorDataType<std::int64_t> { static constexpr DataType value = TYPE_INT64; };
template <> struct TensorDataType<half>         { static constexpr DataType value = TYPE_FP16; };
template <> struct TensorDataType<float>        { static constexpr DataType value = TYPE_FP32; };
template <> struct TensorDataType<double>       { static constexpr DataType value = TYPE_FP64; };
template <> struct TensorDataType<char>         { static constexpr DataType value = TYPE_BYTES; };

#ifdef ENABLE_BF16
template <> struct TensorDataType<__nv_bfloat16> { static constexpr DataType value = TYPE_BF16; };
#endif

#ifdef ENABLE_FP8
template <> struct TensorDataType<__nv_fp8_e4m3> { static constexpr DataType value = TYPE_FP8_E4M3; };
#endif

template <> struct TensorDataType<std::string> { static constexpr DataType value = TYPE_STR; };
template <> struct TensorDataType<void>        { static constexpr DataType value = TYPE_VOID; };
template <> struct TensorDataType<int*>        { static constexpr DataType value = TYPE_INT32_PTR; };
template <> struct TensorDataType<int const*>  { static constexpr DataType value = TYPE_INT32_PTR; };

template <typename T>
DataType getTensorType()
{
    return TensorDataType<typename std::remove_cv<T>::type>::value;
}

typedef enum memorytype_enum
{
    MEMORY_CPU,
    MEMORY_CPU_PINNED,
    MEMORY_GPU
} MemoryType;
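// A minimal usage sketch for the type mapping above (illustrative only, not part
// of the original header): getTensorType<T>() resolves a C++ type to its DataType
// tag, with cv-qualifiers stripped by std::remove_cv.
//
//   assert(getTensorType<float>() == TYPE_FP32);
//   assert(getTensorType<std::int32_t const>() == TYPE_INT32); // const is ignored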
class Tensor
{
public:
    // Do not write to these variables directly. Use copy / move constructors instead.
    MemoryType where;
    DataType type;
    std::vector<size_t> shape;
    void const* data; // TODO: modify from `void const*` to `void* const`

    Tensor();
    Tensor(MemoryType _where, DataType _type, std::vector<size_t> const& _shape, void const* _data);

    std::size_t size() const;
    std::size_t sizeBytes() const;

    std::string whereToString() const;
    std::string toString() const;
    std::string getNumpyTypeDesc(DataType type) const;

    static size_t getTypeSize(DataType type);

    template <typename T>
    inline T getVal(size_t index) const
    {
        TLLM_LOG_TRACE("%s start", __PRETTY_FUNCTION__);
        TLLM_CHECK(where == MEMORY_CPU);
        TLLM_CHECK(data != nullptr);
        TLLM_CHECK_WITH_INFO(index < size(), "index is larger than buffer size");

        if (getTensorType<T>() != type)
        {
            TLLM_LOG_DEBUG("getVal with type %s, but data type is: %s",
                getNumpyTypeDesc(getTensorType<T>()).c_str(), getNumpyTypeDesc(type).c_str());
        }
        return ((T*) data)[index];
    }

    template <typename T>
    inline T getVal() const
    {
        TLLM_LOG_TRACE("%s start", __PRETTY_FUNCTION__);
        if (getTensorType<T>() != type)
        {
            TLLM_LOG_DEBUG("getVal with type %s, but data type is: %s",
                getNumpyTypeDesc(getTensorType<T>()).c_str(), getNumpyTypeDesc(type).c_str());
        }
        return getVal<T>(0);
    }

    template <typename T>
    inline T* getPtr() const
    {
        TLLM_LOG_TRACE("%s start", __PRETTY_FUNCTION__);
        if (getTensorType<T>() != type)
        {
            TLLM_LOG_DEBUG("getPtr with type %s, but data type is: %s",
                getNumpyTypeDesc(getTensorType<T>()).c_str(), getNumpyTypeDesc(type).c_str());
        }
        return (T*) data;
    }

    inline void* getPtrWithOffset(size_t offset) const
    {
        TLLM_LOG_TRACE("%s start", __PRETTY_FUNCTION__);
        if (data == nullptr)
        {
            return (void*) data;
        }
        else
        {
            TLLM_CHECK_WITH_INFO(offset < size(), "offset is larger than buffer size");
            return (void*) ((char*) data + offset * Tensor::getTypeSize(type));
        }
    }

    template <typename T>
    inline T* getPtrWithOffset(size_t offset) const
    {
        TLLM_LOG_TRACE("%s start", __PRETTY_FUNCTION__);
        if (getTensorType<T>() != type)
        {
            TLLM_LOG_DEBUG("getPtrWithOffset with type %s, but data type is: %s",
                getNumpyTypeDesc(getTensorType<T>()).c_str(), getNumpyTypeDesc(type).c_str());
        }
        if (data == nullptr)
        {
            return (T*) data;
        }
        else
        {
            TLLM_CHECK_WITH_INFO(
                offset < size(), fmtstr("offset (%lu) is larger than buffer size (%lu)", offset, size()));
            return ((T*) data) + offset;
        }
    }

    template <typename T>
    T max() const
    {
        if (getTensorType<T>() != type)
        {
            TLLM_LOG_DEBUG("max with type %s, but data type is: %s",
                getNumpyTypeDesc(getTensorType<T>()).c_str(), getNumpyTypeDesc(type).c_str());
        }
        TLLM_CHECK_WITH_INFO(shape.size() > 0 && data != nullptr, "Should be a non-empty tensor.");
        TLLM_CHECK_WITH_INFO(where == MEMORY_CPU || where == MEMORY_CPU_PINNED,
            "max() supports MEMORY_CPU or MEMORY_CPU_PINNED tensor.");
        size_t max_idx = 0;
        T max_val = getVal<T>(max_idx);
        for (size_t i = 1; i < size(); ++i)
        {
            T val = getVal<T>(i);
            if (val > max_val)
            {
                max_idx = i;
                max_val = val;
            }
        }
        return max_val;
    }

    template <typename T>
    T min() const
    {
        if (getTensorType<T>() != type)
        {
            TLLM_LOG_DEBUG("min with type %s, but data type is: %s",
                getNumpyTypeDesc(getTensorType<T>()).c_str(), getNumpyTypeDesc(type).c_str());
        }
        TLLM_CHECK_WITH_INFO(shape.size() > 0 && data != nullptr, "Should be a non-empty tensor.");
        TLLM_CHECK_WITH_INFO(where == MEMORY_CPU || where == MEMORY_CPU_PINNED,
            "min() supports MEMORY_CPU or MEMORY_CPU_PINNED tensor.");
        size_t min_idx = 0;
        T min_val = getVal<T>(min_idx);
        for (size_t i = 1; i < size(); ++i)
        {
            T val = getVal<T>(i);
            if (val < min_val)
            {
                min_idx = i;
                min_val = val;
            }
        }
        return min_val;
    }

    template <typename T>
    T any(T val) const
    {
        if (getTensorType<T>() != type)
        {
            TLLM_LOG_DEBUG("any with type %s, but data type is: %s",
                getNumpyTypeDesc(getTensorType<T>()).c_str(), getNumpyTypeDesc(type).c_str());
        }
        TLLM_CHECK_WITH_INFO(shape.size() > 0 && data != nullptr, "Should be a non-empty tensor.");
        TLLM_CHECK_WITH_INFO(where == MEMORY_CPU || where == MEMORY_CPU_PINNED,
            "any() supports MEMORY_CPU or MEMORY_CPU_PINNED tensor.");
        for (size_t i = 0; i < size(); ++i)
        {
            if (getVal<T>(i) == val)
            {
                return true;
            }
        }
        return false;
    }

    template <typename T>
    T all(T val) const
    {
        if (getTensorType<T>() != type)
        {
            TLLM_LOG_DEBUG("all with type %s, but data type is: %s",
                getNumpyTypeDesc(getTensorType<T>()).c_str(), getNumpyTypeDesc(type).c_str());
        }
        TLLM_CHECK_WITH_INFO(shape.size() > 0 && data != nullptr, "Should be a non-empty tensor.");
        TLLM_CHECK_WITH_INFO(where == MEMORY_CPU || where == MEMORY_CPU_PINNED,
            "all() supports MEMORY_CPU or MEMORY_CPU_PINNED tensor.");
        for (size_t i = 0; i < size(); ++i)
        {
            if (getVal<T>(i) != val)
            {
                return false;
            }
        }
        return true;
    }

    void updateShape(size_t idx, size_t val)
    {
        // TODO: find a better way to update the shape
        std::vector<size_t>& shape_ref = const_cast<std::vector<size_t>&>(shape);
        shape_ref[idx] = val;
    }

    inline bool isValid() const
    {
        return size() > 0 && data != nullptr;
    }

    Tensor slice(std::vector<size_t> shape, size_t offset = 0) const;
};
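// A minimal usage sketch for Tensor (illustrative only; `buf` is a hypothetical
// caller-owned buffer). Tensor does not own `data`, so the caller must keep the
// buffer alive, and getVal()/max()/min() only operate on CPU-resident memory.
//
//   std::vector<float> buf{3.f, 1.f, 4.f, 1.f};
//   Tensor t{MEMORY_CPU, TYPE_FP32, {2, 2}, buf.data()};
//   float v = t.getVal<float>(2);               // flat index 2 -> 4.f
//   float* row1 = t.getPtrWithOffset<float>(2); // pointer to the second row
//   float m = t.max<float>();                   // CPU-only reduction -> 4.f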
class TensorMap
{
private:
    std::unordered_map<std::string, Tensor> tensor_map_;

public:
    TensorMap() = default;
    TensorMap(std::unordered_map<std::string, Tensor> const& tensor_map);
    TensorMap(std::vector<Tensor> const& tensor_map);
    TensorMap(std::initializer_list<std::pair<std::string, Tensor>> tensor_map);
    ~TensorMap();

    inline size_t size() const
    {
        return tensor_map_.size();
    }

    inline bool contains(std::string const& key) const
    {
        TLLM_LOG_TRACE("%s for key: %s", __PRETTY_FUNCTION__, key.c_str());
        return tensor_map_.find(key) != tensor_map_.end();
    }

    std::vector<std::string> keys() const;

    inline void insert(std::string const& key, Tensor const& value)
    {
        TLLM_CHECK_WITH_INFO(!contains(key), fmtstr("Duplicated key %s", key.c_str()));
        TLLM_CHECK_WITH_INFO(
            value.isValid(), fmtstr("A null tensor or nullptr is not allowed (key is %s)", key.c_str()));
        tensor_map_.insert({key, value});
    }

    inline void insertIfValid(std::string const& key, Tensor const& value)
    {
        if (value.isValid())
        {
            insert({key, value});
        }
    }

    inline void insert(std::pair<std::string, Tensor> p)
    {
        tensor_map_.insert(p);
    }

    // prevent converting int or size_t to string automatically
    Tensor at(int tmp) = delete;
    Tensor at(size_t tmp) = delete;

    inline Tensor& at(std::string const& key)
    {
        TLLM_LOG_TRACE("%s for key %s", __PRETTY_FUNCTION__, key.c_str());
        TLLM_CHECK_WITH_INFO(contains(key),
            fmtstr("Cannot find a tensor of name %s in the tensor map (keys: %s)", key.c_str(),
                vec2str(keys()).c_str()));
        return tensor_map_.at(key);
    }

    inline Tensor at(std::string const& key) const
    {
        TLLM_CHECK_WITH_INFO(contains(key),
            fmtstr("Cannot find a tensor of name %s in the tensor map (keys: %s)", key.c_str(),
                vec2str(keys()).c_str()));
        return tensor_map_.at(key);
    }

    inline std::optional<Tensor> atOpt(std::string const& key) const
    {
        if (contains(key))
        {
            return tensor_map_.at(key);
        }
        else
        {
            return std::nullopt;
        }
    }

    inline Tensor& at(std::string const& key, Tensor& default_tensor)
    {
        TLLM_LOG_TRACE("%s for key %s", __PRETTY_FUNCTION__, key.c_str());
        if (contains(key))
        {
            return tensor_map_.at(key);
        }
        return default_tensor;
    }

    inline Tensor at(std::string const& key, Tensor& default_tensor) const
    {
        TLLM_LOG_TRACE("%s for key %s", __PRETTY_FUNCTION__, key.c_str());
        if (contains(key))
        {
            return tensor_map_.at(key);
        }
        return default_tensor;
    }

    inline Tensor& at(std::string const& key, Tensor&& default_tensor)
    {
        TLLM_LOG_TRACE("%s for key %s", __PRETTY_FUNCTION__, key.c_str());
        if (contains(key))
        {
            return tensor_map_.at(key);
        }
        return default_tensor;
    }
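    // Lookup sketch (illustrative): the single-argument at() hard-fails on a
    // missing key, while the default-taking overloads fall back to a
    // caller-supplied tensor, so prefer them when a key may legitimately be
    // absent. `map` and `fallback` are hypothetical.
    //
    //   Tensor t = map.at("logits", fallback); // returns `fallback` if missing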
    inline Tensor at(std::string const& key, Tensor&& default_tensor) const
    {
        if (contains(key))
        {
            return tensor_map_.at(key);
        }
        return default_tensor;
    }

    template <typename T>
    inline T getVal(std::string const& key) const
    {
        TLLM_CHECK_WITH_INFO(contains(key),
            fmtstr("Cannot find a tensor of name %s in the tensor map (keys: %s)", key.c_str(),
                vec2str(keys()).c_str()));
        return tensor_map_.at(key).getVal<T>();
    }

    template <typename T>
    inline std::optional<T> getValOpt(std::string const& key) const
    {
        if (contains(key))
        {
            return tensor_map_.at(key).getVal<T>();
        }
        else
        {
            return std::nullopt;
        }
    }

    template <typename T>
    inline T getVal(std::string const& key, T default_value) const
    {
        if (contains(key))
        {
            return tensor_map_.at(key).getVal<T>();
        }
        return default_value;
    }

    template <typename T>
    inline T getValWithOffset(std::string const& key, size_t index) const
    {
        TLLM_CHECK_WITH_INFO(contains(key),
            fmtstr("Cannot find a tensor of name %s in the tensor map (keys: %s)", key.c_str(),
                vec2str(keys()).c_str()));
        return tensor_map_.at(key).getVal<T>(index);
    }

    template <typename T>
    inline T getValWithOffset(std::string const& key, size_t index, T default_value) const
    {
        if (contains(key))
        {
            return tensor_map_.at(key).getVal<T>(index);
        }
        return default_value;
    }

    template <typename T>
    inline T* getPtr(std::string const& key) const
    {
        TLLM_CHECK_WITH_INFO(contains(key),
            fmtstr("Cannot find a tensor of name %s in the tensor map (keys: %s)", key.c_str(),
                vec2str(keys()).c_str()));
        return tensor_map_.at(key).getPtr<T>();
    }

    template <typename T>
    inline T* getPtr(std::string const& key, T* default_ptr) const
    {
        if (contains(key))
        {
            return tensor_map_.at(key).getPtr<T>();
        }
        return default_ptr;
    }

    template <typename T>
    inline T* getPtrWithOffset(std::string const& key, size_t index) const
    {
        TLLM_CHECK_WITH_INFO(contains(key),
            fmtstr("Cannot find a tensor of name %s in the tensor map (keys: %s)", key.c_str(),
                vec2str(keys()).c_str()));
        return tensor_map_.at(key).getPtrWithOffset<T>(index);
    }

    template <typename T>
    inline T* getPtrWithOffset(std::string const& key, size_t index, T* default_ptr) const
    {
        if (contains(key))
        {
            return tensor_map_.at(key).getPtrWithOffset<T>(index);
        }
        return default_ptr;
    }

    inline std::unordered_map<std::string, Tensor> getMap() const
    {
        return tensor_map_;
    }

    inline std::unordered_map<std::string, Tensor>::iterator begin()
    {
        return tensor_map_.begin();
    }

    inline std::unordered_map<std::string, Tensor>::iterator end()
    {
        return tensor_map_.end();
    }

    std::string toString();
};

} // namespace common
} // namespace tensorrt_llm
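// An end-to-end sketch of TensorMap (illustrative only; the key names and
// buffers are hypothetical):
//
//   using namespace tensorrt_llm::common;
//   std::vector<std::int32_t> ids{1, 2, 3};
//   TensorMap inputs;
//   inputs.insert("input_ids", Tensor{MEMORY_CPU, TYPE_INT32, {3}, ids.data()});
//   std::int32_t first = inputs.getVal<std::int32_t>("input_ids");        // ids[0]
//   std::int32_t* ptr = inputs.getPtrWithOffset<std::int32_t>("input_ids", 1);
//   std::int32_t beams = inputs.getVal<std::int32_t>("beam_width", 1);    // default if absent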