/* * Copyright (c) 2022-2024, NVIDIA CORPORATION. All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #pragma once #include "tensorrt_llm/common/tensor.h" #include "tensorrt_llm/runtime/iTensor.h" #include #include namespace tensorrt_llm::common::conversion { inline DataType toTllmDataType(nvinfer1::DataType type) { switch (type) { case nvinfer1::DataType::kFLOAT: return DataType::TYPE_FP32; case nvinfer1::DataType::kHALF: return DataType::TYPE_FP16; case nvinfer1::DataType::kBF16: return DataType::TYPE_BF16; case nvinfer1::DataType::kFP8: return DataType::TYPE_FP8_E4M3; case nvinfer1::DataType::kINT8: return DataType::TYPE_INT8; case nvinfer1::DataType::kUINT8: return DataType::TYPE_UINT8; case nvinfer1::DataType::kINT32: return DataType::TYPE_INT32; case nvinfer1::DataType::kINT64: return DataType::TYPE_INT64; case nvinfer1::DataType::kBOOL: return DataType::TYPE_BOOL; default: TLLM_THROW("Unsupported data type: %d", static_cast(type)); } } inline MemoryType toTllmMemoryType(runtime::MemoryType type) { switch (type) { case runtime::MemoryType::kGPU: return MemoryType::MEMORY_GPU; case runtime::MemoryType::kCPU: return MemoryType::MEMORY_CPU; case runtime::MemoryType::kPINNED: return MemoryType::MEMORY_CPU_PINNED; default: TLLM_THROW("Unsupported memory type: %d", static_cast(type)); } } inline Tensor toTllmTensor(runtime::ITensor const& tensor) { MemoryType memoryType = toTllmMemoryType(tensor.getMemoryType()); DataType dataType = toTllmDataType(tensor.getDataType()); auto const& dims = tensor.getShape(); std::vector shape(dims.d, dims.d + dims.nbDims); auto* data = tensor.data(); return Tensor(memoryType, dataType, shape, data); } inline Tensor toTllmTensor(runtime::IBuffer const& buffer) { MemoryType memoryType = toTllmMemoryType(buffer.getMemoryType()); DataType dataType = toTllmDataType(buffer.getDataType()); std::vector shape{buffer.getSize()}; auto* data = buffer.data(); return Tensor(memoryType, dataType, shape, data); } template Tensor toTllmTensor(MemoryType memoryType, std::vector const& shape, T* data) { return Tensor{memoryType, getTensorType(), shape, data}; } template Tensor toTllmTensor(std::vector const& data) { return Tensor{MemoryType::MEMORY_CPU, getTensorType(), {data.size()}, data.data()}; } template Tensor scalarToTllmTensor(T& data) { return Tensor{MemoryType::MEMORY_CPU, getTensorType(), {1}, &data}; } } // namespace tensorrt_llm::common::conversion