TensorRT-LLMs/cpp/tests/unit_tests/runtime/torchTest.cpp

/*
 * Copyright (c) 2022-2024, NVIDIA CORPORATION. All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#include <gmock/gmock.h>
#include <gtest/gtest.h>

#include "tensorrt_llm/common/cudaUtils.h" // tc::getDeviceCount
#include "tensorrt_llm/runtime/bufferManager.h"
#include "tensorrt_llm/runtime/torch.h"
#include "tensorrt_llm/runtime/torchUtils.h" // TorchUtils::shape, TorchUtils::dataType
#include "tensorrt_llm/runtime/torchView.h"

#include <memory>
#include <vector>

using namespace tensorrt_llm::runtime;
namespace tc = tensorrt_llm::common;
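
// Fixture that creates a CUDA stream and makes it PyTorch's current stream.
// All tests are skipped on machines without a CUDA device.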
class TorchTest : public ::testing::Test // NOLINT(cppcoreguidelines-pro-type-member-init)
{
protected:
    void SetUp() override
    {
        mDeviceCount = tc::getDeviceCount();
        if (mDeviceCount > 0)
        {
            mStream = std::make_unique<CudaStream>();
            Torch::setCurrentStream(*mStream);
        }
        else
        {
            GTEST_SKIP();
        }
    }

    void TearDown() override {}

    int mDeviceCount;
    BufferManager::CudaStreamPtr mStream;
};
namespace
{
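// Expects every element of buffer to equal fillValue, but only when the
// buffer's runtime data type matches the template parameter; otherwise this is
// a no-op, so a caller can enumerate all dtypes and exactly one check fires.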
template <nvinfer1::DataType DType>
void checkFilled(IBuffer& buffer, int fillValue)
{
    if (DType == buffer.getDataType())
    {
        EXPECT_THAT(BufferRange<typename DataTypeTraits<DType>::type>(buffer), ::testing::Each(fillValue));
    }
}
} // namespace
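
// Round-trip test between TRT-LLM and ATen tensors. The shape of the trip, as
// exercised below:
//   auto t = manager.allocate(memoryType, shape, dtype); // ITensor::SharedPtr
//   auto a = Torch::tensor(t);                           // zero-copy ATen view
//   auto v = TorchView::of(a);                           // zero-copy ITensor view
// Device, pinning, dtype, shape, and the data pointer must survive both hops.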
TEST_F(TorchTest, Aten)
{
    BufferManager manager(mStream);
    auto const shapeTllm = ITensor::makeShape({1, 2, 3, 4});
    auto const shapeAten = TorchUtils::shape(shapeTllm);
    auto const shapeSmall = ITensor::makeShape({1, 2, 3, 2});
    ASSERT_GT(ITensor::volume(shapeTllm), ITensor::volume(shapeSmall));
    auto const shapeLarge = ITensor::makeShape({1, 2, 3, 8});
    ASSERT_LT(ITensor::volume(shapeTllm), ITensor::volume(shapeLarge));

    // The converted ATen shape must match the TRT-LLM shape dimension by dimension.
    for (std::size_t i = 0; i < shapeAten.size(); ++i)
    {
        EXPECT_EQ(shapeAten[i], shapeTllm.d[i]) << i;
    }

    auto constexpr fillValue = 1;
    auto tensorHostBase = manager.allocate(MemoryType::kPINNED, shapeTllm, nvinfer1::DataType::kINT64);

    for (auto memoryType : {MemoryType::kCPU, MemoryType::kGPU, MemoryType::kPINNED})
    {
        for (auto dtype : {nvinfer1::DataType::kFLOAT, nvinfer1::DataType::kHALF, nvinfer1::DataType::kINT8,
                 nvinfer1::DataType::kUINT8, nvinfer1::DataType::kINT32, nvinfer1::DataType::kINT64,
                 nvinfer1::DataType::kBF16, nvinfer1::DataType::kFP8, nvinfer1::DataType::kBOOL})
        {
            ITensor::SharedPtr tensorTllm{manager.allocate(memoryType, shapeTllm, dtype)};

            // Conversion to ATen tensor: device, pinning, dtype, shape, and the
            // data pointer must all carry over without a copy.
            auto tensorAten = Torch::tensor(tensorTllm);
            EXPECT_TRUE(
                (memoryType == MemoryType::kGPU && tensorAten.device().is_cuda()) || tensorAten.device().is_cpu());
            EXPECT_EQ(memoryType == MemoryType::kPINNED, tensorAten.is_pinned());
            EXPECT_EQ(TorchUtils::dataType(dtype), tensorAten.dtype());
            EXPECT_THAT(tensorAten.sizes(), ::testing::ElementsAreArray(shapeAten));
            EXPECT_EQ(tensorAten.data_ptr(), tensorTllm->data());

            if (dtype != nvinfer1::DataType::kFP8)
            {
                // A fill through the ATen view must be visible through the
                // TRT-LLM buffer after copying it back to the host.
                tensorAten.fill_(c10::Scalar(fillValue));
                auto tensorHost = ITensor::wrap(tensorHostBase->data(), dtype, shapeTllm);
                manager.copy(*tensorTllm, *tensorHost);
                mStream->synchronize();
                checkFilled<nvinfer1::DataType::kFLOAT>(*tensorHost, fillValue);
                checkFilled<nvinfer1::DataType::kHALF>(*tensorHost, fillValue);
                checkFilled<nvinfer1::DataType::kINT8>(*tensorHost, fillValue);
                checkFilled<nvinfer1::DataType::kUINT8>(*tensorHost, fillValue);
                checkFilled<nvinfer1::DataType::kINT32>(*tensorHost, fillValue);
                checkFilled<nvinfer1::DataType::kINT64>(*tensorHost, fillValue);
                checkFilled<nvinfer1::DataType::kBF16>(*tensorHost, fillValue);
                checkFilled<nvinfer1::DataType::kBOOL>(*tensorHost, fillValue);
            }

            // Conversion back to TRT-LLM tensor: the view must alias the same
            // memory and report the same metadata.
            auto tensorView = TorchView::of(tensorAten);
            EXPECT_EQ(tensorView->getDataType(), dtype);
            EXPECT_EQ(tensorView->getMemoryType(), memoryType);
            EXPECT_EQ(tensorView->getSize(), tensorTllm->getSize());
            EXPECT_EQ(tensorView->getCapacity(), tensorTllm->getCapacity());
            EXPECT_EQ(TorchUtils::shape(tensorView->getShape()), shapeAten);
            EXPECT_EQ(tensorView->data(), tensorTllm->data());

            // Shrinking within capacity is allowed; growing beyond it must throw.
            tensorView->reshape(shapeSmall);
            EXPECT_EQ(TorchUtils::shape(tensorView->getShape()), TorchUtils::shape(shapeSmall));
            EXPECT_LT(tensorView->getSize(), tensorTllm->getSize());
            EXPECT_EQ(tensorView->getSize(), tensorAten.numel());
            EXPECT_EQ(tensorView->data(), tensorTllm->data());
            EXPECT_THROW(tensorView->reshape(shapeLarge), tc::TllmException);

            // After release() the view no longer points at any memory.
            tensorView->release();
            EXPECT_EQ(tensorView->data(), nullptr);
            EXPECT_EQ(tensorView->getSize(), 0);
        }
    }
}
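
// Resizing a TorchView both up and down must succeed on CPU and GPU tensors,
// presumably because the view delegates to the wrapped ATen tensor, which can
// reallocate its storage (unlike the fixed allocation in the Aten test above).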
TEST_F(TorchTest, Resize)
{
    auto devices = {at::Device{at::kCUDA, 0}, at::Device{at::kCPU}};
    for (auto device : devices)
    {
        auto tensorAten = at::randn({1, 2, 3, 4}).to(device);
        auto tensorView = TorchView::of(tensorAten);
        auto size = tensorView->getSize();
        EXPECT_NO_THROW(tensorView->resize(size / 2));
        EXPECT_EQ(tensorView->getSize(), size / 2);
        EXPECT_NO_THROW(tensorView->resize(size * 2));
        EXPECT_EQ(tensorView->getSize(), size * 2);
    }
}