// TensorRT-LLMs/cpp/tests/unit_tests/runtime/virtualMemoryTest.cpp
/*
* Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <gtest/gtest.h>
#include <nvml.h>

#include "tensorrt_llm/common/cudaUtils.h"
#include "tensorrt_llm/runtime/bufferManager.h"
#include "tensorrt_llm/runtime/tllmBuffers.h"
#include "tensorrt_llm/runtime/virtualMemory.h"

#include <algorithm>
#include <climits>
#include <cstddef>
#include <cstdint>
#include <memory>
#include <random>
#include <stdexcept>
#include <string>
#include <unistd.h>
#include <utility>
#include <vector>
using namespace tensorrt_llm::runtime;
namespace tc = tensorrt_llm::common;
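
// Exception type thrown by the dummy creators and configurators below to simulate failures.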
struct DummyException : std::runtime_error
{
DummyException()
: runtime_error("dummy exception")
{
}
};
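
// Common fixture: brings up the CUDA driver primary context and NVML so the tests can
// cross-check allocations against the per-process GPU memory usage reported by NVML.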
class VirtualMemoryTestBase : public ::testing::Test
{
protected:
void SetUp() override
{
if (tc::getDeviceCount() == 0)
{
GTEST_SKIP() << "This test suite cannot run on systems with no devices.";
}
TLLM_CU_CHECK(cuInit(0));
CUdevice dev;
TLLM_CU_CHECK(cuDeviceGet(&dev, 0));
CUcontext ctx;
TLLM_CU_CHECK(cuDevicePrimaryCtxRetain(&ctx, dev));
TLLM_CU_CHECK(cuCtxSetCurrent(ctx));
// Initialize NVML
nvmlReturn_t nvmlResult = nvmlInit();
TLLM_CHECK_WITH_INFO(nvmlResult == NVML_SUCCESS, "Failed to initialize NVML: %s", nvmlErrorString(nvmlResult));
if (!memoryInfoAvailable())
{
TLLM_LOG_WARNING("Per process memory information unavailable.");
}
TLLM_CUDA_CHECK(cudaDeviceSynchronize());
}
void TearDown() override
{
TLLM_CUDA_CHECK(cudaDeviceSynchronize());
}
static bool memoryInfoAvailable()
{
static bool available = []
{
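// Allocate a small device buffer first so that, on systems where NVML reports
// per-process usage, this process shows up with non-zero memory; a zero reading
// afterwards means per-process memory information is unavailable here.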
auto blob = BufferManager::gpuSync(4096);
auto usage = getCurrentProcessMemoryInfo();
return usage != 0;
}();
return available;
}
static size_t getCurrentProcessMemoryInfo()
{
// Get current process ID
uint32_t currentPid = static_cast<uint32_t>(getpid());
// Get device handle for GPU 0
nvmlDevice_t device;
auto nvmlResult = nvmlDeviceGetHandleByIndex(0, &device);
TLLM_CHECK_WITH_INFO(
nvmlResult == NVML_SUCCESS, "Failed to get device handle: %s", nvmlErrorString(nvmlResult));
// Get running processes
unsigned int processCount = 1;
std::vector<nvmlProcessInfo_v2_t> processes(processCount);
nvmlResult = NVML_ERROR_INSUFFICIENT_SIZE;
while (nvmlResult == NVML_ERROR_INSUFFICIENT_SIZE)
{
nvmlResult = nvmlDeviceGetComputeRunningProcesses_v3(device, &processCount, processes.data());
TLLM_CHECK_WITH_INFO(nvmlResult == NVML_SUCCESS || nvmlResult == NVML_ERROR_INSUFFICIENT_SIZE,
"Failed to get process count: %s", nvmlErrorString(nvmlResult));
processes.resize(processCount);
}
// Find current process
for (auto const& process : processes)
{
if (process.pid == currentPid)
{
return process.usedGpuMemory;
}
}
return 0;
}
};
class VirtualMemoryTest : public VirtualMemoryTestBase
{
};
// Test that CUDAVirtualMemoryChunk materializes and releases memory correctly
TEST_F(VirtualMemoryTest, TestBasic)
{
CUdeviceptr address{};
std::size_t constexpr size = 256 * 1024 * 1024;
TLLM_CU_CHECK(cuMemAddressReserve(&address, size, 0, {}, 0));
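// The LocalCreator supplies the physical allocation on device 0, and the UnicastConfigurator
// maps it into the reserved address range with read/write access.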
CUDAVirtualMemoryChunk::CreatorPtr creator
= std::make_unique<LocalCreator<>>(CUmemAllocationProp{CU_MEM_ALLOCATION_TYPE_PINNED, CU_MEM_HANDLE_TYPE_NONE,
{
CU_MEM_LOCATION_TYPE_DEVICE,
0,
}},
size);
CUDAVirtualMemoryChunk::Configurators configurators;
configurators.push_back(std::make_unique<UnicastConfigurator>(address, size,
CUmemAccessDesc{{
CU_MEM_LOCATION_TYPE_DEVICE,
0,
},
CU_MEM_ACCESS_FLAGS_PROT_READWRITE}));
CUDAVirtualMemoryChunk vm(std::move(creator), std::move(configurators));
ASSERT_EQ(vm.status(), CUDAVirtualMemoryChunk::RELEASED);
auto memoryBegin = getCurrentProcessMemoryInfo();
vm.materialize();
ASSERT_EQ(vm.status(), CUDAVirtualMemoryChunk::MATERIALIZED);
auto memoryMaterialized = getCurrentProcessMemoryInfo();
if (memoryInfoAvailable())
{
ASSERT_EQ(memoryBegin + size, memoryMaterialized) << "materialize does not allocate memory";
}
auto result = cuMemsetD8_v2(address, 255, size);
ASSERT_EQ(result, CUDA_SUCCESS) << "Accessing memory returned failure (first materialize)";
TLLM_CU_CHECK(cuStreamSynchronize(nullptr));
vm.release();
ASSERT_EQ(vm.status(), CUDAVirtualMemoryChunk::RELEASED);
auto memoryReleased = getCurrentProcessMemoryInfo();
if (memoryInfoAvailable())
{
ASSERT_EQ(memoryBegin, memoryReleased) << "release does not release memory";
}
vm.materialize();
ASSERT_EQ(vm.status(), CUDAVirtualMemoryChunk::MATERIALIZED);
result = cuMemsetD8_v2(address, 255, size);
ASSERT_EQ(result, CUDA_SUCCESS) << "Accessing memory returned failure (second materialize)";
TLLM_CU_CHECK(cuStreamSynchronize(nullptr));
vm.release();
ASSERT_EQ(vm.status(), CUDAVirtualMemoryChunk::RELEASED);
memoryReleased = getCurrentProcessMemoryInfo();
if (memoryInfoAvailable())
{
ASSERT_EQ(memoryBegin, memoryReleased) << "release does not release memory";
}
}
// Test that OffloadConfigurator restores memory contents correctly for both CPU and PINNED backing memory types
class VirtualMemoryOffloadConfigurator : public VirtualMemoryTest, public ::testing::WithParamInterface<MemoryType>
{
};
TEST_P(VirtualMemoryOffloadConfigurator, Test)
{
MemoryType backType = GetParam();
CUdeviceptr address{};
std::size_t constexpr size = 4 * 1024 * 1024;
TLLM_CU_CHECK(cuMemAddressReserve(&address, size, 0, {}, 0));
CudaStream stream;
CUDAVirtualMemoryChunk::CreatorPtr creator
= std::make_unique<LocalCreator<>>(CUmemAllocationProp{CU_MEM_ALLOCATION_TYPE_PINNED, CU_MEM_HANDLE_TYPE_NONE,
{
CU_MEM_LOCATION_TYPE_DEVICE,
0,
}},
size);
CUDAVirtualMemoryChunk::Configurators configurators;
configurators.push_back(std::make_unique<UnicastConfigurator>(address, size,
CUmemAccessDesc{{
CU_MEM_LOCATION_TYPE_DEVICE,
0,
},
CU_MEM_ACCESS_FLAGS_PROT_READWRITE}));
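// The OffloadConfigurator is expected to copy the device contents to a CPU or pinned host
// buffer on release and restore them on the next materialize; the read-back below verifies this.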
configurators.push_back(std::make_unique<OffloadConfigurator>(address, size, backType, stream.get(), false));
CUDAVirtualMemoryChunk vm(std::move(creator), std::move(configurators));
std::vector<uint64_t> data(size / sizeof(uint64_t), 0);
std::generate(data.begin(), data.end(), [engine = std::mt19937_64(address)]() mutable { return engine(); });
vm.materialize();
auto pointer = reinterpret_cast<void*>(address);
auto result = cudaMemcpyAsync(pointer, data.data(), size, cudaMemcpyHostToDevice, stream.get());
ASSERT_EQ(result, cudaSuccess) << "Copying memory to device returned failure";
vm.release();
vm.materialize();
std::fill(data.begin(), data.end(), 0);
result = cudaMemcpyAsync(data.data(), pointer, size, cudaMemcpyDeviceToHost, stream.get());
stream.synchronize();
ASSERT_EQ(result, cudaSuccess) << "Copying memory to host returned failure";
auto engine = std::mt19937_64(static_cast<uint64_t>(address));
for (size_t i = 0; i < data.size(); ++i)
{
ASSERT_EQ(data[i], engine()) << "Mismatched at index " << i;
}
}
INSTANTIATE_TEST_SUITE_P(
Backends, VirtualMemoryOffloadConfigurator, ::testing::Values(MemoryType::kCPU, MemoryType::kPINNED));
// Test that CUDAVirtualMemoryChunk calls the creator and configurators in the correct order
TEST_F(VirtualMemoryTest, TestOrder)
{
// Order tracking - local counter to track call sequence
int callOrder = 0;
// OrderTrackingCreator that records when its methods are called
class OrderTrackingCreator : public CUDAVirtualMemoryChunk::Creator
{
public:
int& mCallOrder;
int createOrder = -1;
int releaseOrder = -1;
CUmemGenericAllocationHandle createdHandle = 0;
OrderTrackingCreator(int& callOrder)
: mCallOrder(callOrder)
{
}
CUmemGenericAllocationHandle create() override
{
createOrder = ++mCallOrder;
createdHandle = 0xbaadf00dbaadf00d;
return createdHandle;
}
void release(CUmemGenericAllocationHandle handle, bool destructing) override
{
releaseOrder = ++mCallOrder;
ASSERT_EQ(handle, createdHandle);
}
};
// OrderTrackingConfigurator that records when its methods are called
class OrderTrackingConfigurator : public CUDAVirtualMemoryChunk::Configurator
{
public:
int& mCallOrder;
std::string name;
int setupOrder = -1;
int teardownOrder = -1;
OrderTrackingConfigurator(int& callOrder, std::string n)
: mCallOrder(callOrder)
, name(std::move(n))
{
}
void setup(CUmemGenericAllocationHandle handle) override
{
setupOrder = ++mCallOrder;
}
void teardown(CUmemGenericAllocationHandle handle, bool destructing) override
{
teardownOrder = ++mCallOrder;
}
};
// Create creator and configurators
auto creator = std::make_unique<OrderTrackingCreator>(callOrder);
auto* creatorPtr = creator.get();
auto config1 = std::make_unique<OrderTrackingConfigurator>(callOrder, "config1");
auto config2 = std::make_unique<OrderTrackingConfigurator>(callOrder, "config2");
auto config3 = std::make_unique<OrderTrackingConfigurator>(callOrder, "config3");
auto* config1Ptr = config1.get();
auto* config2Ptr = config2.get();
auto* config3Ptr = config3.get();
CUDAVirtualMemoryChunk::Configurators configurators;
configurators.push_back(std::move(config1));
configurators.push_back(std::move(config2));
configurators.push_back(std::move(config3));
CUDAVirtualMemoryChunk vm(std::move(creator), std::move(configurators));
// Test materialize() order: creator.create() first, then configurators.setup() in order
vm.materialize();
// Verify materialize order
EXPECT_EQ(creatorPtr->createOrder, 1); // creator.create() should be called first
EXPECT_EQ(config1Ptr->setupOrder, 2); // config1.setup() should be called second
EXPECT_EQ(config2Ptr->setupOrder, 3); // config2.setup() should be called third
EXPECT_EQ(config3Ptr->setupOrder, 4); // config3.setup() should be called fourth
// Verify release() hasn't been called yet
EXPECT_EQ(creatorPtr->releaseOrder, -1);
EXPECT_EQ(config1Ptr->teardownOrder, -1);
EXPECT_EQ(config2Ptr->teardownOrder, -1);
EXPECT_EQ(config3Ptr->teardownOrder, -1);
// Test release() order: configurators.teardown() in reverse order, then creator.release()
vm.release();
// Verify release order
EXPECT_EQ(config3Ptr->teardownOrder, 5); // config3.teardown() should be called first (reverse order)
EXPECT_EQ(config2Ptr->teardownOrder, 6); // config2.teardown() should be called second
EXPECT_EQ(config1Ptr->teardownOrder, 7); // config1.teardown() should be called third
EXPECT_EQ(creatorPtr->releaseOrder, 8); // creator.release() should be called last
}
// Test that CUDAVirtualMemoryChunk behaves correctly when exceptions are thrown
TEST_F(VirtualMemoryTest, TestException)
{
// Dummy Creator that can be configured to throw on create() or release()
class DummyCreator : public CUDAVirtualMemoryChunk::Creator
{
public:
bool throwOnCreate = false;
bool throwOnRelease = false;
bool createCalled = false;
bool releaseCalled = false;
CUmemGenericAllocationHandle createdHandle = 0;
CUmemGenericAllocationHandle create() override
{
createCalled = true;
if (throwOnCreate)
{
throw DummyException();
}
createdHandle = 0xbaadf00dbaadf00d;
return createdHandle;
}
void release(CUmemGenericAllocationHandle handle, bool destructing) override
{
releaseCalled = true;
ASSERT_EQ(handle, createdHandle);
if (throwOnRelease)
{
throw DummyException();
}
}
};
// Dummy Configurator that can be configured to throw on setup() or teardown()
class DummyConfigurator : public CUDAVirtualMemoryChunk::Configurator
{
public:
bool throwOnSetup = false;
bool throwOnTeardown = false;
bool setupCalled = false;
bool teardownCalled = false;
std::string name;
DummyConfigurator(std::string n)
: name(std::move(n))
{
}
void setup(CUmemGenericAllocationHandle) override
{
setupCalled = true;
if (throwOnSetup)
{
throw DummyException();
}
}
void teardown(CUmemGenericAllocationHandle handle, bool destructing) override
{
teardownCalled = true;
if (throwOnTeardown)
{
throw DummyException();
}
}
};
// Test 1: Exception in creator->create()
{
auto creator = std::make_unique<DummyCreator>();
creator->throwOnCreate = true;
auto* creatorPtr = creator.get();
auto config1 = std::make_unique<DummyConfigurator>("config1");
auto config2 = std::make_unique<DummyConfigurator>("config2");
auto* config1Ptr = config1.get();
auto* config2Ptr = config2.get();
CUDAVirtualMemoryChunk::Configurators configurators;
configurators.push_back(std::move(config1));
configurators.push_back(std::move(config2));
CUDAVirtualMemoryChunk vm(std::move(creator), std::move(configurators));
// materialize() should throw due to creator->create() exception
EXPECT_THROW(vm.materialize(), DummyException);
// Verify creator->create() was called but no configurator was set up
EXPECT_TRUE(creatorPtr->createCalled);
EXPECT_FALSE(config1Ptr->setupCalled);
EXPECT_FALSE(config2Ptr->setupCalled);
// Internal state is still valid.
// If the failure from creator is temporary, materialize() can be reattempted.
EXPECT_EQ(vm.status(), CUDAVirtualMemoryChunk::RELEASED);
}
// Test 2: Exception in first configurator setup()
{
auto creator = std::make_unique<DummyCreator>();
auto* creatorPtr = creator.get();
auto config1 = std::make_unique<DummyConfigurator>("config1");
auto config2 = std::make_unique<DummyConfigurator>("config2");
config1->throwOnSetup = true;
auto* config1Ptr = config1.get();
auto* config2Ptr = config2.get();
CUDAVirtualMemoryChunk::Configurators configurators;
configurators.push_back(std::move(config1));
configurators.push_back(std::move(config2));
CUDAVirtualMemoryChunk vm(std::move(creator), std::move(configurators));
// materialize() should throw due to first configurator exception
EXPECT_THROW(vm.materialize(), DummyException);
// Verify creator->create() was called and first configurator setup() was called
EXPECT_TRUE(creatorPtr->createCalled);
EXPECT_TRUE(config1Ptr->setupCalled);
EXPECT_FALSE(config2Ptr->setupCalled);
// Status should be ERRORED
EXPECT_EQ(vm.status(), CUDAVirtualMemoryChunk::ERRORED);
// release() should still work and only teardown what was set up
vm.release();
EXPECT_TRUE(creatorPtr->releaseCalled);
EXPECT_FALSE(config1Ptr->teardownCalled); // Failed setup, so no teardown
EXPECT_FALSE(config2Ptr->teardownCalled); // Never setup
}
// Test 3: Exception in second configurator setup()
{
auto creator = std::make_unique<DummyCreator>();
auto* creatorPtr = creator.get();
auto config1 = std::make_unique<DummyConfigurator>("config1");
auto config2 = std::make_unique<DummyConfigurator>("config2");
config2->throwOnSetup = true;
auto* config1Ptr = config1.get();
auto* config2Ptr = config2.get();
CUDAVirtualMemoryChunk::Configurators configurators;
configurators.push_back(std::move(config1));
configurators.push_back(std::move(config2));
CUDAVirtualMemoryChunk vm(std::move(creator), std::move(configurators));
// materialize() should throw due to second configurator exception
EXPECT_THROW(vm.materialize(), DummyException);
// Verify the creator and both configurators' setup() were called (the second setup threw)
EXPECT_TRUE(creatorPtr->createCalled);
EXPECT_TRUE(config1Ptr->setupCalled);
EXPECT_TRUE(config2Ptr->setupCalled);
// Status should be ERRORED
EXPECT_EQ(vm.status(), CUDAVirtualMemoryChunk::ERRORED);
// release() should tear down the first configurator (successful setup) but not the second
vm.release();
EXPECT_TRUE(creatorPtr->releaseCalled);
EXPECT_TRUE(config1Ptr->teardownCalled); // Successful setup, so teardown called
EXPECT_FALSE(config2Ptr->teardownCalled); // Failed setup, so no teardown
}
// Test 4: Exception in configurator teardown() during release()
{
auto creator = std::make_unique<DummyCreator>();
auto* creatorPtr = creator.get();
auto config1 = std::make_unique<DummyConfigurator>("config1");
auto config2 = std::make_unique<DummyConfigurator>("config2");
config2->throwOnTeardown = true;
auto* config1Ptr = config1.get();
auto* config2Ptr = config2.get();
CUDAVirtualMemoryChunk::Configurators configurators;
configurators.push_back(std::move(config1));
configurators.push_back(std::move(config2));
CUDAVirtualMemoryChunk vm(std::move(creator), std::move(configurators));
// materialize() should succeed
vm.materialize();
EXPECT_EQ(vm.status(), CUDAVirtualMemoryChunk::MATERIALIZED);
// release() should throw due to teardown exception but still complete cleanup
EXPECT_THROW(vm.release(), DummyException);
// Verify all teardown methods were called despite exception
EXPECT_TRUE(config1Ptr->teardownCalled);
EXPECT_TRUE(config2Ptr->teardownCalled);
EXPECT_TRUE(creatorPtr->releaseCalled);
// Status should be ERRORED due to exception
EXPECT_EQ(vm.status(), CUDAVirtualMemoryChunk::ERRORED);
}
// Test 5: Exception in creator->release()
{
auto creator = std::make_unique<DummyCreator>();
creator->throwOnRelease = true;
auto* creatorPtr = creator.get();
auto config1 = std::make_unique<DummyConfigurator>("config1");
auto* config1Ptr = config1.get();
CUDAVirtualMemoryChunk::Configurators configurators;
configurators.push_back(std::move(config1));
CUDAVirtualMemoryChunk vm(std::move(creator), std::move(configurators));
// materialize() should succeed
vm.materialize();
EXPECT_EQ(vm.status(), CUDAVirtualMemoryChunk::MATERIALIZED);
// release() should throw due to creator exception but still complete configurator cleanup
EXPECT_THROW(vm.release(), DummyException);
// Verify configurator teardown was called despite creator exception
EXPECT_TRUE(config1Ptr->teardownCalled);
EXPECT_TRUE(creatorPtr->releaseCalled);
// Status should be ERRORED due to exception
EXPECT_EQ(vm.status(), CUDAVirtualMemoryChunk::ERRORED);
}
}
// Test various class facilities
TEST_F(VirtualMemoryTest, TestFacilities)
{
// Test default constructed CUDAVirtualMemoryChunk
{
CUDAVirtualMemoryChunk defaultVm;
// Should be invalid
EXPECT_FALSE(defaultVm);
EXPECT_EQ(defaultVm.status(), CUDAVirtualMemoryChunk::INVALID);
}
CUdeviceptr address{};
std::size_t constexpr size = 64 * 1024 * 1024;
TLLM_CU_CHECK(cuMemAddressReserve(&address, size, 0, {}, 0));
// Test move semantic
{
// Create original CUDAVirtualMemoryChunk
CUDAVirtualMemoryChunk::CreatorPtr creator
= std::make_unique<LocalCreator<>>(CUmemAllocationProp{CU_MEM_ALLOCATION_TYPE_PINNED,
CU_MEM_HANDLE_TYPE_NONE, {CU_MEM_LOCATION_TYPE_DEVICE, 0}},
size);
CUDAVirtualMemoryChunk::Configurators configurators;
configurators.push_back(std::make_unique<UnicastConfigurator>(
address, size, CUmemAccessDesc{{CU_MEM_LOCATION_TYPE_DEVICE, 0}, CU_MEM_ACCESS_FLAGS_PROT_READWRITE}));
CUDAVirtualMemoryChunk original(std::move(creator), std::move(configurators));
original.materialize();
EXPECT_EQ(original.status(), CUDAVirtualMemoryChunk::MATERIALIZED);
// Test move constructor
CUDAVirtualMemoryChunk moved{std::move(original)};
EXPECT_FALSE(original); // Original should be invalid after move
EXPECT_TRUE(moved); // Moved-to object should be valid
EXPECT_EQ(moved.status(), CUDAVirtualMemoryChunk::MATERIALIZED);
// Test move assignment
CUDAVirtualMemoryChunk assigned;
EXPECT_FALSE(assigned); // Default constructed, should be invalid
assigned = std::move(moved);
EXPECT_FALSE(moved); // moved should be invalid after move
EXPECT_TRUE(assigned); // assigned should be valid
EXPECT_EQ(assigned.status(), CUDAVirtualMemoryChunk::MATERIALIZED);
// Clean up
assigned.release();
}
}
// Test destructor
TEST_F(VirtualMemoryTest, TestDestructor)
{
// Dummy Creator for testing destructor behavior
class DummyCreator : public CUDAVirtualMemoryChunk::Creator
{
public:
bool& createCalledRef;
bool& releaseCalledRef;
CUmemGenericAllocationHandle createdHandle = 0;
DummyCreator(bool& createRef, bool& releaseRef)
: createCalledRef(createRef)
, releaseCalledRef(releaseRef)
{
}
CUmemGenericAllocationHandle create() override
{
createCalledRef = true;
createdHandle = 0xbaadf00dbaadf00d;
return createdHandle;
}
void release(CUmemGenericAllocationHandle handle, bool destructing) override
{
releaseCalledRef = true;
ASSERT_EQ(handle, createdHandle);
}
};
// Dummy Configurator for testing destructor behavior
class DummyConfigurator : public CUDAVirtualMemoryChunk::Configurator
{
public:
bool& setupCalledRef;
bool& teardownCalledRef;
std::string name;
DummyConfigurator(std::string n, bool& setupRef, bool& teardownRef)
: setupCalledRef(setupRef)
, teardownCalledRef(teardownRef)
, name(std::move(n))
{
}
void setup(CUmemGenericAllocationHandle) override
{
setupCalledRef = true;
}
void teardown(CUmemGenericAllocationHandle, bool) override
{
teardownCalledRef = true;
}
};
// Test destructor calls release automatically for materialized memory
{
bool createCalled = false;
bool releaseCalled = false;
bool setupCalled = false;
bool teardownCalled = false;
auto creator = std::make_unique<DummyCreator>(createCalled, releaseCalled);
auto config1 = std::make_unique<DummyConfigurator>("config1", setupCalled, teardownCalled);
CUDAVirtualMemoryChunk::Configurators configurators;
configurators.push_back(std::move(config1));
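// Placement new lets the test invoke the destructor at a precise point and then inspect
// the tracking flags, which live on the stack and outlive the object.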
alignas(CUDAVirtualMemoryChunk) std::byte storage[sizeof(CUDAVirtualMemoryChunk)];
CUDAVirtualMemoryChunk* vm = new (storage) CUDAVirtualMemoryChunk(std::move(creator), std::move(configurators));
vm->materialize();
// Verify materialize was called
EXPECT_TRUE(createCalled);
EXPECT_TRUE(setupCalled);
EXPECT_FALSE(releaseCalled);
EXPECT_FALSE(teardownCalled);
EXPECT_EQ(vm->status(), CUDAVirtualMemoryChunk::MATERIALIZED);
vm->~CUDAVirtualMemoryChunk();
// Verify destructor called release
EXPECT_TRUE(releaseCalled);
EXPECT_TRUE(teardownCalled);
}
// Test destructor doesn't double-release for manually released memory
{
// Local variables to track calls (persist after object destruction)
bool createCalled = false;
bool releaseCalled = false;
bool setupCalled = false;
bool teardownCalled = false;
auto creator = std::make_unique<DummyCreator>(createCalled, releaseCalled);
auto config1 = std::make_unique<DummyConfigurator>("config1", setupCalled, teardownCalled);
CUDAVirtualMemoryChunk::Configurators configurators;
configurators.push_back(std::move(config1));
alignas(CUDAVirtualMemoryChunk) std::byte storage[sizeof(CUDAVirtualMemoryChunk)];
auto* vm = new (storage) CUDAVirtualMemoryChunk(std::move(creator), std::move(configurators));
vm->materialize();
vm->release(); // Manual release
// Verify manual release was called
EXPECT_TRUE(releaseCalled);
EXPECT_TRUE(teardownCalled);
EXPECT_EQ(vm->status(), CUDAVirtualMemoryChunk::RELEASED);
// Reset flags to verify destructor doesn't call release again
releaseCalled = false;
teardownCalled = false;
vm->~CUDAVirtualMemoryChunk();
// Verify destructor did NOT call release again (no double-release)
EXPECT_FALSE(releaseCalled);
EXPECT_FALSE(teardownCalled);
}
// Test destructor behavior with ERRORED state
{
// Local variables to track calls (persist after object destruction)
bool createCalled = false;
bool releaseCalled = false;
bool config1SetupCalled = false;
bool config1TeardownCalled = false;
bool throwingSetupCalled = false;
bool throwingTeardownCalled = false;
class ThrowingConfigurator : public CUDAVirtualMemoryChunk::Configurator
{
public:
bool& setupCalledRef;
bool& teardownCalledRef;
ThrowingConfigurator(bool& setupRef, bool& teardownRef)
: setupCalledRef(setupRef)
, teardownCalledRef(teardownRef)
{
}
void setup(CUmemGenericAllocationHandle) override
{
setupCalledRef = true;
throw DummyException();
}
void teardown(CUmemGenericAllocationHandle, bool) override
{
teardownCalledRef = true;
}
};
auto creator = std::make_unique<DummyCreator>(createCalled, releaseCalled);
auto config1 = std::make_unique<DummyConfigurator>("config1", config1SetupCalled, config1TeardownCalled);
auto throwingConfig = std::make_unique<ThrowingConfigurator>(throwingSetupCalled, throwingTeardownCalled);
CUDAVirtualMemoryChunk::Configurators configurators;
configurators.push_back(std::move(config1));
configurators.push_back(std::move(throwingConfig));
alignas(CUDAVirtualMemoryChunk) std::byte storage[sizeof(CUDAVirtualMemoryChunk)];
auto* vm = new (storage) CUDAVirtualMemoryChunk(std::move(creator), std::move(configurators));
// Materialize should throw and leave VM in ERRORED state
EXPECT_THROW(vm->materialize(), DummyException);
EXPECT_EQ(vm->status(), CUDAVirtualMemoryChunk::ERRORED);
// Verify partial setup occurred
EXPECT_TRUE(createCalled);
EXPECT_TRUE(config1SetupCalled);
EXPECT_TRUE(throwingSetupCalled);
EXPECT_FALSE(releaseCalled);
vm->~CUDAVirtualMemoryChunk();
// Verify destructor called release to clean up the errored state
EXPECT_TRUE(releaseCalled);
EXPECT_TRUE(config1TeardownCalled);
// throwingConfig's teardown should NOT be called since setup failed
EXPECT_FALSE(throwingTeardownCalled);
}
}
// Test edge cases and error scenarios
TEST_F(VirtualMemoryTest, TestEdgeCases)
{
// Dummy Creator for testing edge cases
class DummyCreator : public CUDAVirtualMemoryChunk::Creator
{
public:
CUmemGenericAllocationHandle createdHandle = 0xbaadf00dbaadf00d;
CUmemGenericAllocationHandle create() override
{
return createdHandle;
}
void release(CUmemGenericAllocationHandle handle, bool destructing) override
{
ASSERT_EQ(handle, createdHandle);
}
};
// Test multiple materialize calls (should throw)
{
auto creator = std::make_unique<DummyCreator>();
CUDAVirtualMemoryChunk vm(std::move(creator), {});
vm.materialize();
EXPECT_EQ(vm.status(), CUDAVirtualMemoryChunk::MATERIALIZED);
// Second materialize should throw
EXPECT_THROW(vm.materialize(), tc::TllmException);
EXPECT_EQ(vm.status(), CUDAVirtualMemoryChunk::MATERIALIZED);
vm.release();
}
// Test multiple release calls (should throw)
{
auto creator = std::make_unique<DummyCreator>();
CUDAVirtualMemoryChunk vm(std::move(creator), {});
vm.materialize();
vm.release();
EXPECT_EQ(vm.status(), CUDAVirtualMemoryChunk::RELEASED);
// Second release should throw
EXPECT_THROW(vm.release(), tc::TllmException);
EXPECT_EQ(vm.status(), CUDAVirtualMemoryChunk::RELEASED);
}
// Test release on RELEASED state (should throw)
{
auto creator = std::make_unique<DummyCreator>();
CUDAVirtualMemoryChunk vm(std::move(creator), {});
EXPECT_EQ(vm.status(), CUDAVirtualMemoryChunk::RELEASED);
EXPECT_THROW(vm.release(), tc::TllmException); // Should throw on RELEASED state
EXPECT_EQ(vm.status(), CUDAVirtualMemoryChunk::RELEASED);
}
// Test materialize on ERRORED state after exception recovery
{
// Create a VM that will go into ERRORED state
class ThrowingConfigurator : public CUDAVirtualMemoryChunk::Configurator
{
public:
bool shouldThrow = true;
void setup(CUmemGenericAllocationHandle) override
{
if (shouldThrow)
{
throw DummyException();
}
}
void teardown(CUmemGenericAllocationHandle, bool) override {}
};
auto creator = std::make_unique<DummyCreator>();
auto throwingConfig = std::make_unique<ThrowingConfigurator>();
CUDAVirtualMemoryChunk::Configurators configurators;
configurators.push_back(std::move(throwingConfig));
CUDAVirtualMemoryChunk vm(std::move(creator), std::move(configurators));
// First materialize should throw and leave VM in ERRORED state
EXPECT_THROW(vm.materialize(), DummyException);
EXPECT_EQ(vm.status(), CUDAVirtualMemoryChunk::ERRORED);
// Should be able to release from ERRORED state
vm.release();
EXPECT_EQ(vm.status(), CUDAVirtualMemoryChunk::RELEASED);
}
}
class VirtualMemoryManagerTest : public VirtualMemoryTestBase // NOLINT(cppcoreguidelines-pro-type-member-init)
{
using Base = VirtualMemoryTestBase;
protected:
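// White-box accessors into CudaVirtualMemoryManager's internal bookkeeping (entries,
// memories, and handles flagged as bad), used to verify cleanup behavior.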
auto& entries()
{
return mVMManager->mEntries;
}
auto& memories()
{
return mVMManager->mMemories;
}
auto& badHandles()
{
return mVMManager->mBadHandles;
}
void SetUp() override
{
this->Base::SetUp();
mVMManager = std::make_unique<CudaVirtualMemoryManager>();
}
void TearDown() override
{
this->Base::TearDown();
ASSERT_TRUE(!mVMManager || entries().size() == 0) << "Leftover memory in manager";
}
std::unique_ptr<CudaVirtualMemoryManager> mVMManager = nullptr;
};
TEST_F(VirtualMemoryManagerTest, TestBasic)
{
CUdeviceptr address{};
std::size_t constexpr size = 256 * 1024 * 1024;
TLLM_CU_CHECK(cuMemAddressReserve(&address, size, 0, {}, 0));
uintptr_t handle = static_cast<uintptr_t>(address);
std::string tag = "test_tag";
CUDAVirtualMemoryChunk::CreatorPtr creator
= std::make_unique<LocalCreator<>>(CUmemAllocationProp{CU_MEM_ALLOCATION_TYPE_PINNED, CU_MEM_HANDLE_TYPE_NONE,
{
CU_MEM_LOCATION_TYPE_DEVICE,
0,
}},
size);
CUDAVirtualMemoryChunk::Configurators configurators;
configurators.push_back(std::make_unique<UnicastConfigurator>(address, size,
CUmemAccessDesc{{
CU_MEM_LOCATION_TYPE_DEVICE,
0,
},
CU_MEM_ACCESS_FLAGS_PROT_READWRITE}));
auto memoryBegin = getCurrentProcessMemoryInfo();
// Add to manager - this automatically materializes
mVMManager->add(handle, tag, std::move(creator), std::move(configurators));
auto memoryMaterialized = getCurrentProcessMemoryInfo();
if (memoryInfoAvailable())
{
ASSERT_EQ(memoryBegin + size, memoryMaterialized) << "add/materialize does not allocate memory";
}
// Test memory access after materialization
auto result = cuMemsetD8_v2(address, 255, size);
ASSERT_EQ(result, CUDA_SUCCESS) << "Accessing memory returned failure (first materialize)";
TLLM_CU_CHECK(cuStreamSynchronize(nullptr));
// Release memory through manager
auto releaseCount = mVMManager->releaseWithTag(tag);
ASSERT_EQ(releaseCount, 1) << "Expected to release 1 memory object";
auto memoryReleased = getCurrentProcessMemoryInfo();
if (memoryInfoAvailable())
{
ASSERT_EQ(memoryBegin, memoryReleased) << "releaseWithTag does not release memory";
}
// Materialize again through manager
auto materializeCount = mVMManager->materializeWithTag(tag);
ASSERT_EQ(materializeCount, 1) << "Expected to materialize 1 memory object";
auto memoryRematerialized = getCurrentProcessMemoryInfo();
if (memoryInfoAvailable())
{
ASSERT_EQ(memoryBegin + size, memoryRematerialized) << "materializeWithTag does not allocate memory";
}
// Test memory access after rematerialization
result = cuMemsetD8_v2(address, 255, size);
ASSERT_EQ(result, CUDA_SUCCESS) << "Accessing memory returned failure (second materialize)";
TLLM_CU_CHECK(cuStreamSynchronize(nullptr));
// Clean up - remove from manager
{
auto removedMemory = mVMManager->remove(handle);
ASSERT_TRUE(removedMemory) << "Expected to successfully remove memory from manager";
}
auto memoryAfterRemove = getCurrentProcessMemoryInfo();
if (memoryInfoAvailable())
{
ASSERT_EQ(memoryBegin, memoryAfterRemove) << "remove does not release memory";
}
auto unknownMemory = mVMManager->remove(0);
ASSERT_FALSE(unknownMemory) << "Expect invalid memory for unknown handle";
}
TEST_F(VirtualMemoryManagerTest, TestTags)
{
// Dummy Creator for testing tag functionality
class DummyCreator : public CUDAVirtualMemoryChunk::Creator
{
public:
bool createCalled = false;
bool releaseCalled = false;
CUmemGenericAllocationHandle createdHandle = 0xbaadf00dbaadf00d;
CUmemGenericAllocationHandle create() override
{
createCalled = true;
return createdHandle;
}
void release(CUmemGenericAllocationHandle handle, bool destructing) override
{
releaseCalled = true;
ASSERT_EQ(handle, createdHandle);
}
};
// Create creators for different virtual memories
auto creator1 = std::make_unique<DummyCreator>();
auto creator2 = std::make_unique<DummyCreator>();
auto creator3 = std::make_unique<DummyCreator>();
auto creator4 = std::make_unique<DummyCreator>();
// Keep pointers to track state
auto* creator1Ptr = creator1.get();
auto* creator2Ptr = creator2.get();
auto* creator3Ptr = creator3.get();
auto* creator4Ptr = creator4.get();
mVMManager->add(0x1000, "tag_A", std::move(creator1), {});
mVMManager->add(0x2000, "tag_B", std::move(creator2), {});
mVMManager->add(0x3000, "tag_A", std::move(creator3), {});
mVMManager->add(0x4000, "tag_C", std::move(creator4), {});
// All should be materialized initially (since add() materializes automatically)
EXPECT_TRUE(creator1Ptr->createCalled);
EXPECT_TRUE(creator2Ptr->createCalled);
EXPECT_TRUE(creator3Ptr->createCalled);
EXPECT_TRUE(creator4Ptr->createCalled);
// Reset create flags to test materializeWithTag later
creator1Ptr->createCalled = false;
creator2Ptr->createCalled = false;
creator3Ptr->createCalled = false;
creator4Ptr->createCalled = false;
// Test releaseWithTag - should release only memories with "tag_A"
auto releaseCount = mVMManager->releaseWithTag("tag_A");
EXPECT_EQ(releaseCount, 2); // Should release 2 memories with tag_A
// Verify only tag_A memories were released
EXPECT_TRUE(creator1Ptr->releaseCalled); // tag_A
EXPECT_FALSE(creator2Ptr->releaseCalled); // tag_B
EXPECT_TRUE(creator3Ptr->releaseCalled); // tag_A
EXPECT_FALSE(creator4Ptr->releaseCalled); // tag_C
// Test materializeWithTag - should materialize only memories with "tag_A"
auto materializeCount = mVMManager->materializeWithTag("tag_A");
EXPECT_EQ(materializeCount, 2); // Should materialize 2 memories with tag_A
// Verify only tag_A memories were materialized
EXPECT_TRUE(creator1Ptr->createCalled); // tag_A
EXPECT_FALSE(creator2Ptr->createCalled); // tag_B
EXPECT_TRUE(creator3Ptr->createCalled); // tag_A
EXPECT_FALSE(creator4Ptr->createCalled); // tag_C
// Reset flags and test releasing with a different tag
creator2Ptr->releaseCalled = false;
releaseCount = mVMManager->releaseWithTag("tag_B");
EXPECT_EQ(releaseCount, 1); // Should release 1 memory with tag_B
EXPECT_TRUE(creator2Ptr->releaseCalled); // tag_B should now be released
// Test with non-existent tag
releaseCount = mVMManager->releaseWithTag("nonexistent_tag");
EXPECT_EQ(releaseCount, 0); // Should release 0 memories
materializeCount = mVMManager->materializeWithTag("nonexistent_tag");
EXPECT_EQ(materializeCount, 0); // Should materialize 0 memories
// Clean up - remove all memories
mVMManager->remove(0x1000);
mVMManager->remove(0x2000);
mVMManager->remove(0x3000);
mVMManager->remove(0x4000);
}
TEST_F(VirtualMemoryManagerTest, TestAddException)
{
// Dummy Creator that succeeds
class DummyCreator : public CUDAVirtualMemoryChunk::Creator
{
public:
CUmemGenericAllocationHandle createdHandle = 0xbaadf00dbaadf00d;
CUmemGenericAllocationHandle create() override
{
return createdHandle;
}
void release(CUmemGenericAllocationHandle handle, bool destructing) override
{
ASSERT_EQ(handle, createdHandle);
}
};
// Dummy Configurator that throws during setup
class ThrowingConfigurator : public CUDAVirtualMemoryChunk::Configurator
{
public:
void setup(CUmemGenericAllocationHandle) override
{
throw DummyException();
}
void teardown(CUmemGenericAllocationHandle, bool) override
{
ASSERT_TRUE(false) << "Unreachable";
}
};
uintptr_t handle = 0x12345678;
std::string tag = "test_tag";
// Verify initial state is clean
EXPECT_TRUE(memories().empty());
EXPECT_TRUE(entries().empty());
EXPECT_TRUE(badHandles().empty());
auto creator = std::make_unique<DummyCreator>();
CUDAVirtualMemoryChunk::Configurators configurators;
configurators.push_back(std::make_unique<ThrowingConfigurator>());
// add() should throw because materialize() will fail due to ThrowingConfigurator
EXPECT_THROW(mVMManager->add(handle, tag, std::move(creator), std::move(configurators)), DummyException);
// Verify that the manager state is clean after the exception
// The ScopeGuards in add() should have cleaned up properly
EXPECT_TRUE(memories().empty()) << "mMemories should be empty after failed add()";
EXPECT_TRUE(entries().empty()) << "mEntries should be empty after failed add()";
EXPECT_TRUE(badHandles().empty()) << "mBadHandles should be empty after failed add()";
// Test that we can successfully add a memory with the same handle after the failure
auto successCreator = std::make_unique<DummyCreator>();
CUDAVirtualMemoryChunk::Configurators successConfigurators; // Empty configurators should work
// This should succeed without throwing
EXPECT_NO_THROW(mVMManager->add(handle, tag, std::move(successCreator), std::move(successConfigurators)));
// Verify that the manager now has the entry
EXPECT_EQ(memories().size(), 1);
EXPECT_EQ(entries().size(), 1);
EXPECT_TRUE(badHandles().empty());
// Clean up
auto removedMemory = mVMManager->remove(handle);
EXPECT_TRUE(removedMemory);
}
TEST_F(VirtualMemoryManagerTest, TestMaterializeException)
{
// State structure that tracks create/release order and can be configured to throw on a specific create() call
struct CreatorState
{
int& createCounter; // Reference to shared counter
int throwOnCreateIdx = 0; // 1-based index to throw on create
int myCreateIdx = INT_MAX;
bool createCalled = false;
bool releaseCalled = false;
CUmemGenericAllocationHandle createdHandle = 0xbaadf00dbaadf00d;
CreatorState(int& sharedCounter)
: createCounter(sharedCounter)
{
}
};
// Dummy Creator that uses external state
class TestMatEx_DummyCreator : public CUDAVirtualMemoryChunk::Creator
{
public:
CreatorState& state;
TestMatEx_DummyCreator(CreatorState& state)
: state(state)
{
}
CUmemGenericAllocationHandle create() override
{
state.createCalled = true;
state.myCreateIdx = ++state.createCounter;
if (state.throwOnCreateIdx > 0 && state.myCreateIdx == state.throwOnCreateIdx)
{
throw DummyException();
}
return state.createdHandle;
}
void release(CUmemGenericAllocationHandle handle, bool destructing) override
{
state.releaseCalled = true;
ASSERT_EQ(handle, state.createdHandle);
}
};
// Create shared counter
int sharedCreateCounter = 0;
// Create state objects for each creator
CreatorState state1(sharedCreateCounter);
CreatorState state2(sharedCreateCounter);
CreatorState state3(sharedCreateCounter);
// We want the second memory (by create order) to throw
state1.throwOnCreateIdx = 2;
state2.throwOnCreateIdx = 2;
state3.throwOnCreateIdx = 2;
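// The test does not assume the order in which the manager materializes memories under a
// tag, so every creator is armed to throw on the second create(); the sort further below
// recovers the order that was actually used.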
// Create creators and configurators
auto creator1 = std::make_unique<TestMatEx_DummyCreator>(state1);
auto creator2 = std::make_unique<TestMatEx_DummyCreator>(state2);
auto creator3 = std::make_unique<TestMatEx_DummyCreator>(state3);
// Add memories to the manager in the RELEASED state; this overload of add() takes an
// already-constructed chunk and does not materialize it
CUDAVirtualMemoryChunk vm1(std::move(creator1), {});
CUDAVirtualMemoryChunk vm2(std::move(creator2), {});
CUDAVirtualMemoryChunk vm3(std::move(creator3), {});
mVMManager->add(0x1000, "test_tag", std::move(vm1));
mVMManager->add(0x2000, "test_tag", std::move(vm2));
mVMManager->add(0x3000, "test_tag", std::move(vm3));
// Verify initial state is clean
EXPECT_TRUE(badHandles().empty());
// materializeWithTag should stop at the first exception (the second memory in create order)
// and roll back the first memory, which had already been materialized
EXPECT_THROW(mVMManager->materializeWithTag("test_tag"), DummyException);
// Find which creators were called and in what order
std::vector<std::pair<uintptr_t, CreatorState*>> creators
= {{0x1000, &state1}, {0x2000, &state2}, {0x3000, &state3}};
// Sort by myCreateIdx; creators that were never called keep the INT_MAX default and sort last
std::sort(creators.begin(), creators.end(),
[](auto const& a, auto const& b) { return a.second->myCreateIdx < b.second->myCreateIdx; });
// The first memory (by create order) should have been materialized then released during rollback
auto* first = creators[0].second;
EXPECT_TRUE(first->createCalled);
EXPECT_TRUE(first->releaseCalled); // Rolled back
// The second memory's create() threw, so its creator was called but release() never was
auto* second = creators[1].second;
EXPECT_TRUE(second->createCalled);
EXPECT_FALSE(second->releaseCalled);
// The third memory should not have been touched (its create() was never called)
auto* third = creators[2].second;
EXPECT_FALSE(third->createCalled);
EXPECT_FALSE(third->releaseCalled);
// The handle of the memory that threw should be the second one's handle
uintptr_t thrownHandle = creators[1].first;
// Verify bad handles tracking - memories that threw exceptions should be removed
auto badHandles = mVMManager->retrieveBadHandles();
EXPECT_EQ(badHandles.size(), 1);
EXPECT_EQ(badHandles[0], thrownHandle);
// Verify the memory that threw was removed from the manager
auto removedMem = mVMManager->remove(thrownHandle);
EXPECT_FALSE(removedMem); // Should have been removed due to exception
// The other two memories should still be in manager
for (int i = 0; i < 3; ++i)
{
if (creators[i].first != thrownHandle)
{
auto removed = mVMManager->remove(creators[i].first);
EXPECT_TRUE(removed);
}
}
}
TEST_F(VirtualMemoryManagerTest, TestReleaseException)
{
// State structure to track create/release calls
struct CreatorState
{
bool createCalled = false;
bool releaseCalled = false;
int& releaseCounter;
int throwOnReleaseCount;
CUmemGenericAllocationHandle createdHandle = 0xbaadf00dbaadf00d;
CreatorState(int& counter, int throwCount)
: releaseCounter(counter)
, throwOnReleaseCount(throwCount)
{
}
};
// State structure to track setup/teardown calls
struct ConfiguratorState
{
bool setupCalled = false;
bool teardownCalled = false;
int& teardownCounter;
int throwOnTeardownCount;
ConfiguratorState(int& counter, int throwCount)
: teardownCounter(counter)
, throwOnTeardownCount(throwCount)
{
}
};
// Dummy Creator whose release() throws once the shared release counter reaches the configured count
class DummyCreator : public CUDAVirtualMemoryChunk::Creator
{
public:
CreatorState& state;
DummyCreator(CreatorState& state)
: state(state)
{
}
CUmemGenericAllocationHandle create() override
{
state.createCalled = true;
return state.createdHandle;
}
void release(CUmemGenericAllocationHandle handle, bool destructing) override
{
state.releaseCalled = true;
ASSERT_EQ(handle, state.createdHandle);
if (++state.releaseCounter == state.throwOnReleaseCount)
{
throw DummyException();
}
}
};
// Dummy Configurator whose teardown() throws once the shared teardown counter reaches the configured count
class DummyConfigurator : public CUDAVirtualMemoryChunk::Configurator
{
public:
ConfiguratorState& state;
DummyConfigurator(ConfiguratorState& state)
: state(state)
{
}
void setup(CUmemGenericAllocationHandle) override
{
state.setupCalled = true;
}
void teardown(CUmemGenericAllocationHandle, bool) override
{
state.teardownCalled = true;
if (++state.teardownCounter == state.throwOnTeardownCount)
{
throw DummyException();
}
}
};
// Create counters for tracking release/teardown calls
int releaseCounter = 0;
int teardownCounter = 0;
// Create state objects for each creator and configurator
CreatorState state1(releaseCounter, 2); // Throw on 2nd release
CreatorState state2(releaseCounter, 2); // Throw on 2nd release
CreatorState state3(releaseCounter, 2); // Throw on 2nd release
CreatorState state4(releaseCounter, 2); // Throw on 2nd release
ConfiguratorState configState1(teardownCounter, 3); // Throw on 3rd teardown
ConfiguratorState configState2(teardownCounter, 3); // Throw on 3rd teardown
ConfiguratorState configState3(teardownCounter, 3); // Throw on 3rd teardown
ConfiguratorState configState4(teardownCounter, 3); // Throw on 3rd teardown
// Create creators and configurators
auto creator1 = std::make_unique<DummyCreator>(state1);
auto creator2 = std::make_unique<DummyCreator>(state2);
auto creator3 = std::make_unique<DummyCreator>(state3);
auto creator4 = std::make_unique<DummyCreator>(state4);
auto config1 = std::make_unique<DummyConfigurator>(configState1);
auto config2 = std::make_unique<DummyConfigurator>(configState2);
auto config3 = std::make_unique<DummyConfigurator>(configState3);
auto config4 = std::make_unique<DummyConfigurator>(configState4);
CUDAVirtualMemoryChunk::Configurators configurators1;
configurators1.push_back(std::move(config1));
CUDAVirtualMemoryChunk::Configurators configurators2;
configurators2.push_back(std::move(config2));
CUDAVirtualMemoryChunk::Configurators configurators3;
configurators3.push_back(std::move(config3));
CUDAVirtualMemoryChunk::Configurators configurators4;
configurators4.push_back(std::move(config4));
mVMManager->add(0x1000, "test_tag", std::move(creator1), std::move(configurators1));
mVMManager->add(0x2000, "test_tag", std::move(creator2), std::move(configurators2));
mVMManager->add(0x3000, "test_tag", std::move(creator3), std::move(configurators3));
mVMManager->add(0x4000, "other_tag", std::move(creator4), std::move(configurators4));
// Verify initial state
EXPECT_TRUE(badHandles().empty());
// releaseWithTag should call release on all memories with "test_tag"
// and continue despite exceptions
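// With the shared counters, the first chunk released under the tag completes cleanly, while
// the other two hit the 2nd-release and 3rd-teardown throws and are flagged as bad handles.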
EXPECT_THROW(mVMManager->releaseWithTag("test_tag"), DummyException);
// Verify behavior:
// - All memories with "test_tag" should have had release() attempted
EXPECT_TRUE(state1.releaseCalled);
EXPECT_TRUE(configState1.teardownCalled);
EXPECT_TRUE(state2.releaseCalled);
EXPECT_TRUE(configState2.teardownCalled);
EXPECT_TRUE(state3.releaseCalled);
EXPECT_TRUE(configState3.teardownCalled);
// - Memory with different tag should not be affected
EXPECT_FALSE(state4.releaseCalled);
EXPECT_FALSE(configState4.teardownCalled);
// Verify bad handles tracking - memories that threw exceptions should be removed
auto badHandles = mVMManager->retrieveBadHandles();
EXPECT_EQ(badHandles.size(), 2);
EXPECT_NE(std::find(badHandles.begin(), badHandles.end(), 0x2000), badHandles.end());
EXPECT_NE(std::find(badHandles.begin(), badHandles.end(), 0x3000), badHandles.end());
// Verify the memories were removed from the manager
auto removedMem1 = mVMManager->remove(0x1000);
auto removedMem2 = mVMManager->remove(0x2000);
auto removedMem3 = mVMManager->remove(0x3000);
auto removedMem4 = mVMManager->remove(0x4000);
EXPECT_TRUE(removedMem1);  // Released cleanly, so still tracked by the manager
EXPECT_FALSE(removedMem2); // Already removed by the manager after its release path threw
EXPECT_FALSE(removedMem3); // Already removed by the manager after its release path threw
EXPECT_TRUE(removedMem4);  // Different tag, untouched, so still tracked by the manager
}
TEST_F(VirtualMemoryManagerTest, TestCudaVirtualMemoryAllocator)
{
std::size_t constexpr size = 64 * 1024 * 1024; // 64 MB
std::string tag = "test_allocator_tag";
// Create a CUDA stream for the allocator
CudaStream stream;
auto streamPtr = std::make_shared<CudaStream>(std::move(stream));
// Create configuration for the virtual address allocator
auto config = std::make_shared<CudaVirtualMemoryAllocator::Configuration>(
*mVMManager.get(), tag, CudaVirtualMemoryAllocator::RestoreMode::NONE, streamPtr);
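// Allocations made through this allocator are registered with mVMManager under `tag`, so the
// releaseWithTag/materializeWithTag calls below operate on the buffer's backing memory.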
auto memoryBegin = getCurrentProcessMemoryInfo();
// Create a buffer using the virtual address allocator
auto buffer = std::make_unique<VirtualAddressDeviceBuffer>(
size, nvinfer1::DataType::kINT8, CudaVirtualMemoryAllocator{config});
auto memoryAfterAllocation = getCurrentProcessMemoryInfo();
if (memoryInfoAvailable())
{
ASSERT_EQ(memoryBegin + size, memoryAfterAllocation) << "Buffer allocation does not allocate memory";
}
// Test that we can access the buffer data
ASSERT_NE(buffer->data(), nullptr) << "Buffer data should not be null";
ASSERT_EQ(buffer->getSize(), size) << "Buffer size should match requested size";
ASSERT_EQ(buffer->getDataType(), nvinfer1::DataType::kINT8) << "Buffer data type should be INT8";
ASSERT_EQ(buffer->getMemoryType(), MemoryType::kGPU) << "Buffer memory type should be GPU";
// Test memory access by setting memory to a known pattern
auto devicePtr = reinterpret_cast<CUdeviceptr>(buffer->data());
auto result = cuMemsetD8_v2(devicePtr, 0xAB, size);
ASSERT_EQ(result, CUDA_SUCCESS) << "Memory access should succeed";
TLLM_CU_CHECK(cuStreamSynchronize(nullptr));
// Test releasing memory with tag - this should free the virtual memory
auto releaseCount = mVMManager->releaseWithTag(tag);
ASSERT_EQ(releaseCount, 1) << "Expected to release 1 memory object";
auto memoryAfterRelease = getCurrentProcessMemoryInfo();
if (memoryInfoAvailable())
{
ASSERT_EQ(memoryBegin, memoryAfterRelease) << "Release should free the memory";
}
// Test materializing memory with tag - this should re-allocate the virtual memory
auto materializeCount = mVMManager->materializeWithTag(tag);
ASSERT_EQ(materializeCount, 1) << "Expected to materialize 1 memory object";
auto memoryAfterMaterialize = getCurrentProcessMemoryInfo();
if (memoryInfoAvailable())
{
ASSERT_EQ(memoryBegin + size, memoryAfterMaterialize) << "Materialize should allocate memory";
}
// Test memory access again after rematerialization
result = cuMemsetD8_v2(devicePtr, 0xCD, size);
ASSERT_EQ(result, CUDA_SUCCESS) << "Memory access should succeed after rematerialization";
TLLM_CU_CHECK(cuStreamSynchronize(nullptr));
// Clean up by destroying the buffer (this should automatically clean up the virtual memory)
buffer.reset();
auto memoryAfterCleanup = getCurrentProcessMemoryInfo();
if (memoryInfoAvailable())
{
ASSERT_EQ(memoryBegin, memoryAfterCleanup) << "Buffer destruction should free memory";
}
}