/*
 * Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include <gtest/gtest.h>

#include <nvml.h>

#include "tensorrt_llm/common/cudaUtils.h"
#include "tensorrt_llm/runtime/bufferManager.h"
#include "tensorrt_llm/runtime/tllmBuffers.h"
#include "tensorrt_llm/runtime/virtualMemory.h"

#include <algorithm>
#include <climits>
#include <cstddef>
#include <new>
#include <random>
#include <string>
#include <vector>

#include <unistd.h>

using namespace tensorrt_llm::runtime;
namespace tc = tensorrt_llm::common;

struct DummyException : std::runtime_error
{
    DummyException()
        : runtime_error("dummy exception")
    {
    }
};

class VirtualMemoryTestBase : public ::testing::Test
{
protected:
    void SetUp() override
    {
        if (tc::getDeviceCount() == 0)
        {
            GTEST_SKIP() << "This test suite cannot run on systems with no devices.";
        }
        TLLM_CU_CHECK(cuInit(0));
        CUdevice dev;
        TLLM_CU_CHECK(cuDeviceGet(&dev, 0));
        CUcontext ctx;
        TLLM_CU_CHECK(cuDevicePrimaryCtxRetain(&ctx, dev));
        TLLM_CU_CHECK(cuCtxSetCurrent(ctx));

        // Initialize NVML
        nvmlReturn_t nvmlResult = nvmlInit();
        TLLM_CHECK_WITH_INFO(
            nvmlResult == NVML_SUCCESS, "Failed to initialize NVML: %s", nvmlErrorString(nvmlResult));
        if (!memoryInfoAvailable())
        {
            TLLM_LOG_WARNING("Per process memory information unavailable.");
        }
        TLLM_CUDA_CHECK(cudaDeviceSynchronize());
    }

    void TearDown() override
    {
        TLLM_CUDA_CHECK(cudaDeviceSynchronize());
    }

    static bool memoryInfoAvailable()
    {
        static bool available = []
        {
            auto blob = BufferManager::gpuSync(4096);
            auto usage = getCurrentProcessMemoryInfo();
            return usage != 0;
        }();
        return available;
    }

    static size_t getCurrentProcessMemoryInfo()
    {
        // Get current process ID
        uint32_t currentPid = static_cast<uint32_t>(getpid());

        // Get device handle for GPU 0
        nvmlDevice_t device;
        auto nvmlResult = nvmlDeviceGetHandleByIndex(0, &device);
        TLLM_CHECK_WITH_INFO(
            nvmlResult == NVML_SUCCESS, "Failed to get device handle: %s", nvmlErrorString(nvmlResult));

        // Get running processes
        unsigned int processCount = 1;
        std::vector<nvmlProcessInfo_t> processes(processCount);
        nvmlResult = NVML_ERROR_INSUFFICIENT_SIZE;
        while (nvmlResult == NVML_ERROR_INSUFFICIENT_SIZE)
        {
            nvmlResult = nvmlDeviceGetComputeRunningProcesses_v3(device, &processCount, processes.data());
            TLLM_CHECK_WITH_INFO(nvmlResult == NVML_SUCCESS || nvmlResult == NVML_ERROR_INSUFFICIENT_SIZE,
                "Failed to get process count: %s", nvmlErrorString(nvmlResult));
            processes.resize(processCount);
        }

        // Find current process
        for (auto const& process : processes)
        {
            if (process.pid == currentPid)
            {
                return process.usedGpuMemory;
            }
        }
        return 0;
    }
};

class VirtualMemoryTest : public VirtualMemoryTestBase
{
};

// Test CUDAVirtualMemoryChunk materialize and release memory correctly
TEST_F(VirtualMemoryTest, TestBasic)
{
    CUdeviceptr address{};
    std::size_t constexpr size = 256 * 1024 * 1024;
    TLLM_CU_CHECK(cuMemAddressReserve(&address, size, 0, {}, 0));
    CUDAVirtualMemoryChunk::CreatorPtr creator = std::make_unique>(
        CUmemAllocationProp{CU_MEM_ALLOCATION_TYPE_PINNED, CU_MEM_HANDLE_TYPE_NONE,
            {
                CU_MEM_LOCATION_TYPE_DEVICE,
                0,
            }},
        size);
    CUDAVirtualMemoryChunk::Configurators configurators;
    configurators.push_back(std::make_unique(address, size,
        CUmemAccessDesc{{
                            CU_MEM_LOCATION_TYPE_DEVICE,
                            0,
                        },
            CU_MEM_ACCESS_FLAGS_PROT_READWRITE}));

    CUDAVirtualMemoryChunk vm(std::move(creator), std::move(configurators));
    ASSERT_EQ(vm.status(), CUDAVirtualMemoryChunk::RELEASED);

    auto memoryBegin = getCurrentProcessMemoryInfo();

    vm.materialize();
    ASSERT_EQ(vm.status(), CUDAVirtualMemoryChunk::MATERIALIZED);
    auto memoryMaterialized = getCurrentProcessMemoryInfo();
    if (memoryInfoAvailable())
    {
        ASSERT_EQ(memoryBegin + size, memoryMaterialized) << "materialize does not allocate memory";
    }

    auto result = cuMemsetD8_v2(address, 255, size);
    ASSERT_EQ(result, CUDA_SUCCESS) << "Accessing memory returned failure (first materialize)";
    TLLM_CU_CHECK(cuStreamSynchronize(nullptr));

    vm.release();
    ASSERT_EQ(vm.status(), CUDAVirtualMemoryChunk::RELEASED);
    auto memoryReleased = getCurrentProcessMemoryInfo();
    if (memoryInfoAvailable())
    {
        ASSERT_EQ(memoryBegin, memoryReleased) << "release does not release memory";
    }

    vm.materialize();
    ASSERT_EQ(vm.status(), CUDAVirtualMemoryChunk::MATERIALIZED);
    result = cuMemsetD8_v2(address, 255, size);
    ASSERT_EQ(result, CUDA_SUCCESS) << "Accessing memory returned failure (second materialize)";
    TLLM_CU_CHECK(cuStreamSynchronize(nullptr));

    vm.release();
    ASSERT_EQ(vm.status(), CUDAVirtualMemoryChunk::RELEASED);
    memoryReleased = getCurrentProcessMemoryInfo();
    if (memoryInfoAvailable())
    {
        ASSERT_EQ(memoryBegin, memoryReleased) << "release does not release memory";
    }
}

// Test BackedConfigurator refills memory correctly for both CPU and PINNED memory types
class VirtualMemoryOffloadConfigurator : public VirtualMemoryTest, public ::testing::WithParamInterface<MemoryType>
{
};

TEST_P(VirtualMemoryOffloadConfigurator, Test)
{
    MemoryType backType = GetParam();
    CUdeviceptr address{};
    std::size_t constexpr size = 4 * 1024 * 1024;
    TLLM_CU_CHECK(cuMemAddressReserve(&address, size, 0, {}, 0));
    CudaStream stream;
    CUDAVirtualMemoryChunk::CreatorPtr creator = std::make_unique>(
        CUmemAllocationProp{CU_MEM_ALLOCATION_TYPE_PINNED, CU_MEM_HANDLE_TYPE_NONE,
            {
                CU_MEM_LOCATION_TYPE_DEVICE,
                0,
            }},
        size);
    CUDAVirtualMemoryChunk::Configurators configurators;
    configurators.push_back(std::make_unique(address, size,
        CUmemAccessDesc{{
                            CU_MEM_LOCATION_TYPE_DEVICE,
                            0,
                        },
            CU_MEM_ACCESS_FLAGS_PROT_READWRITE}));
    configurators.push_back(std::make_unique<BackedConfigurator>(address, size, backType, stream.get(), false));
    CUDAVirtualMemoryChunk vm(std::move(creator), std::move(configurators));

    std::vector<uint64_t> data(size / sizeof(uint64_t), 0);
    std::generate(data.begin(), data.end(), [engine = std::mt19937_64(address)]() mutable { return engine(); });

    vm.materialize();
    auto pointer = reinterpret_cast<void*>(address);
    auto result = cudaMemcpyAsync(pointer, data.data(), size, cudaMemcpyHostToDevice, stream.get());
    ASSERT_EQ(result, CUDA_SUCCESS) << "Copying memory returned failure";
    vm.release();
    vm.materialize();

    std::fill(data.begin(), data.end(), 0);
    result = cudaMemcpyAsync(data.data(), pointer, size, cudaMemcpyDeviceToHost, stream.get());
    stream.synchronize();
    ASSERT_EQ(result, CUDA_SUCCESS) << "Copying memory returned failure";

    auto engine = std::mt19937_64(static_cast<uint64_t>(address));
    for (size_t i = 0; i < data.size(); ++i)
    {
        ASSERT_EQ(data[i], engine()) << "Mismatched at index " << i;
    }
}

INSTANTIATE_TEST_SUITE_P(
    Backends, VirtualMemoryOffloadConfigurator, ::testing::Values(MemoryType::kCPU, MemoryType::kPINNED));

// Test CUDAVirtualMemoryChunk calls creator and configurators in correct order
TEST_F(VirtualMemoryTest, TestOrder)
{
    // Order tracking - local counter to track call sequence
    int callOrder = 0;
    // OrderTrackingCreator that records when its methods are called
    class OrderTrackingCreator : public CUDAVirtualMemoryChunk::Creator
    {
    public:
        int& mCallOrder;
        int createOrder = -1;
        int releaseOrder = -1;
        CUmemGenericAllocationHandle createdHandle = 0;

        OrderTrackingCreator(int& callOrder)
            : mCallOrder(callOrder)
        {
        }

        CUmemGenericAllocationHandle create() override
        {
            createOrder = ++mCallOrder;
            createdHandle = 0xbaadf00dbaadf00d;
            return createdHandle;
        }

        void release(CUmemGenericAllocationHandle handle, bool destructing) override
        {
            releaseOrder = ++mCallOrder;
            ASSERT_EQ(handle, createdHandle);
        }
    };

    // OrderTrackingConfigurator that records when its methods are called
    class OrderTrackingConfigurator : public CUDAVirtualMemoryChunk::Configurator
    {
    public:
        int& mCallOrder;
        std::string name;
        int setupOrder = -1;
        int teardownOrder = -1;

        OrderTrackingConfigurator(int& callOrder, std::string n)
            : mCallOrder(callOrder)
            , name(std::move(n))
        {
        }

        void setup(CUmemGenericAllocationHandle handle) override
        {
            setupOrder = ++mCallOrder;
        }

        void teardown(CUmemGenericAllocationHandle handle, bool destructing) override
        {
            teardownOrder = ++mCallOrder;
        }
    };

    // Create creator and configurators
    auto creator = std::make_unique<OrderTrackingCreator>(callOrder);
    auto* creatorPtr = creator.get();

    auto config1 = std::make_unique<OrderTrackingConfigurator>(callOrder, "config1");
    auto config2 = std::make_unique<OrderTrackingConfigurator>(callOrder, "config2");
    auto config3 = std::make_unique<OrderTrackingConfigurator>(callOrder, "config3");
    auto* config1Ptr = config1.get();
    auto* config2Ptr = config2.get();
    auto* config3Ptr = config3.get();

    CUDAVirtualMemoryChunk::Configurators configurators;
    configurators.push_back(std::move(config1));
    configurators.push_back(std::move(config2));
    configurators.push_back(std::move(config3));

    CUDAVirtualMemoryChunk vm(std::move(creator), std::move(configurators));

    // Test materialize() order: creator.create() first, then configurators.setup() in order
    vm.materialize();

    // Verify materialize order
    EXPECT_EQ(creatorPtr->createOrder, 1); // creator.create() should be called first
    EXPECT_EQ(config1Ptr->setupOrder, 2);  // config1.setup() should be called second
    EXPECT_EQ(config2Ptr->setupOrder, 3);  // config2.setup() should be called third
    EXPECT_EQ(config3Ptr->setupOrder, 4);  // config3.setup() should be called fourth

    // Verify release() hasn't been called yet
    EXPECT_EQ(creatorPtr->releaseOrder, -1);
    EXPECT_EQ(config1Ptr->teardownOrder, -1);
    EXPECT_EQ(config2Ptr->teardownOrder, -1);
    EXPECT_EQ(config3Ptr->teardownOrder, -1);

    // Test release() order: configurators.teardown() in reverse order, then creator.release()
    vm.release();

    // Verify release order
    EXPECT_EQ(config3Ptr->teardownOrder, 5); // config3.teardown() should be called first (reverse order)
    EXPECT_EQ(config2Ptr->teardownOrder, 6); // config2.teardown() should be called second
    EXPECT_EQ(config1Ptr->teardownOrder, 7); // config1.teardown() should be called third
    EXPECT_EQ(creatorPtr->releaseOrder, 8);  // creator.release() should be called last
}

// Test CUDAVirtualMemoryChunk behaves correctly when exceptions are thrown
TEST_F(VirtualMemoryTest, TestException)
{
    // Dummy Creator that can be configured to throw on create() or release()
    class DummyCreator : public CUDAVirtualMemoryChunk::Creator
    {
    public:
        bool throwOnCreate = false;
        bool throwOnRelease = false;
        bool createCalled = false;
        bool releaseCalled = false;
        CUmemGenericAllocationHandle createdHandle = 0;

        CUmemGenericAllocationHandle create() override
        {
            createCalled = true;
            if (throwOnCreate)
            {
                throw DummyException();
            }
            createdHandle = 0xbaadf00dbaadf00d;
            return createdHandle;
        }
        void release(CUmemGenericAllocationHandle handle, bool destructing) override
        {
            releaseCalled = true;
            ASSERT_EQ(handle, createdHandle);
            if (throwOnRelease)
            {
                throw DummyException();
            }
        }
    };

    // Dummy Configurator that can be configured to throw on setup() or teardown()
    class DummyConfigurator : public CUDAVirtualMemoryChunk::Configurator
    {
    public:
        bool throwOnSetup = false;
        bool throwOnTeardown = false;
        bool setupCalled = false;
        bool teardownCalled = false;
        std::string name;

        DummyConfigurator(std::string n)
            : name(std::move(n))
        {
        }

        void setup(CUmemGenericAllocationHandle) override
        {
            setupCalled = true;
            if (throwOnSetup)
            {
                throw DummyException();
            }
        }

        void teardown(CUmemGenericAllocationHandle handle, bool destructing) override
        {
            teardownCalled = true;
            if (throwOnTeardown)
            {
                throw DummyException();
            }
        }
    };

    // Test 1: Exception in creator->create()
    {
        auto creator = std::make_unique<DummyCreator>();
        creator->throwOnCreate = true;
        auto* creatorPtr = creator.get();

        auto config1 = std::make_unique<DummyConfigurator>("config1");
        auto config2 = std::make_unique<DummyConfigurator>("config2");
        auto* config1Ptr = config1.get();
        auto* config2Ptr = config2.get();

        CUDAVirtualMemoryChunk::Configurators configurators;
        configurators.push_back(std::move(config1));
        configurators.push_back(std::move(config2));

        CUDAVirtualMemoryChunk vm(std::move(creator), std::move(configurators));

        // materialize() should throw due to creator->create() exception
        EXPECT_THROW(vm.materialize(), DummyException);

        // Verify creator->create() was called but no configurators were set up
        EXPECT_TRUE(creatorPtr->createCalled);
        EXPECT_FALSE(config1Ptr->setupCalled);
        EXPECT_FALSE(config2Ptr->setupCalled);

        // Internal state is still valid.
        // If the failure from the creator is temporary, materialize() can be reattempted.
        EXPECT_EQ(vm.status(), CUDAVirtualMemoryChunk::RELEASED);
    }

    // Test 2: Exception in first configurator setup()
    {
        auto creator = std::make_unique<DummyCreator>();
        auto* creatorPtr = creator.get();

        auto config1 = std::make_unique<DummyConfigurator>("config1");
        auto config2 = std::make_unique<DummyConfigurator>("config2");
        config1->throwOnSetup = true;
        auto* config1Ptr = config1.get();
        auto* config2Ptr = config2.get();

        CUDAVirtualMemoryChunk::Configurators configurators;
        configurators.push_back(std::move(config1));
        configurators.push_back(std::move(config2));

        CUDAVirtualMemoryChunk vm(std::move(creator), std::move(configurators));

        // materialize() should throw due to first configurator exception
        EXPECT_THROW(vm.materialize(), DummyException);

        // Verify creator->create() was called and first configurator setup() was called
        EXPECT_TRUE(creatorPtr->createCalled);
        EXPECT_TRUE(config1Ptr->setupCalled);
        EXPECT_FALSE(config2Ptr->setupCalled);

        // Status should be ERRORED
        EXPECT_EQ(vm.status(), CUDAVirtualMemoryChunk::ERRORED);

        // release() should still work and only tear down what was set up
        vm.release();
        EXPECT_TRUE(creatorPtr->releaseCalled);
        EXPECT_FALSE(config1Ptr->teardownCalled); // Failed setup, so no teardown
        EXPECT_FALSE(config2Ptr->teardownCalled); // Never set up
    }

    // Test 3: Exception in second configurator setup()
    {
        auto creator = std::make_unique<DummyCreator>();
        auto* creatorPtr = creator.get();

        auto config1 = std::make_unique<DummyConfigurator>("config1");
        auto config2 = std::make_unique<DummyConfigurator>("config2");
        config2->throwOnSetup = true;
        auto* config1Ptr = config1.get();
        auto* config2Ptr = config2.get();

        CUDAVirtualMemoryChunk::Configurators configurators;
        configurators.push_back(std::move(config1));
        configurators.push_back(std::move(config2));

        CUDAVirtualMemoryChunk vm(std::move(creator), std::move(configurators));

        // materialize() should throw due to the second configurator's exception
        EXPECT_THROW(vm.materialize(), DummyException);

        // Verify both creator and first configurator were called
        EXPECT_TRUE(creatorPtr->createCalled);
        EXPECT_TRUE(config1Ptr->setupCalled);
        EXPECT_TRUE(config2Ptr->setupCalled);

        // Status should be ERRORED
        EXPECT_EQ(vm.status(), CUDAVirtualMemoryChunk::ERRORED);

        // release() should tear down the first configurator (successful setup) but not the second
        vm.release();
        EXPECT_TRUE(creatorPtr->releaseCalled);
        EXPECT_TRUE(config1Ptr->teardownCalled);  // Successful setup, so teardown called
        EXPECT_FALSE(config2Ptr->teardownCalled); // Failed setup, so no teardown
    }

    // Test 4: Exception in configurator teardown() during release()
    {
        auto creator = std::make_unique<DummyCreator>();
        auto* creatorPtr = creator.get();

        auto config1 = std::make_unique<DummyConfigurator>("config1");
        auto config2 = std::make_unique<DummyConfigurator>("config2");
        config2->throwOnTeardown = true;
        auto* config1Ptr = config1.get();
        auto* config2Ptr = config2.get();

        CUDAVirtualMemoryChunk::Configurators configurators;
        configurators.push_back(std::move(config1));
        configurators.push_back(std::move(config2));

        CUDAVirtualMemoryChunk vm(std::move(creator), std::move(configurators));

        // materialize() should succeed
        vm.materialize();
        EXPECT_EQ(vm.status(), CUDAVirtualMemoryChunk::MATERIALIZED);

        // release() should throw due to teardown exception but still complete cleanup
        EXPECT_THROW(vm.release(), DummyException);

        // Verify all teardown methods were called despite exception
        EXPECT_TRUE(config1Ptr->teardownCalled);
        EXPECT_TRUE(config2Ptr->teardownCalled);
        EXPECT_TRUE(creatorPtr->releaseCalled);

        // Status should be ERRORED due to exception
        EXPECT_EQ(vm.status(), CUDAVirtualMemoryChunk::ERRORED);
    }

    // Test 5: Exception in creator->release()
    {
        auto creator = std::make_unique<DummyCreator>();
        creator->throwOnRelease = true;
        auto* creatorPtr = creator.get();

        auto config1 = std::make_unique<DummyConfigurator>("config1");
        auto* config1Ptr = config1.get();

        CUDAVirtualMemoryChunk::Configurators configurators;
        configurators.push_back(std::move(config1));

        CUDAVirtualMemoryChunk vm(std::move(creator), std::move(configurators));

        // materialize() should succeed
        vm.materialize();
        EXPECT_EQ(vm.status(), CUDAVirtualMemoryChunk::MATERIALIZED);

        // release() should throw due to creator exception but still complete configurator cleanup
        EXPECT_THROW(vm.release(), DummyException);

        // Verify configurator teardown was called despite creator exception
        EXPECT_TRUE(config1Ptr->teardownCalled);
        EXPECT_TRUE(creatorPtr->releaseCalled);

        // Status should be ERRORED due to exception
        EXPECT_EQ(vm.status(), CUDAVirtualMemoryChunk::ERRORED);
    }
}

// Test various class facilities
TEST_F(VirtualMemoryTest, TestFacilities)
{
    // Test default constructed CUDAVirtualMemoryChunk
    {
        CUDAVirtualMemoryChunk defaultVm;
        // Should be invalid
        EXPECT_FALSE(defaultVm);
        EXPECT_EQ(defaultVm.status(), CUDAVirtualMemoryChunk::INVALID);
    }

    CUdeviceptr address{};
    std::size_t constexpr size = 64 * 1024 * 1024;
    TLLM_CU_CHECK(cuMemAddressReserve(&address, size, 0, {}, 0));

    // Test move semantics
    {
        // Create original CUDAVirtualMemoryChunk
        CUDAVirtualMemoryChunk::CreatorPtr creator = std::make_unique>(
            CUmemAllocationProp{
                CU_MEM_ALLOCATION_TYPE_PINNED, CU_MEM_HANDLE_TYPE_NONE, {CU_MEM_LOCATION_TYPE_DEVICE, 0}},
            size);
        CUDAVirtualMemoryChunk::Configurators configurators;
        configurators.push_back(std::make_unique(
            address, size, CUmemAccessDesc{{CU_MEM_LOCATION_TYPE_DEVICE, 0}, CU_MEM_ACCESS_FLAGS_PROT_READWRITE}));

        CUDAVirtualMemoryChunk original(std::move(creator), std::move(configurators));
        original.materialize();
        EXPECT_EQ(original.status(), CUDAVirtualMemoryChunk::MATERIALIZED);

        // Test move constructor
        CUDAVirtualMemoryChunk moved{std::move(original)};
        EXPECT_FALSE(original); // Original should be invalid after move
        EXPECT_TRUE(moved);     // Moved-to object should be valid
        EXPECT_EQ(moved.status(), CUDAVirtualMemoryChunk::MATERIALIZED);

        // Test move assignment
        CUDAVirtualMemoryChunk assigned;
        EXPECT_FALSE(assigned); // Default constructed, should be invalid
        assigned = std::move(moved);
        EXPECT_FALSE(moved);   // moved should be invalid after move
        EXPECT_TRUE(assigned); // assigned should be valid
        EXPECT_EQ(assigned.status(), CUDAVirtualMemoryChunk::MATERIALIZED);

        // Clean up
        assigned.release();
    }
}

// Test destructor
TEST_F(VirtualMemoryTest, TestDestructor)
{
    // Dummy Creator for testing destructor behavior
    class DummyCreator : public CUDAVirtualMemoryChunk::Creator
    {
    public:
        bool& createCalledRef;
        bool& releaseCalledRef;
        CUmemGenericAllocationHandle createdHandle = 0;

        DummyCreator(bool& createRef, bool& releaseRef)
            : createCalledRef(createRef)
            , releaseCalledRef(releaseRef)
        {
        }

        CUmemGenericAllocationHandle create() override
        {
            createCalledRef = true;
            createdHandle = 0xbaadf00dbaadf00d;
            return createdHandle;
        }

        void release(CUmemGenericAllocationHandle handle, bool destructing) override
        {
            releaseCalledRef = true;
            ASSERT_EQ(handle, createdHandle);
        }
    };

    // Dummy Configurator for testing destructor behavior
    class DummyConfigurator : public CUDAVirtualMemoryChunk::Configurator
    {
    public:
        bool& setupCalledRef;
        bool& teardownCalledRef;
        std::string name;

        DummyConfigurator(std::string n, bool& setupRef, bool& teardownRef)
            : setupCalledRef(setupRef)
            , teardownCalledRef(teardownRef)
            , name(std::move(n))
        {
        }

        void setup(CUmemGenericAllocationHandle) override
        {
            setupCalledRef = true;
        }

        void teardown(CUmemGenericAllocationHandle, bool) override
        {
            teardownCalledRef = true;
        }
    };

    // Test destructor calls release automatically for materialized memory
    {
        bool createCalled = false;
        bool releaseCalled = false;
        bool setupCalled = false;
        bool teardownCalled = false;

        auto creator = std::make_unique<DummyCreator>(createCalled, releaseCalled);
        auto config1 = std::make_unique<DummyConfigurator>("config1", setupCalled, teardownCalled);

        CUDAVirtualMemoryChunk::Configurators configurators;
        configurators.push_back(std::move(config1));

        alignas(CUDAVirtualMemoryChunk) std::byte storage[sizeof(CUDAVirtualMemoryChunk)];
        CUDAVirtualMemoryChunk* vm
            = new (storage) CUDAVirtualMemoryChunk(std::move(creator), std::move(configurators));
        vm->materialize();

        // Verify materialize was called
        EXPECT_TRUE(createCalled);
        EXPECT_TRUE(setupCalled);
        EXPECT_FALSE(releaseCalled);
        EXPECT_FALSE(teardownCalled);
        EXPECT_EQ(vm->status(), CUDAVirtualMemoryChunk::MATERIALIZED);

        vm->~CUDAVirtualMemoryChunk();

        // Verify destructor called release
        EXPECT_TRUE(releaseCalled);
        EXPECT_TRUE(teardownCalled);
    }

    // Test destructor doesn't double-release for manually released memory
    {
        // Local variables to track calls (persist after object destruction)
        bool createCalled = false;
        bool releaseCalled = false;
        bool setupCalled = false;
        bool teardownCalled = false;

        auto creator = std::make_unique<DummyCreator>(createCalled, releaseCalled);
        auto config1 = std::make_unique<DummyConfigurator>("config1", setupCalled, teardownCalled);

        CUDAVirtualMemoryChunk::Configurators configurators;
        configurators.push_back(std::move(config1));

        alignas(CUDAVirtualMemoryChunk) std::byte storage[sizeof(CUDAVirtualMemoryChunk)];
        auto* vm = new (storage) CUDAVirtualMemoryChunk(std::move(creator), std::move(configurators));
        vm->materialize();
        vm->release(); // Manual release
        // Verify manual release was called
        EXPECT_TRUE(releaseCalled);
        EXPECT_TRUE(teardownCalled);
        EXPECT_EQ(vm->status(), CUDAVirtualMemoryChunk::RELEASED);

        // Reset flags to verify destructor doesn't call release again
        releaseCalled = false;
        teardownCalled = false;

        vm->~CUDAVirtualMemoryChunk();

        // Verify destructor did NOT call release again (no double-release)
        EXPECT_FALSE(releaseCalled);
        EXPECT_FALSE(teardownCalled);
    }

    // Test destructor behavior with ERRORED state
    {
        // Local variables to track calls (persist after object destruction)
        bool createCalled = false;
        bool releaseCalled = false;
        bool config1SetupCalled = false;
        bool config1TeardownCalled = false;
        bool throwingSetupCalled = false;
        bool throwingTeardownCalled = false;

        class ThrowingConfigurator : public CUDAVirtualMemoryChunk::Configurator
        {
        public:
            bool& setupCalledRef;
            bool& teardownCalledRef;

            ThrowingConfigurator(bool& setupRef, bool& teardownRef)
                : setupCalledRef(setupRef)
                , teardownCalledRef(teardownRef)
            {
            }

            void setup(CUmemGenericAllocationHandle) override
            {
                setupCalledRef = true;
                throw DummyException();
            }

            void teardown(CUmemGenericAllocationHandle, bool) override
            {
                teardownCalledRef = true;
            }
        };

        auto creator = std::make_unique<DummyCreator>(createCalled, releaseCalled);
        auto config1 = std::make_unique<DummyConfigurator>("config1", config1SetupCalled, config1TeardownCalled);
        auto throwingConfig = std::make_unique<ThrowingConfigurator>(throwingSetupCalled, throwingTeardownCalled);

        CUDAVirtualMemoryChunk::Configurators configurators;
        configurators.push_back(std::move(config1));
        configurators.push_back(std::move(throwingConfig));

        alignas(CUDAVirtualMemoryChunk) std::byte storage[sizeof(CUDAVirtualMemoryChunk)];
        auto* vm = new (storage) CUDAVirtualMemoryChunk(std::move(creator), std::move(configurators));

        // Materialize should throw and leave VM in ERRORED state
        EXPECT_THROW(vm->materialize(), DummyException);
        EXPECT_EQ(vm->status(), CUDAVirtualMemoryChunk::ERRORED);

        // Verify partial setup occurred
        EXPECT_TRUE(createCalled);
        EXPECT_TRUE(config1SetupCalled);
        EXPECT_TRUE(throwingSetupCalled);
        EXPECT_FALSE(releaseCalled);

        vm->~CUDAVirtualMemoryChunk();

        // Verify destructor called release to clean up the errored state
        EXPECT_TRUE(releaseCalled);
        EXPECT_TRUE(config1TeardownCalled);
        // throwingConfig's teardown should NOT be called since setup failed
        EXPECT_FALSE(throwingTeardownCalled);
    }
}

// Test edge cases and error scenarios
TEST_F(VirtualMemoryTest, TestEdgeCases)
{
    // Dummy Creator for testing edge cases
    class DummyCreator : public CUDAVirtualMemoryChunk::Creator
    {
    public:
        CUmemGenericAllocationHandle createdHandle = 0xbaadf00dbaadf00d;

        CUmemGenericAllocationHandle create() override
        {
            return createdHandle;
        }

        void release(CUmemGenericAllocationHandle handle, bool destructing) override
        {
            ASSERT_EQ(handle, createdHandle);
        }
    };

    // Test multiple materialize calls (should throw)
    {
        auto creator = std::make_unique<DummyCreator>();
        CUDAVirtualMemoryChunk vm(std::move(creator), {});
        vm.materialize();
        EXPECT_EQ(vm.status(), CUDAVirtualMemoryChunk::MATERIALIZED);

        // Second materialize should throw
        EXPECT_THROW(vm.materialize(), tc::TllmException);
        EXPECT_EQ(vm.status(), CUDAVirtualMemoryChunk::MATERIALIZED);
        vm.release();
    }

    // Test multiple release calls (should throw)
    {
        auto creator = std::make_unique<DummyCreator>();
        CUDAVirtualMemoryChunk vm(std::move(creator), {});
        vm.materialize();
        vm.release();
        EXPECT_EQ(vm.status(), CUDAVirtualMemoryChunk::RELEASED);

        // Second release should throw
        EXPECT_THROW(vm.release(), tc::TllmException);
        EXPECT_EQ(vm.status(), CUDAVirtualMemoryChunk::RELEASED);
    }
    // Test release on RELEASED state (should throw)
    {
        auto creator = std::make_unique<DummyCreator>();
        CUDAVirtualMemoryChunk vm(std::move(creator), {});
        EXPECT_EQ(vm.status(), CUDAVirtualMemoryChunk::RELEASED);
        EXPECT_THROW(vm.release(), tc::TllmException); // Should throw on RELEASED state
        EXPECT_EQ(vm.status(), CUDAVirtualMemoryChunk::RELEASED);
    }

    // Test materialize on ERRORED state after exception recovery
    {
        // Create a VM that will go into ERRORED state
        class ThrowingConfigurator : public CUDAVirtualMemoryChunk::Configurator
        {
        public:
            bool shouldThrow = true;

            void setup(CUmemGenericAllocationHandle) override
            {
                if (shouldThrow)
                {
                    throw DummyException();
                }
            }

            void teardown(CUmemGenericAllocationHandle, bool) override {}
        };

        auto creator = std::make_unique<DummyCreator>();
        auto throwingConfig = std::make_unique<ThrowingConfigurator>();

        CUDAVirtualMemoryChunk::Configurators configurators;
        configurators.push_back(std::move(throwingConfig));

        CUDAVirtualMemoryChunk vm(std::move(creator), std::move(configurators));

        // First materialize should throw and leave VM in ERRORED state
        EXPECT_THROW(vm.materialize(), DummyException);
        EXPECT_EQ(vm.status(), CUDAVirtualMemoryChunk::ERRORED);

        // Should be able to release from ERRORED state
        vm.release();
        EXPECT_EQ(vm.status(), CUDAVirtualMemoryChunk::RELEASED);
    }
}

class VirtualMemoryManagerTest : public VirtualMemoryTestBase // NOLINT(cppcoreguidelines-pro-type-member-init)
{
    using Base = VirtualMemoryTestBase;

protected:
    auto& entries()
    {
        return mVMManager->mEntries;
    }

    auto& memories()
    {
        return mVMManager->mMemories;
    }

    auto& badHandles()
    {
        return mVMManager->mBadHandles;
    }

    void SetUp() override
    {
        this->Base::SetUp();
        mVMManager = std::make_unique();
    }

    void TearDown() override
    {
        this->Base::TearDown();
        ASSERT_TRUE(!mVMManager || entries().size() == 0) << "Leftover memory in manager";
    }

    std::unique_ptr mVMManager = nullptr;
};

TEST_F(VirtualMemoryManagerTest, TestBasic)
{
    CUdeviceptr address{};
    std::size_t constexpr size = 256 * 1024 * 1024;
    TLLM_CU_CHECK(cuMemAddressReserve(&address, size, 0, {}, 0));
    uintptr_t handle = static_cast<uintptr_t>(address);
    std::string tag = "test_tag";

    CUDAVirtualMemoryChunk::CreatorPtr creator = std::make_unique>(
        CUmemAllocationProp{CU_MEM_ALLOCATION_TYPE_PINNED, CU_MEM_HANDLE_TYPE_NONE,
            {
                CU_MEM_LOCATION_TYPE_DEVICE,
                0,
            }},
        size);
    CUDAVirtualMemoryChunk::Configurators configurators;
    configurators.push_back(std::make_unique(address, size,
        CUmemAccessDesc{{
                            CU_MEM_LOCATION_TYPE_DEVICE,
                            0,
                        },
            CU_MEM_ACCESS_FLAGS_PROT_READWRITE}));

    auto memoryBegin = getCurrentProcessMemoryInfo();

    // Add to manager - this automatically materializes
    mVMManager->add(handle, tag, std::move(creator), std::move(configurators));
    auto memoryMaterialized = getCurrentProcessMemoryInfo();
    if (memoryInfoAvailable())
    {
        ASSERT_EQ(memoryBegin + size, memoryMaterialized) << "add/materialize does not allocate memory";
    }

    // Test memory access after materialization
    auto result = cuMemsetD8_v2(address, 255, size);
    ASSERT_EQ(result, CUDA_SUCCESS) << "Accessing memory returned failure (first materialize)";
    TLLM_CU_CHECK(cuStreamSynchronize(nullptr));

    // Release memory through manager
    auto releaseCount = mVMManager->releaseWithTag(tag);
    ASSERT_EQ(releaseCount, 1) << "Expected to release 1 memory object";
    auto memoryReleased = getCurrentProcessMemoryInfo();
    if (memoryInfoAvailable())
    {
        ASSERT_EQ(memoryBegin, memoryReleased) << "releaseWithTag does not release memory";
    }

    // Materialize again through manager
    auto materializeCount = mVMManager->materializeWithTag(tag);
    ASSERT_EQ(materializeCount, 1) << "Expected to materialize 1 memory object";
    auto memoryRematerialized = getCurrentProcessMemoryInfo();
    if (memoryInfoAvailable())
    {
        ASSERT_EQ(memoryBegin + size, memoryRematerialized) << "materializeWithTag does not allocate memory";
    }

    // Test memory access after rematerialization
    result = cuMemsetD8_v2(address, 255, size);
    ASSERT_EQ(result, CUDA_SUCCESS) << "Accessing memory returned failure (second materialize)";
    TLLM_CU_CHECK(cuStreamSynchronize(nullptr));

    // Clean up - remove from manager
    {
        auto removedMemory = mVMManager->remove(handle);
        ASSERT_TRUE(removedMemory) << "Expected to successfully remove memory from manager";
    }
    auto memoryAfterRemove = getCurrentProcessMemoryInfo();
    if (memoryInfoAvailable())
    {
        ASSERT_EQ(memoryBegin, memoryAfterRemove) << "remove does not release memory";
    }

    auto unknownMemory = mVMManager->remove(0);
    ASSERT_FALSE(unknownMemory) << "Expect invalid memory for unknown handle";
}

TEST_F(VirtualMemoryManagerTest, TestTags)
{
    // Dummy Creator for testing tag functionality
    class DummyCreator : public CUDAVirtualMemoryChunk::Creator
    {
    public:
        bool createCalled = false;
        bool releaseCalled = false;
        CUmemGenericAllocationHandle createdHandle = 0xbaadf00dbaadf00d;

        CUmemGenericAllocationHandle create() override
        {
            createCalled = true;
            return createdHandle;
        }

        void release(CUmemGenericAllocationHandle handle, bool destructing) override
        {
            releaseCalled = true;
            ASSERT_EQ(handle, createdHandle);
        }
    };

    // Create creators for different virtual memories
    auto creator1 = std::make_unique<DummyCreator>();
    auto creator2 = std::make_unique<DummyCreator>();
    auto creator3 = std::make_unique<DummyCreator>();
    auto creator4 = std::make_unique<DummyCreator>();

    // Keep pointers to track state
    auto* creator1Ptr = creator1.get();
    auto* creator2Ptr = creator2.get();
    auto* creator3Ptr = creator3.get();
    auto* creator4Ptr = creator4.get();

    mVMManager->add(0x1000, "tag_A", std::move(creator1), {});
    mVMManager->add(0x2000, "tag_B", std::move(creator2), {});
    mVMManager->add(0x3000, "tag_A", std::move(creator3), {});
    mVMManager->add(0x4000, "tag_C", std::move(creator4), {});

    // All should be materialized initially (since add() materializes automatically)
    EXPECT_TRUE(creator1Ptr->createCalled);
    EXPECT_TRUE(creator2Ptr->createCalled);
    EXPECT_TRUE(creator3Ptr->createCalled);
    EXPECT_TRUE(creator4Ptr->createCalled);

    // Reset create flags to test materializeWithTag later
    creator1Ptr->createCalled = false;
    creator2Ptr->createCalled = false;
    creator3Ptr->createCalled = false;
    creator4Ptr->createCalled = false;

    // Test releaseWithTag - should release only memories with "tag_A"
    auto releaseCount = mVMManager->releaseWithTag("tag_A");
    EXPECT_EQ(releaseCount, 2); // Should release 2 memories with tag_A

    // Verify only tag_A memories were released
    EXPECT_TRUE(creator1Ptr->releaseCalled);  // tag_A
    EXPECT_FALSE(creator2Ptr->releaseCalled); // tag_B
    EXPECT_TRUE(creator3Ptr->releaseCalled);  // tag_A
    EXPECT_FALSE(creator4Ptr->releaseCalled); // tag_C

    // Test materializeWithTag - should materialize only memories with "tag_A"
    auto materializeCount = mVMManager->materializeWithTag("tag_A");
    EXPECT_EQ(materializeCount, 2); // Should materialize 2 memories with tag_A

    // Verify only tag_A memories were materialized
    EXPECT_TRUE(creator1Ptr->createCalled);  // tag_A
    EXPECT_FALSE(creator2Ptr->createCalled); // tag_B
    EXPECT_TRUE(creator3Ptr->createCalled);  // tag_A
    EXPECT_FALSE(creator4Ptr->createCalled); // tag_C

    // Reset flags and test releasing with a different tag
    creator2Ptr->releaseCalled = false;
    releaseCount = mVMManager->releaseWithTag("tag_B");
    EXPECT_EQ(releaseCount, 1); // Should release 1 memory with tag_B
    EXPECT_TRUE(creator2Ptr->releaseCalled); // tag_B should now be released

    // Test with non-existent tag
    releaseCount = mVMManager->releaseWithTag("nonexistent_tag");
    EXPECT_EQ(releaseCount, 0); // Should release 0 memories
    materializeCount = mVMManager->materializeWithTag("nonexistent_tag");
    EXPECT_EQ(materializeCount, 0); // Should materialize 0 memories

    // Clean up - remove all memories
    mVMManager->remove(0x1000);
    mVMManager->remove(0x2000);
    mVMManager->remove(0x3000);
    mVMManager->remove(0x4000);
}

TEST_F(VirtualMemoryManagerTest, TestAddException)
{
    // Dummy Creator that succeeds
    class DummyCreator : public CUDAVirtualMemoryChunk::Creator
    {
    public:
        CUmemGenericAllocationHandle createdHandle = 0xbaadf00dbaadf00d;

        CUmemGenericAllocationHandle create() override
        {
            return createdHandle;
        }

        void release(CUmemGenericAllocationHandle handle, bool destructing) override
        {
            ASSERT_EQ(handle, createdHandle);
        }
    };

    // Dummy Configurator that throws during setup
    class ThrowingConfigurator : public CUDAVirtualMemoryChunk::Configurator
    {
    public:
        void setup(CUmemGenericAllocationHandle) override
        {
            throw DummyException();
        }

        void teardown(CUmemGenericAllocationHandle, bool) override
        {
            ASSERT_TRUE(false) << "Unreachable";
        }
    };

    uintptr_t handle = 0x12345678;
    std::string tag = "test_tag";

    // Verify initial state is clean
    EXPECT_TRUE(memories().empty());
    EXPECT_TRUE(entries().empty());
    EXPECT_TRUE(badHandles().empty());

    auto creator = std::make_unique<DummyCreator>();
    CUDAVirtualMemoryChunk::Configurators configurators;
    configurators.push_back(std::make_unique<ThrowingConfigurator>());

    // add() should throw because materialize() will fail due to ThrowingConfigurator
    EXPECT_THROW(mVMManager->add(handle, tag, std::move(creator), std::move(configurators)), DummyException);

    // Verify that the manager state is clean after the exception
    // The ScopeGuards in add() should have cleaned up properly
    EXPECT_TRUE(memories().empty()) << "mMemories should be empty after failed add()";
    EXPECT_TRUE(entries().empty()) << "mEntries should be empty after failed add()";
    EXPECT_TRUE(badHandles().empty()) << "mBadHandles should be empty after failed add()";

    // Test that we can successfully add a memory with the same handle after the failure
    auto successCreator = std::make_unique<DummyCreator>();
    CUDAVirtualMemoryChunk::Configurators successConfigurators; // Empty configurators should work

    // This should succeed without throwing
    EXPECT_NO_THROW(mVMManager->add(handle, tag, std::move(successCreator), std::move(successConfigurators)));

    // Verify that the manager now has the entry
    EXPECT_EQ(memories().size(), 1);
    EXPECT_EQ(entries().size(), 1);
    EXPECT_TRUE(badHandles().empty());

    // Clean up
    auto removedMemory = mVMManager->remove(handle);
    EXPECT_TRUE(removedMemory);
}

TEST_F(VirtualMemoryManagerTest, TestMaterializeException)
{
    // State structure that tracks create/release order and can throw on a specific create call
    struct CreatorState
    {
        int& createCounter;       // Reference to shared counter
        int throwOnCreateIdx = 0; // 1-based index to throw on create
        int myCreateIdx = INT_MAX;
        bool createCalled = false;
        bool releaseCalled = false;
        CUmemGenericAllocationHandle createdHandle = 0xbaadf00dbaadf00d;

        CreatorState(int& sharedCounter)
            : createCounter(sharedCounter)
        {
        }
    };

    // Dummy Creator that uses external state
    class TestMatEx_DummyCreator : public CUDAVirtualMemoryChunk::Creator
    {
    public:
        CreatorState& state;

        TestMatEx_DummyCreator(CreatorState& state)
            : state(state)
        {
        }

        CUmemGenericAllocationHandle create() override
        {
            state.createCalled = true;
            state.myCreateIdx = ++state.createCounter;
            if (state.throwOnCreateIdx > 0 && state.myCreateIdx == state.throwOnCreateIdx)
            {
                throw DummyException();
            }
            return state.createdHandle;
        }

        void release(CUmemGenericAllocationHandle handle, bool destructing) override
        {
            state.releaseCalled = true;
            ASSERT_EQ(handle, state.createdHandle);
        }
    };

    // Create shared counter
    int sharedCreateCounter = 0;

    // Create state objects for each creator
    CreatorState state1(sharedCreateCounter);
    CreatorState state2(sharedCreateCounter);
    CreatorState state3(sharedCreateCounter);

    // We want the second memory (by create order) to throw
    state1.throwOnCreateIdx = 2;
    state2.throwOnCreateIdx = 2;
    state3.throwOnCreateIdx = 2;

    // Create creators and configurators
    auto creator1 = std::make_unique<TestMatEx_DummyCreator>(state1);
    auto creator2 = std::make_unique<TestMatEx_DummyCreator>(state2);
    auto creator3 = std::make_unique<TestMatEx_DummyCreator>(state3);

    // Add memories to the manager in RELEASED state (avoid auto-materialize by constructing the chunks manually)
    CUDAVirtualMemoryChunk vm1(std::move(creator1), {});
    CUDAVirtualMemoryChunk vm2(std::move(creator2), {});
    CUDAVirtualMemoryChunk vm3(std::move(creator3), {});
    mVMManager->add(0x1000, "test_tag", std::move(vm1));
    mVMManager->add(0x2000, "test_tag", std::move(vm2));
    mVMManager->add(0x3000, "test_tag", std::move(vm3));

    // Verify initial state is clean
    EXPECT_TRUE(badHandles().empty());

    // materializeWithTag should stop at the first exception (second memory by create order)
    // and attempt to roll back the first memory that succeeded
    EXPECT_THROW(mVMManager->materializeWithTag("test_tag"), DummyException);

    // Find which creators were called and in what order
    std::vector<std::pair<uintptr_t, CreatorState*>> creators
        = {{0x1000, &state1}, {0x2000, &state2}, {0x3000, &state3}};

    // Sort by myCreateIdx (creators that were called sort first; untouched ones keep INT_MAX)
    std::sort(creators.begin(), creators.end(),
        [](auto const& a, auto const& b) { return a.second->myCreateIdx < b.second->myCreateIdx; });

    // The first memory (by create order) should have been materialized then released during rollback
    auto* first = creators[0].second;
    EXPECT_TRUE(first->createCalled);
    EXPECT_TRUE(first->releaseCalled); // Rolled back

    // The second memory should have thrown during create(), so its creator was called but never released
    auto* second = creators[1].second;
    EXPECT_TRUE(second->createCalled);
    EXPECT_FALSE(second->releaseCalled);

    // The third memory should not have been touched (create() never called, myCreateIdx stays INT_MAX)
    auto* third = creators[2].second;
    EXPECT_FALSE(third->createCalled);
    EXPECT_FALSE(third->releaseCalled);

    // The handle of the memory that threw should be the second one's handle
    uintptr_t thrownHandle = creators[1].first;

    // Verify bad handles tracking - memories that threw exceptions should be removed
    auto badHandles = mVMManager->retrieveBadHandles();
    EXPECT_EQ(badHandles.size(), 1);
    EXPECT_EQ(badHandles[0], thrownHandle);

    // Verify the memory that threw was removed from the manager
    auto removedMem = mVMManager->remove(thrownHandle);
    EXPECT_FALSE(removedMem); // Should have been removed due to exception

    // The other two memories should still be in the manager
    for (int i = 0; i < 3; ++i)
    {
        if (creators[i].first != thrownHandle)
        {
            auto removed = mVMManager->remove(creators[i].first);
            EXPECT_TRUE(removed);
        }
    }
}

TEST_F(VirtualMemoryManagerTest, TestReleaseException)
{
    // State structure to track create/release calls
    struct CreatorState
    {
        bool createCalled = false;
        bool releaseCalled = false;
        int& releaseCounter;
        int throwOnReleaseCount;
        CUmemGenericAllocationHandle createdHandle = 0xbaadf00dbaadf00d;

        CreatorState(int& counter, int throwCount)
            : releaseCounter(counter)
            , throwOnReleaseCount(throwCount)
        {
        }
    };
    // State structure to track setup/teardown calls
    struct ConfiguratorState
    {
        bool setupCalled = false;
        bool teardownCalled = false;
        int& teardownCounter;
        int throwOnTeardownCount;

        ConfiguratorState(int& counter, int throwCount)
            : teardownCounter(counter)
            , throwOnTeardownCount(throwCount)
        {
        }
    };

    // Dummy Creator that succeeds on create() and can throw on a specific release call
    class DummyCreator : public CUDAVirtualMemoryChunk::Creator
    {
    public:
        CreatorState& state;

        DummyCreator(CreatorState& state)
            : state(state)
        {
        }

        CUmemGenericAllocationHandle create() override
        {
            state.createCalled = true;
            return state.createdHandle;
        }

        void release(CUmemGenericAllocationHandle handle, bool destructing) override
        {
            state.releaseCalled = true;
            ASSERT_EQ(handle, state.createdHandle);
            if (++state.releaseCounter == state.throwOnReleaseCount)
            {
                throw DummyException();
            }
        }
    };

    // Dummy Configurator that succeeds on setup() and can throw on a specific teardown call
    class DummyConfigurator : public CUDAVirtualMemoryChunk::Configurator
    {
    public:
        ConfiguratorState& state;

        DummyConfigurator(ConfiguratorState& state)
            : state(state)
        {
        }

        void setup(CUmemGenericAllocationHandle) override
        {
            state.setupCalled = true;
        }

        void teardown(CUmemGenericAllocationHandle, bool) override
        {
            state.teardownCalled = true;
            if (++state.teardownCounter == state.throwOnTeardownCount)
            {
                throw DummyException();
            }
        }
    };

    // Create counters for tracking release/teardown calls
    int releaseCounter = 0;
    int teardownCounter = 0;

    // Create state objects for each creator and configurator
    CreatorState state1(releaseCounter, 2); // Throw on 2nd release
    CreatorState state2(releaseCounter, 2); // Throw on 2nd release
    CreatorState state3(releaseCounter, 2); // Throw on 2nd release
    CreatorState state4(releaseCounter, 2); // Throw on 2nd release
    ConfiguratorState configState1(teardownCounter, 3); // Throw on 3rd teardown
    ConfiguratorState configState2(teardownCounter, 3); // Throw on 3rd teardown
    ConfiguratorState configState3(teardownCounter, 3); // Throw on 3rd teardown
    ConfiguratorState configState4(teardownCounter, 3); // Throw on 3rd teardown

    // Create creators and configurators
    auto creator1 = std::make_unique<DummyCreator>(state1);
    auto creator2 = std::make_unique<DummyCreator>(state2);
    auto creator3 = std::make_unique<DummyCreator>(state3);
    auto creator4 = std::make_unique<DummyCreator>(state4);
    auto config1 = std::make_unique<DummyConfigurator>(configState1);
    auto config2 = std::make_unique<DummyConfigurator>(configState2);
    auto config3 = std::make_unique<DummyConfigurator>(configState3);
    auto config4 = std::make_unique<DummyConfigurator>(configState4);

    CUDAVirtualMemoryChunk::Configurators configurators1;
    configurators1.push_back(std::move(config1));
    CUDAVirtualMemoryChunk::Configurators configurators2;
    configurators2.push_back(std::move(config2));
    CUDAVirtualMemoryChunk::Configurators configurators3;
    configurators3.push_back(std::move(config3));
    CUDAVirtualMemoryChunk::Configurators configurators4;
    configurators4.push_back(std::move(config4));

    mVMManager->add(0x1000, "test_tag", std::move(creator1), std::move(configurators1));
    mVMManager->add(0x2000, "test_tag", std::move(creator2), std::move(configurators2));
    mVMManager->add(0x3000, "test_tag", std::move(creator3), std::move(configurators3));
    mVMManager->add(0x4000, "other_tag", std::move(creator4), std::move(configurators4));

    // Verify initial state
    EXPECT_TRUE(badHandles().empty());

    // releaseWithTag should call release on all memories with "test_tag"
    // and continue despite exceptions
    EXPECT_THROW(mVMManager->releaseWithTag("test_tag"), DummyException);

    // Verify behavior:
    // - All memories with "test_tag" should have had release() attempted
    EXPECT_TRUE(state1.releaseCalled);
    EXPECT_TRUE(configState1.teardownCalled);
    EXPECT_TRUE(state2.releaseCalled);
    EXPECT_TRUE(configState2.teardownCalled);
    EXPECT_TRUE(state3.releaseCalled);
    EXPECT_TRUE(configState3.teardownCalled);
    // - Memory with a different tag should not be affected
    EXPECT_FALSE(state4.releaseCalled);
    EXPECT_FALSE(configState4.teardownCalled);

    // Verify bad handles tracking - memories that threw exceptions should be removed
    auto badHandles = mVMManager->retrieveBadHandles();
    EXPECT_EQ(badHandles.size(), 2);
    EXPECT_NE(std::find(badHandles.begin(), badHandles.end(), 0x2000), badHandles.end());
    EXPECT_NE(std::find(badHandles.begin(), badHandles.end(), 0x3000), badHandles.end());

    // Verify the memories were removed from the manager
    auto removedMem1 = mVMManager->remove(0x1000);
    auto removedMem2 = mVMManager->remove(0x2000);
    auto removedMem3 = mVMManager->remove(0x3000);
    auto removedMem4 = mVMManager->remove(0x4000);
    EXPECT_TRUE(removedMem1);  // Still in the manager (its release did not throw)
    EXPECT_FALSE(removedMem2); // Should have been removed due to exception
    EXPECT_FALSE(removedMem3); // Should have been removed due to exception
    EXPECT_TRUE(removedMem4);  // Should still be in manager (different tag, not affected)
}

TEST_F(VirtualMemoryManagerTest, TestCudaVirtualMemoryAllocator)
{
    std::size_t constexpr size = 64 * 1024 * 1024; // 64 MB
    std::string tag = "test_allocator_tag";

    // Create a CUDA stream for the allocator
    CudaStream stream;
    auto streamPtr = std::make_shared<CudaStream>(std::move(stream));

    // Create configuration for the virtual address allocator
    auto config = std::make_shared(
        *mVMManager.get(), tag, CudaVirtualMemoryAllocator::RestoreMode::NONE, streamPtr);

    auto memoryBegin = getCurrentProcessMemoryInfo();

    // Create a buffer using the virtual address allocator
    auto buffer = std::make_unique(
        size, nvinfer1::DataType::kINT8, CudaVirtualMemoryAllocator{config});

    auto memoryAfterAllocation = getCurrentProcessMemoryInfo();
    if (memoryInfoAvailable())
    {
        ASSERT_EQ(memoryBegin + size, memoryAfterAllocation) << "Buffer allocation does not allocate memory";
    }

    // Test that we can access the buffer data
    ASSERT_NE(buffer->data(), nullptr) << "Buffer data should not be null";
    ASSERT_EQ(buffer->getSize(), size) << "Buffer size should match requested size";
    ASSERT_EQ(buffer->getDataType(), nvinfer1::DataType::kINT8) << "Buffer data type should be INT8";
    ASSERT_EQ(buffer->getMemoryType(), MemoryType::kGPU) << "Buffer memory type should be GPU";

    // Test memory access by setting memory to a known pattern
    auto devicePtr = reinterpret_cast<CUdeviceptr>(buffer->data());
    auto result = cuMemsetD8_v2(devicePtr, 0xAB, size);
    ASSERT_EQ(result, CUDA_SUCCESS) << "Memory access should succeed";
    TLLM_CU_CHECK(cuStreamSynchronize(nullptr));

    // Test releasing memory with tag - this should free the virtual memory
    auto releaseCount = mVMManager->releaseWithTag(tag);
    ASSERT_EQ(releaseCount, 1) << "Expected to release 1 memory object";
    auto memoryAfterRelease = getCurrentProcessMemoryInfo();
    if (memoryInfoAvailable())
    {
        ASSERT_EQ(memoryBegin, memoryAfterRelease) << "Release should free the memory";
    }

    // Test materializing memory with tag - this should re-allocate the virtual memory
    auto materializeCount = mVMManager->materializeWithTag(tag);
    ASSERT_EQ(materializeCount, 1) << "Expected to materialize 1 memory object";
    auto memoryAfterMaterialize = getCurrentProcessMemoryInfo();
    if (memoryInfoAvailable())
    {
        ASSERT_EQ(memoryBegin + size, memoryAfterMaterialize) << "Materialize should allocate memory";
    }

    // Test memory access again after rematerialization
    result = cuMemsetD8_v2(devicePtr, 0xCD, size);
ASSERT_EQ(result, CUDA_SUCCESS) << "Memory access should succeed after rematerialization"; TLLM_CU_CHECK(cuStreamSynchronize(nullptr)); // Clean up by destroying the buffer (this should automatically clean up the virtual memory) buffer.reset(); auto memoryAfterCleanup = getCurrentProcessMemoryInfo(); if (memoryInfoAvailable()) { ASSERT_EQ(memoryBegin, memoryAfterCleanup) << "Buffer destruction should free memory"; } }