/*
 * SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: Apache-2.0
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "kvCacheManagerV2Utils.h"
#include "tensorrt_llm/batch_manager/kvCacheManagerV2Utils.h"

// NOTE(review): the two system include targets were missing from the text under review.
// These are the nanobind headers this file needs (core API + std::vector caster) - confirm.
#include <nanobind/nanobind.h>
#include <nanobind/stl/vector.h>

#include <cstdint>
#include <utility>
#include <vector>

namespace nb = nanobind;

namespace tensorrt_llm::batch_manager::kv_cache_manager_v2
{

namespace
{

//! Registers one Task instantiation with Python under the class name `pyName`.
//!
//! The Python class gets a two-argument constructor (keywords "dst" and "src") and
//! read/write attributes "dst" and "src" mapped onto the members of the same name.
//!
//! \param module Python module that receives the class.
//! \param pyName Class name visible from Python.
template <typename TaskT>
void bindTask(nb::module_& module, char const* pyName)
{
    // The constructor argument types are taken from the members themselves, so they
    // cannot drift from the definition of TaskT.
    nb::class_<TaskT>(module, pyName)
        .def(nb::init<decltype(TaskT::dst), decltype(TaskT::src)>(), nb::arg("dst"), nb::arg("src"))
        .def_rw("dst", &TaskT::dst)
        .def_rw("src", &TaskT::src);
}

} // namespace

//! Registers the KV cache manager v2 utility types and copy functions on `module`.
//!
//! Exposed classes: DiskAddress, DiskToDiskTask, DiskToHostTask, HostToDiskTask, MemToMemTask.
//! Exposed functions: copy_disk_to_disk, copy_disk_to_host, copy_host_to_disk,
//! copy_host_to_host, copy_host_to_device, copy_device_to_host, copy_device_to_device.
//! Every copy function takes (tasks, num_bytes, stream) and returns the integer result of
//! the underlying C++ call; `stream` is passed from Python as an integer and cast back to
//! a stream handle here.
//!
//! NOTE(review): every template argument list in this function was missing from the text
//! under review and has been reconstructed. Please verify against the project header:
//!   - the address types used for each Task alias (MemAddress is assumed to be the
//!     memory-side address type, and Task is assumed to be parameterized as <dst, src>);
//!   - the stream handle type (CUstream is assumed);
//!   - the call guard (nb::gil_scoped_release is assumed).
//!
//! \param module Python module that receives the bindings.
void KVCacheManagerV2UtilsBindings::initBindings(nb::module_& module)
{
    using DiskToDiskTask = Task<DiskAddress, DiskAddress>;
    using DiskToHostTask = Task<MemAddress, DiskAddress>;
    using HostToDiskTask = Task<DiskAddress, MemAddress>;
    using MemToMemTask = Task<MemAddress, MemAddress>;
    using CallGuard = nb::call_guard<nb::gil_scoped_release>;

    // Bind DiskAddress struct
    nb::class_<DiskAddress>(module, "DiskAddress")
        .def(nb::init<decltype(DiskAddress::fd), decltype(DiskAddress::pos)>(), nb::arg("fd"), nb::arg("pos"))
        .def_rw("fd", &DiskAddress::fd)
        .def_rw("pos", &DiskAddress::pos);

    // Bind Task template instantiations
    bindTask<DiskToDiskTask>(module, "DiskToDiskTask");
    bindTask<DiskToHostTask>(module, "DiskToHostTask");
    bindTask<HostToDiskTask>(module, "HostToDiskTask");
    bindTask<MemToMemTask>(module, "MemToMemTask");

    // Bind copy functions.
    // The first four take the task list by value and move it into the callee; the
    // host/device variants pass it by const reference.
    module.def(
        "copy_disk_to_disk",
        [](std::vector<DiskToDiskTask> tasks, ssize_t numBytes, uintptr_t stream) -> int
        { return copyDiskToDisk(std::move(tasks), numBytes, reinterpret_cast<CUstream>(stream)); },
        nb::arg("tasks"), nb::arg("num_bytes"), nb::arg("stream"), CallGuard(),
        "Copy data from disk to disk using CUDA host function");

    module.def(
        "copy_disk_to_host",
        [](std::vector<DiskToHostTask> tasks, ssize_t numBytes, uintptr_t stream) -> int
        { return copyDiskToHost(std::move(tasks), numBytes, reinterpret_cast<CUstream>(stream)); },
        nb::arg("tasks"), nb::arg("num_bytes"), nb::arg("stream"), CallGuard(),
        "Copy data from disk to host using CUDA host function");

    module.def(
        "copy_host_to_disk",
        [](std::vector<HostToDiskTask> tasks, ssize_t numBytes, uintptr_t stream) -> int
        { return copyHostToDisk(std::move(tasks), numBytes, reinterpret_cast<CUstream>(stream)); },
        nb::arg("tasks"), nb::arg("num_bytes"), nb::arg("stream"), CallGuard(),
        "Copy data from host to disk using CUDA host function");

    module.def(
        "copy_host_to_host",
        [](std::vector<MemToMemTask> tasks, ssize_t numBytes, uintptr_t stream) -> int
        { return copyHostToHost(std::move(tasks), numBytes, reinterpret_cast<CUstream>(stream)); },
        nb::arg("tasks"), nb::arg("num_bytes"), nb::arg("stream"), CallGuard(),
        "Copy data from host to host using CUDA host function");

    module.def(
        "copy_host_to_device",
        [](std::vector<MemToMemTask> const& tasks, ssize_t numBytes, uintptr_t stream) -> int
        { return copyHostToDevice(tasks, numBytes, reinterpret_cast<CUstream>(stream)); },
        nb::arg("tasks"), nb::arg("num_bytes"), nb::arg("stream"), CallGuard(),
        "Copy data from host to device using CUDA kernels");

    module.def(
        "copy_device_to_host",
        [](std::vector<MemToMemTask> const& tasks, ssize_t numBytes, uintptr_t stream) -> int
        { return copyDeviceToHost(tasks, numBytes, reinterpret_cast<CUstream>(stream)); },
        nb::arg("tasks"), nb::arg("num_bytes"), nb::arg("stream"), CallGuard(),
        "Copy data from device to host using CUDA kernels");

    module.def(
        "copy_device_to_device",
        [](std::vector<MemToMemTask> const& tasks, ssize_t numBytes, uintptr_t stream) -> int
        { return copyDeviceToDevice(tasks, numBytes, reinterpret_cast<CUstream>(stream)); },
        nb::arg("tasks"), nb::arg("num_bytes"), nb::arg("stream"), CallGuard(),
        "Copy data from device to device using CUDA kernels");
}

} // namespace tensorrt_llm::batch_manager::kv_cache_manager_v2