feat: binding type build argument (pybind, nanobind) (#5802)

Signed-off-by: Linda-Stadter <57756729+Linda-Stadter@users.noreply.github.com>
2026-01-13 22:18:36 +08:00 · 2025-07-10 17:48:50 +02:00 · 2025-07-10 17:48:50 +02:00 · 4d071eb2d1
commit 4d071eb2d1
parent 2e3cf42e03
9 changed files with 181 additions and 47 deletions
--- a/.gitmodules
+++ b/.gitmodules
@ -20,3 +20,6 @@
 [submodule "3rdparty/xgrammar"]
 	path = 3rdparty/xgrammar
 	url = https://github.com/mlc-ai/xgrammar.git
+[submodule "3rdparty/nanobind"]
+	path = 3rdparty/nanobind
+	url = https://github.com/wjakob/nanobind
--- a/3rdparty/nanobind
+++ b/3rdparty/nanobind
@ -0,0 +1 @@
+Subproject commit a0ed2587f1089ef7657e2ed49ad6756b01c74e9f
--- a/cpp/CMakeLists.txt
+++ b/cpp/CMakeLists.txt
@ -28,8 +28,6 @@ project(tensorrt_llm LANGUAGES CXX)

 # Build options
 option(BUILD_PYT "Build in PyTorch TorchScript class mode" ON)
-option(BUILD_PYBIND "Build Python bindings for C++ runtime and batch manager"
-       ON)
 option(BUILD_TESTS "Build Google tests" ON)
 option(BUILD_BENCHMARKS "Build benchmarks" ON)
 option(BUILD_MICRO_BENCHMARKS "Build C++ micro benchmarks" OFF)
@ -68,6 +66,11 @@ endif()
 add_compile_definitions("TLLM_GEN_EXPORT_INTERFACE")
 add_compile_definitions("TLLM_ENABLE_CUDA")

+# Selects the Python binding backend for the C++ runtime and batch manager.
+# The build compares this value against "pybind" and "nanobind"; it defaults to
+# "pybind".
+set(BINDING_TYPE
+    "pybind"
+    CACHE STRING
+          "Binding type of Python bindings for C++ runtime and batch manager")
+# Offer the recognized values as a drop-down in cmake-gui / ccmake.
+set_property(CACHE BINDING_TYPE PROPERTY STRINGS "pybind" "nanobind")
+# A misspelled value would otherwise select neither backend without any hint,
+# so point it out at configure time. This is a warning rather than an error so
+# that configurations which deliberately select neither keep working.
+if(NOT BINDING_TYPE MATCHES "^(pybind|nanobind)$")
+  message(
+    WARNING
+      "Unrecognized BINDING_TYPE '${BINDING_TYPE}': expected 'pybind' or "
+      "'nanobind'. Neither pybind11 nor nanobind bindings will be configured.")
+endif()
+
 set(INTERNAL_CUTLASS_KERNELS_PATH
    ""
    CACHE
@ -195,7 +198,14 @@ set(TRT_LIB TensorRT::NvInfer)
 get_filename_component(TRT_LLM_ROOT_DIR ${CMAKE_CURRENT_SOURCE_DIR} PATH)

 set(3RDPARTY_DIR ${TRT_LLM_ROOT_DIR}/3rdparty)
-add_subdirectory(${3RDPARTY_DIR}/pybind11 ${CMAKE_CURRENT_BINARY_DIR}/pybind11)
+# Add only the third-party binding library that matches the selected backend.
+if(BINDING_TYPE STREQUAL "pybind")
+  add_subdirectory(${3RDPARTY_DIR}/pybind11
+                   ${CMAKE_CURRENT_BINARY_DIR}/pybind11)
+elseif(BINDING_TYPE STREQUAL "nanobind")
+  add_subdirectory(${3RDPARTY_DIR}/nanobind
+                   ${CMAKE_CURRENT_BINARY_DIR}/nanobind)
+endif()

 # include as system to suppress warnings
 include_directories(
@ -206,8 +216,13 @@ include_directories(
  ${3RDPARTY_DIR}/cutlass/include
  ${3RDPARTY_DIR}/cutlass/tools/util/include
  ${3RDPARTY_DIR}/NVTX/include
-  ${3RDPARTY_DIR}/json/include
-  ${3RDPARTY_DIR}/pybind11/include)
+  ${3RDPARTY_DIR}/json/include)
+# Expose the headers of whichever binding library was selected.
+if(BINDING_TYPE STREQUAL "pybind")
+  include_directories(${3RDPARTY_DIR}/pybind11/include)
+elseif(BINDING_TYPE STREQUAL "nanobind")
+  include_directories(${3RDPARTY_DIR}/nanobind/include)
+endif()

 if(${CUDAToolkit_VERSION} VERSION_GREATER_EQUAL "11")
  add_definitions("-DENABLE_BF16")
--- a/cpp/tensorrt_llm/CMakeLists.txt
+++ b/cpp/tensorrt_llm/CMakeLists.txt
@ -302,10 +302,14 @@ if(BUILD_PYT)
  add_subdirectory(thop)
 endif()

-if(BUILD_PYBIND)
+if(BINDING_TYPE STREQUAL "pybind")
  add_subdirectory(pybind)
 endif()

+if(BINDING_TYPE STREQUAL "nanobind")
+  add_subdirectory(nanobind)
+endif()
+
 if(BUILD_DEEP_EP)
  add_subdirectory(deep_ep)
 endif()
--- a/cpp/tensorrt_llm/nanobind/CMakeLists.txt
+++ b/cpp/tensorrt_llm/nanobind/CMakeLists.txt
@ -0,0 +1,33 @@
+# Name of the Python extension module built in this directory. The second
+# set() publishes the same value to the parent scope.
+set(TRTLLM_NB_MODULE bindings)
+set(TRTLLM_NB_MODULE
+    ${TRTLLM_NB_MODULE}
+    PARENT_SCOPE)
+
+set(SRCS ../runtime/ipcNvlsMemory.cu bindings.cpp)
+
+# Create the extension module through nanobind's CMake helper.
+nanobind_add_module(${TRTLLM_NB_MODULE} ${SRCS})
+
+# Attach the project headers to this target only, instead of to every target
+# created in this directory.
+target_include_directories(${TRTLLM_NB_MODULE}
+                           PRIVATE ${PROJECT_SOURCE_DIR}/include)
+
+set_property(TARGET ${TRTLLM_NB_MODULE} PROPERTY POSITION_INDEPENDENT_CODE ON)
+
+# The Torch libraries linked below are looked up in the Torch install prefix.
+target_link_directories(${TRTLLM_NB_MODULE} PUBLIC
+                        "${TORCH_INSTALL_PREFIX}/lib")
+
+target_link_libraries(
+  ${TRTLLM_NB_MODULE}
+  PUBLIC ${SHARED_TARGET} ${UNDEFINED_FLAG} ${NO_AS_NEEDED_FLAG}
+         ${Python3_LIBRARIES} ${TORCH_LIBRARIES} torch_python)
+
+# TRTLLM_NB_MODULE is consumed by bindings.cpp as the module name.
+target_compile_definitions(
+  ${TRTLLM_NB_MODULE} PUBLIC TRTLLM_NB_MODULE=${TRTLLM_NB_MODULE}
+                             NB_DETAILED_ERROR_MESSAGES=1)
+
+# On non-Windows platforms, embed runtime search paths relative to the module's
+# own location ($ORIGIN/libs and $ORIGIN/../nvidia/nccl/lib).
+if(NOT WIN32)
+  set_target_properties(
+    ${TRTLLM_NB_MODULE}
+    PROPERTIES
+      LINK_FLAGS
+      "-Wl,-rpath,'$ORIGIN/libs' -Wl,-rpath,'$ORIGIN/../nvidia/nccl/lib' ${AS_NEEDED_FLAG} ${UNDEFINED_FLAG}"
+  )
+endif()
--- a/cpp/tensorrt_llm/nanobind/bindings.cpp
+++ b/cpp/tensorrt_llm/nanobind/bindings.cpp
@ -0,0 +1,28 @@
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 2022-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <nanobind/nanobind.h>
+
+// The module name must be provided as the TRTLLM_NB_MODULE compile definition.
+#ifndef TRTLLM_NB_MODULE
+#error "TRTLLM_NB_MODULE must be defined"
+#endif
+
+// Entry point of the nanobind extension module.
+NB_MODULE(TRTLLM_NB_MODULE, m)
+{
+    // Module-level docstring.
+    m.doc() = "TensorRT-LLM Python bindings for C++ runtime";
+    // Marks this module as the nanobind build of the bindings.
+    m.attr("binding_type") = "nanobind";
+}
--- a/cpp/tensorrt_llm/pybind/bindings.cpp
+++ b/cpp/tensorrt_llm/pybind/bindings.cpp
@ -70,6 +70,7 @@ tr::SamplingConfig makeSamplingConfig(std::vector<tr::SamplingConfig> const& con
 PYBIND11_MODULE(TRTLLM_PYBIND_MODULE, m)
 {
    m.doc() = "TensorRT-LLM Python bindings for C++ runtime";
+    m.attr("binding_type") = "pybind";

    // Create MpiComm binding first since it's used in the executor bindings
    py::classh<tensorrt_llm::mpi::MpiComm>(m, "MpiComm")
--- a/scripts/build_wheel.py
+++ b/scripts/build_wheel.py
@ -299,6 +299,7 @@ def main(*,
         skip_building_wheel: bool = False,
         linking_install_binary: bool = False,
         python_bindings: bool = True,
+         binding_type: str = "pybind",
         benchmarks: bool = False,
         micro_benchmarks: bool = False,
         nvtx: bool = False,
@ -396,6 +397,39 @@ def main(*,
        clear_folder(build_dir)  # Keep the folder in case it is mounted.
    build_dir.mkdir(parents=True, exist_ok=True)

+    def get_binding_type_from_cache():
+        """Return the binding type recorded in the build dir's CMakeCache.txt.
+
+        Scans the cache for ``BINDING_TYPE:STRING=`` entries and returns the
+        first value that is ``'pybind'`` or ``'nanobind'``. Returns ``None``
+        when the cache file does not exist or holds no such entry.
+        """
+        cache_path = build_dir / "CMakeCache.txt"
+        if not cache_path.exists():
+            return None
+
+        entry_prefix = "BINDING_TYPE:STRING="
+        with open(cache_path, 'r') as cache:
+            for entry in cache:
+                if not entry.startswith(entry_prefix):
+                    continue
+                cached_value = entry.split("=", 1)[1].strip()
+                if cached_value in ('pybind', 'nanobind'):
+                    return cached_value
+        return None
+
+    cached_binding_type = get_binding_type_from_cache()
+
+    # When an existing build directory was configured for a different binding
+    # type (or its cache holds no recognizable BINDING_TYPE), remove the
+    # artifacts of both binding flavours and force CMake to be configured again.
+    binding_type_changed = cached_binding_type != binding_type
+    if binding_type_changed and not first_build:
+        binding_build_root = build_dir / "tensorrt_llm"
+        package_source_dir = project_dir / "tensorrt_llm"
+
+        # nanobind: build output directory and the stub file.
+        nanobind_dir = binding_build_root / "nanobind"
+        if nanobind_dir.exists():
+            rmtree(nanobind_dir)
+        nanobind_stub_file = package_source_dir / "bindings.pyi"
+        if nanobind_stub_file.exists():
+            nanobind_stub_file.unlink()
+
+        # pybind: build output directory and the stub directory.
+        pybind_dir = binding_build_root / "pybind"
+        if pybind_dir.exists():
+            rmtree(pybind_dir)
+        pybind_stub_dir = package_source_dir / "bindings"
+        if pybind_stub_dir.exists():
+            rmtree(pybind_stub_dir)
+
+        configure_cmake = True
+
+
    if use_ccache:
        cmake_def_args.append(
            f"-DCMAKE_CXX_COMPILER_LAUNCHER=ccache -DCMAKE_CUDA_COMPILER_LAUNCHER=ccache"
@ -411,12 +445,10 @@ def main(*,

    if cpp_only:
        build_pyt = "OFF"
-        build_pybind = "OFF"
        build_deep_ep = "OFF"
    else:
        targets.extend(["th_common", "bindings", "deep_ep"])
        build_pyt = "ON"
-        build_pybind = "ON"
        build_deep_ep = "ON"

    if benchmarks:
@ -456,7 +488,7 @@ def main(*,
                )
            cmake_def_args = " ".join(cmake_def_args)
            cmake_configure_command = (
-                f'cmake -DCMAKE_BUILD_TYPE="{build_type}" -DBUILD_PYT="{build_pyt}" -DBUILD_PYBIND="{build_pybind}" -DBUILD_DEEP_EP="{build_deep_ep}"'
+                f'cmake -DCMAKE_BUILD_TYPE="{build_type}" -DBUILD_PYT="{build_pyt}" -DBINDING_TYPE="{binding_type}" -DBUILD_DEEP_EP="{build_deep_ep}"'
                f' -DNVTX_DISABLE="{disable_nvtx}" -DBUILD_MICRO_BENCHMARKS={build_micro_benchmarks}'
                f' -DBUILD_WHEEL_TARGETS="{";".join(targets)}"'
                f' -DPython_EXECUTABLE={venv_python} -DPython3_EXECUTABLE={venv_python}'
@ -614,25 +646,26 @@ def main(*,

    if not cpp_only:

-        def get_pybind_lib(subdirectory, name):
-            pybind_build_dir = (build_dir / "tensorrt_llm" / subdirectory)
+        def get_binding_lib(subdirectory, name):
+            binding_build_dir = (build_dir / "tensorrt_llm" / subdirectory)
            if on_windows:
-                pybind_lib = list(pybind_build_dir.glob(f"{name}.*.pyd"))
+                binding_lib = list(binding_build_dir.glob(f"{name}.*.pyd"))
            else:
-                pybind_lib = list(pybind_build_dir.glob(f"{name}.*.so"))
+                binding_lib = list(binding_build_dir.glob(f"{name}.*.so"))

            assert len(
-                pybind_lib
-            ) == 1, f"Exactly one pybind library should be present: {pybind_lib}"
-            return pybind_lib[0]
+                binding_lib
+            ) == 1, f"Exactly one binding library should be present: {binding_lib}"
+            return binding_lib[0]

-        install_file(get_pybind_lib("pybind", "bindings"), pkg_dir)
+        install_file(get_binding_lib(binding_type, "bindings"), pkg_dir)

        with (build_dir / "tensorrt_llm" / "deep_ep" /
              "cuda_architectures.txt").open() as f:
            deep_ep_cuda_architectures = f.read().strip().strip(";")
        if deep_ep_cuda_architectures:
-            install_file(get_pybind_lib("deep_ep", "deep_ep_cpp_tllm"), pkg_dir)
+            install_file(get_binding_lib("deep_ep", "deep_ep_cpp_tllm"),
+                         pkg_dir)
            install_tree(build_dir / "tensorrt_llm" / "deep_ep" / "python" /
                         "deep_ep",
                         deep_ep_dir,
@ -651,30 +684,38 @@ def main(*,
                lib_dir / "nvshmem")
        if not skip_stubs:
            with working_directory(project_dir):
-                build_run(f"\"{venv_python}\" -m pip install pybind11-stubgen")
+                if binding_type == "nanobind":
+                    build_run(f"\"{venv_python}\" -m pip install nanobind")
+                else:
+                    build_run(
+                        f"\"{venv_python}\" -m pip install pybind11-stubgen")
            with working_directory(pkg_dir):
                if on_windows:
-                    stubgen = "stubgen.py"
-                    stubgen_contents = """
-                    # Loading torch, trt before bindings is required to avoid import errors on windows.
-                    # isort: off
-                    import torch
-                    import tensorrt as trt
-                    # isort: on
-                    import os
-                    import platform
+                    if binding_type == "nanobind":
+                        print("Windows not yet supported for nanobind stubs")
+                        exit(1)
+                    else:
+                        stubgen = "stubgen.py"
+                        stubgen_contents = """
+                        # Loading torch, trt before bindings is required to avoid import errors on windows.
+                        # isort: off
+                        import torch
+                        import tensorrt as trt
+                        # isort: on
+                        import os
+                        import platform

-                    from pybind11_stubgen import main
+                        from pybind11_stubgen import main

-                    if __name__ == "__main__":
-                        # Load dlls from `libs` directory before launching bindings.
-                        if platform.system() == "Windows":
-                            os.add_dll_directory(r\"{lib_dir}\")
-                        main()
-                    """.format(lib_dir=lib_dir)
-                    (pkg_dir / stubgen).write_text(dedent(stubgen_contents))
-                    build_run(f"\"{venv_python}\" {stubgen} -o . bindings")
-                    (pkg_dir / stubgen).unlink()
+                        if __name__ == "__main__":
+                            # Load dlls from `libs` directory before launching bindings.
+                            if platform.system() == "Windows":
+                                os.add_dll_directory(r\"{lib_dir}\")
+                            main()
+                        """.format(lib_dir=lib_dir)
+                        (pkg_dir / stubgen).write_text(dedent(stubgen_contents))
+                        build_run(f"\"{venv_python}\" {stubgen} -o . bindings")
+                        (pkg_dir / stubgen).unlink()
                else:
                    env_ld = os.environ.copy()

@ -702,14 +743,18 @@ def main(*,
                            exit(1)

                    env_ld["LD_LIBRARY_PATH"] = new_library_path
-
-                    build_run(
-                        f"\"{venv_python}\" -m pybind11_stubgen -o . bindings --exit-code",
-                        env=env_ld)
-                    if deep_ep_cuda_architectures:
+                    if binding_type == "nanobind":
                        build_run(
-                            f"\"{venv_python}\" -m pybind11_stubgen -o . deep_ep_cpp_tllm --exit-code",
+                            f"\"{venv_python}\" -m nanobind.stubgen -m bindings -O .",
                            env=env_ld)
+                    else:
+                        build_run(
+                            f"\"{venv_python}\" -m pybind11_stubgen -o . bindings --exit-code",
+                            env=env_ld)
+                        if deep_ep_cuda_architectures:
+                            build_run(
+                                f"\"{venv_python}\" -m pybind11_stubgen -o . deep_ep_cpp_tllm --exit-code",
+                                env=env_ld)

    if not skip_building_wheel:
        if dist_dir is None:
@ -820,6 +865,10 @@ def add_arguments(parser: ArgumentParser):
        "-p",
        action="store_true",
        help="(deprecated) Build the python bindings for the C++ runtime.")
+    parser.add_argument("--binding_type",
+                        choices=["pybind", "nanobind"],
+                        default="pybind",
+                        help="Which binding type to build: pybind, nanobind")
    parser.add_argument("--benchmarks",
                        action="store_true",
                        help="Build the benchmarks for the C++ runtime.")
--- a/setup.py
+++ b/setup.py
@ -49,9 +49,9 @@ def parse_requirements(filename: os.PathLike):


 def sanity_check():
-    bindings_path = Path(
-        __file__).resolve().parent / "tensorrt_llm" / "bindings"
-    if not bindings_path.exists():
+    tensorrt_llm_path = Path(__file__).resolve().parent / "tensorrt_llm"
+    if not ((tensorrt_llm_path / "bindings").exists() or
+            (tensorrt_llm_path / "bindings.pyi").exists()):
        raise ImportError(
            'The `bindings` module does not exist. Please check the package integrity. '
            'If you are attempting to use the pip development mode (editable installation), '
				`@ -0,0 +1 @@`
				`Subproject commit a0ed2587f1089ef7657e2ed49ad6756b01c74e9f`