feat: binding type build argument (pybind, nanobind) (#5802)

Signed-off-by: Linda-Stadter <57756729+Linda-Stadter@users.noreply.github.com>
This commit is contained in:
Linda 2025-07-10 17:48:50 +02:00 committed by GitHub
parent 2e3cf42e03
commit 4d071eb2d1
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
9 changed files with 181 additions and 47 deletions

3
.gitmodules vendored
View File

@ -20,3 +20,6 @@
[submodule "3rdparty/xgrammar"]
path = 3rdparty/xgrammar
url = https://github.com/mlc-ai/xgrammar.git
[submodule "3rdparty/nanobind"]
path = 3rdparty/nanobind
url = https://github.com/wjakob/nanobind

1
3rdparty/nanobind vendored Submodule

@ -0,0 +1 @@
Subproject commit a0ed2587f1089ef7657e2ed49ad6756b01c74e9f

View File

@ -28,8 +28,6 @@ project(tensorrt_llm LANGUAGES CXX)
# Build options
option(BUILD_PYT "Build in PyTorch TorchScript class mode" ON)
option(BUILD_PYBIND "Build Python bindings for C++ runtime and batch manager"
ON)
option(BUILD_TESTS "Build Google tests" ON)
option(BUILD_BENCHMARKS "Build benchmarks" ON)
option(BUILD_MICRO_BENCHMARKS "Build C++ micro benchmarks" OFF)
@ -68,6 +66,11 @@ endif()
add_compile_definitions("TLLM_GEN_EXPORT_INTERFACE")
add_compile_definitions("TLLM_ENABLE_CUDA")
# Selects the Python binding backend for the C++ runtime and batch manager.
# Supported values are "pybind" (pybind11) and "nanobind". The value is compared
# literally by the backend-specific branches of the build, so any other value
# selects neither backend.
set(BINDING_TYPE
    "pybind"
    CACHE STRING
          "Binding type of Python bindings for C++ runtime and batch manager")
# Advertise the supported values so cmake-gui / ccmake present a drop-down.
set_property(CACHE BINDING_TYPE PROPERTY STRINGS pybind nanobind)
# A mistyped value would otherwise be accepted silently; warn (non-fatal) so the
# configuration log shows why no binding backend was selected.
if(NOT "${BINDING_TYPE}" MATCHES "^(pybind|nanobind)$")
  message(
    WARNING
      "Unrecognized BINDING_TYPE='${BINDING_TYPE}' (expected 'pybind' or 'nanobind'); neither binding backend will be selected."
  )
endif()
set(INTERNAL_CUTLASS_KERNELS_PATH
""
CACHE
@ -195,7 +198,14 @@ set(TRT_LIB TensorRT::NvInfer)
get_filename_component(TRT_LLM_ROOT_DIR ${CMAKE_CURRENT_SOURCE_DIR} PATH)
set(3RDPARTY_DIR ${TRT_LLM_ROOT_DIR}/3rdparty)
add_subdirectory(${3RDPARTY_DIR}/pybind11 ${CMAKE_CURRENT_BINARY_DIR}/pybind11)
# Configure only the third-party binding framework chosen via BINDING_TYPE; the
# two values are mutually exclusive, so a single if/elseif chain is sufficient.
if(BINDING_TYPE STREQUAL "pybind")
  add_subdirectory(${3RDPARTY_DIR}/pybind11
                   ${CMAKE_CURRENT_BINARY_DIR}/pybind11)
elseif(BINDING_TYPE STREQUAL "nanobind")
  add_subdirectory(${3RDPARTY_DIR}/nanobind
                   ${CMAKE_CURRENT_BINARY_DIR}/nanobind)
endif()
# include as system to suppress warnings
include_directories(
@ -206,8 +216,13 @@ include_directories(
${3RDPARTY_DIR}/cutlass/include
${3RDPARTY_DIR}/cutlass/tools/util/include
${3RDPARTY_DIR}/NVTX/include
${3RDPARTY_DIR}/json/include
${3RDPARTY_DIR}/pybind11/include)
${3RDPARTY_DIR}/json/include)
# Add the headers of the binding framework selected via BINDING_TYPE.
if(BINDING_TYPE STREQUAL "pybind")
  include_directories(${3RDPARTY_DIR}/pybind11/include)
elseif(BINDING_TYPE STREQUAL "nanobind")
  include_directories(${3RDPARTY_DIR}/nanobind/include)
endif()
if(${CUDAToolkit_VERSION} VERSION_GREATER_EQUAL "11")
add_definitions("-DENABLE_BF16")

View File

@ -302,10 +302,14 @@ if(BUILD_PYT)
add_subdirectory(thop)
endif()
if(BUILD_PYBIND)
# Build the binding sources that match the backend selected via BINDING_TYPE.
if(BINDING_TYPE STREQUAL "pybind")
  add_subdirectory(pybind)
elseif(BINDING_TYPE STREQUAL "nanobind")
  add_subdirectory(nanobind)
endif()
if(BUILD_DEEP_EP)
add_subdirectory(deep_ep)
endif()

View File

@ -0,0 +1,33 @@
# Builds the nanobind-based Python extension module.

# Name of the extension module target; also exported to the parent scope.
set(TRTLLM_NB_MODULE bindings)
set(TRTLLM_NB_MODULE
    ${TRTLLM_NB_MODULE}
    PARENT_SCOPE)

set(SRCS ../runtime/ipcNvlsMemory.cu bindings.cpp)

nanobind_add_module(${TRTLLM_NB_MODULE} ${SRCS})

# Attach the project headers to this target only, rather than to every target
# created in this directory.
target_include_directories(${TRTLLM_NB_MODULE}
                           PRIVATE ${PROJECT_SOURCE_DIR}/include)

set_property(TARGET ${TRTLLM_NB_MODULE} PROPERTY POSITION_INDEPENDENT_CODE ON)

target_link_directories(${TRTLLM_NB_MODULE} PUBLIC
                        "${TORCH_INSTALL_PREFIX}/lib")

target_link_libraries(
  ${TRTLLM_NB_MODULE}
  PUBLIC ${SHARED_TARGET} ${UNDEFINED_FLAG} ${NO_AS_NEEDED_FLAG}
         ${Python3_LIBRARIES} ${TORCH_LIBRARIES} torch_python)

# TRTLLM_NB_MODULE is consumed by bindings.cpp as the module name;
# NB_DETAILED_ERROR_MESSAGES is passed through to the nanobind headers.
target_compile_definitions(
  ${TRTLLM_NB_MODULE} PUBLIC TRTLLM_NB_MODULE=${TRTLLM_NB_MODULE}
                             NB_DETAILED_ERROR_MESSAGES=1)

if(NOT WIN32)
  # Embed rpaths relative to the module's own location ($ORIGIN).
  set_target_properties(
    ${TRTLLM_NB_MODULE}
    PROPERTIES
      LINK_FLAGS
      "-Wl,-rpath,'$ORIGIN/libs' -Wl,-rpath,'$ORIGIN/../nvidia/nccl/lib' ${AS_NEEDED_FLAG} ${UNDEFINED_FLAG}"
  )
endif()

View File

@ -0,0 +1,28 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2022-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <nanobind/nanobind.h>

// The module name must be supplied by the build as a compile definition.
// `!defined` is used instead of the alternative token `not`, which is not
// accepted in preprocessor conditions by every compiler mode.
#if !defined(TRTLLM_NB_MODULE)
#error "TRTLLM_NB_MODULE must be defined"
#endif

// Entry point of the nanobind extension module.
NB_MODULE(TRTLLM_NB_MODULE, m)
{
    m.doc() = "TensorRT-LLM Python bindings for C++ runtime";
    // Expose the name of the binding backend as a module attribute.
    m.attr("binding_type") = "nanobind";
}

View File

@ -70,6 +70,7 @@ tr::SamplingConfig makeSamplingConfig(std::vector<tr::SamplingConfig> const& con
PYBIND11_MODULE(TRTLLM_PYBIND_MODULE, m)
{
m.doc() = "TensorRT-LLM Python bindings for C++ runtime";
m.attr("binding_type") = "pybind";
// Create MpiComm binding first since it's used in the executor bindings
py::classh<tensorrt_llm::mpi::MpiComm>(m, "MpiComm")

View File

@ -299,6 +299,7 @@ def main(*,
skip_building_wheel: bool = False,
linking_install_binary: bool = False,
python_bindings: bool = True,
binding_type: str = "pybind",
benchmarks: bool = False,
micro_benchmarks: bool = False,
nvtx: bool = False,
@ -396,6 +397,39 @@ def main(*,
clear_folder(build_dir) # Keep the folder in case it is mounted.
build_dir.mkdir(parents=True, exist_ok=True)
def get_binding_type_from_cache():
    """Return the binding type recorded in the build directory's CMakeCache.txt.

    Returns:
        'pybind' or 'nanobind' taken from the first ``BINDING_TYPE:STRING=``
        entry holding one of those values, or None when the cache file does
        not exist or contains no such entry.
    """
    cache_path = build_dir / "CMakeCache.txt"
    if not cache_path.exists():
        return None

    entry_prefix = "BINDING_TYPE:STRING="
    known_types = ('pybind', 'nanobind')
    with open(cache_path, 'r') as cache:
        # Everything after the first '=' on a matching line is the cached value.
        cached_values = (entry.split("=", 1)[1].strip() for entry in cache
                         if entry.startswith(entry_prefix))
        return next(
            (value for value in cached_values if value in known_types), None)
cached_binding_type = get_binding_type_from_cache()
if not first_build and cached_binding_type != binding_type:
    # The binding type found in the CMake cache differs from the requested one
    # (or none was found): remove the artifacts left by a previous binding
    # build for either backend, then force CMake to be configured again.
    tensorrt_llm_build_dir = build_dir / "tensorrt_llm"
    tensorrt_llm_src_dir = project_dir / "tensorrt_llm"
    # (path, is_directory) pairs, processed in order.
    stale_artifacts = (
        (tensorrt_llm_build_dir / "nanobind", True),
        (tensorrt_llm_src_dir / "bindings.pyi", False),
        (tensorrt_llm_build_dir / "pybind", True),
        (tensorrt_llm_src_dir / "bindings", True),
    )
    for stale_path, is_directory in stale_artifacts:
        if not stale_path.exists():
            continue
        if is_directory:
            rmtree(stale_path)
        else:
            stale_path.unlink()

    configure_cmake = True
if use_ccache:
cmake_def_args.append(
f"-DCMAKE_CXX_COMPILER_LAUNCHER=ccache -DCMAKE_CUDA_COMPILER_LAUNCHER=ccache"
@ -411,12 +445,10 @@ def main(*,
if cpp_only:
build_pyt = "OFF"
build_pybind = "OFF"
build_deep_ep = "OFF"
else:
targets.extend(["th_common", "bindings", "deep_ep"])
build_pyt = "ON"
build_pybind = "ON"
build_deep_ep = "ON"
if benchmarks:
@ -456,7 +488,7 @@ def main(*,
)
cmake_def_args = " ".join(cmake_def_args)
cmake_configure_command = (
f'cmake -DCMAKE_BUILD_TYPE="{build_type}" -DBUILD_PYT="{build_pyt}" -DBUILD_PYBIND="{build_pybind}" -DBUILD_DEEP_EP="{build_deep_ep}"'
f'cmake -DCMAKE_BUILD_TYPE="{build_type}" -DBUILD_PYT="{build_pyt}" -DBINDING_TYPE="{binding_type}" -DBUILD_DEEP_EP="{build_deep_ep}"'
f' -DNVTX_DISABLE="{disable_nvtx}" -DBUILD_MICRO_BENCHMARKS={build_micro_benchmarks}'
f' -DBUILD_WHEEL_TARGETS="{";".join(targets)}"'
f' -DPython_EXECUTABLE={venv_python} -DPython3_EXECUTABLE={venv_python}'
@ -614,25 +646,26 @@ def main(*,
if not cpp_only:
def get_pybind_lib(subdirectory, name):
pybind_build_dir = (build_dir / "tensorrt_llm" / subdirectory)
def get_binding_lib(subdirectory, name):
binding_build_dir = (build_dir / "tensorrt_llm" / subdirectory)
if on_windows:
pybind_lib = list(pybind_build_dir.glob(f"{name}.*.pyd"))
binding_lib = list(binding_build_dir.glob(f"{name}.*.pyd"))
else:
pybind_lib = list(pybind_build_dir.glob(f"{name}.*.so"))
binding_lib = list(binding_build_dir.glob(f"{name}.*.so"))
assert len(
pybind_lib
) == 1, f"Exactly one pybind library should be present: {pybind_lib}"
return pybind_lib[0]
binding_lib
) == 1, f"Exactly one binding library should be present: {binding_lib}"
return binding_lib[0]
install_file(get_pybind_lib("pybind", "bindings"), pkg_dir)
install_file(get_binding_lib(binding_type, "bindings"), pkg_dir)
with (build_dir / "tensorrt_llm" / "deep_ep" /
"cuda_architectures.txt").open() as f:
deep_ep_cuda_architectures = f.read().strip().strip(";")
if deep_ep_cuda_architectures:
install_file(get_pybind_lib("deep_ep", "deep_ep_cpp_tllm"), pkg_dir)
install_file(get_binding_lib("deep_ep", "deep_ep_cpp_tllm"),
pkg_dir)
install_tree(build_dir / "tensorrt_llm" / "deep_ep" / "python" /
"deep_ep",
deep_ep_dir,
@ -651,30 +684,38 @@ def main(*,
lib_dir / "nvshmem")
if not skip_stubs:
with working_directory(project_dir):
build_run(f"\"{venv_python}\" -m pip install pybind11-stubgen")
if binding_type == "nanobind":
build_run(f"\"{venv_python}\" -m pip install nanobind")
else:
build_run(
f"\"{venv_python}\" -m pip install pybind11-stubgen")
with working_directory(pkg_dir):
if on_windows:
stubgen = "stubgen.py"
stubgen_contents = """
# Loading torch, trt before bindings is required to avoid import errors on windows.
# isort: off
import torch
import tensorrt as trt
# isort: on
import os
import platform
if binding_type == "nanobind":
print("Windows not yet supported for nanobind stubs")
exit(1)
else:
stubgen = "stubgen.py"
stubgen_contents = """
# Loading torch, trt before bindings is required to avoid import errors on windows.
# isort: off
import torch
import tensorrt as trt
# isort: on
import os
import platform
from pybind11_stubgen import main
from pybind11_stubgen import main
if __name__ == "__main__":
# Load dlls from `libs` directory before launching bindings.
if platform.system() == "Windows":
os.add_dll_directory(r\"{lib_dir}\")
main()
""".format(lib_dir=lib_dir)
(pkg_dir / stubgen).write_text(dedent(stubgen_contents))
build_run(f"\"{venv_python}\" {stubgen} -o . bindings")
(pkg_dir / stubgen).unlink()
if __name__ == "__main__":
# Load dlls from `libs` directory before launching bindings.
if platform.system() == "Windows":
os.add_dll_directory(r\"{lib_dir}\")
main()
""".format(lib_dir=lib_dir)
(pkg_dir / stubgen).write_text(dedent(stubgen_contents))
build_run(f"\"{venv_python}\" {stubgen} -o . bindings")
(pkg_dir / stubgen).unlink()
else:
env_ld = os.environ.copy()
@ -702,14 +743,18 @@ def main(*,
exit(1)
env_ld["LD_LIBRARY_PATH"] = new_library_path
build_run(
f"\"{venv_python}\" -m pybind11_stubgen -o . bindings --exit-code",
env=env_ld)
if deep_ep_cuda_architectures:
if binding_type == "nanobind":
build_run(
f"\"{venv_python}\" -m pybind11_stubgen -o . deep_ep_cpp_tllm --exit-code",
f"\"{venv_python}\" -m nanobind.stubgen -m bindings -O .",
env=env_ld)
else:
build_run(
f"\"{venv_python}\" -m pybind11_stubgen -o . bindings --exit-code",
env=env_ld)
if deep_ep_cuda_architectures:
build_run(
f"\"{venv_python}\" -m pybind11_stubgen -o . deep_ep_cpp_tllm --exit-code",
env=env_ld)
if not skip_building_wheel:
if dist_dir is None:
@ -820,6 +865,10 @@ def add_arguments(parser: ArgumentParser):
"-p",
action="store_true",
help="(deprecated) Build the python bindings for the C++ runtime.")
parser.add_argument("--binding_type",
choices=["pybind", "nanobind"],
default="pybind",
help="Which binding type to build: pybind, nanobind")
parser.add_argument("--benchmarks",
action="store_true",
help="Build the benchmarks for the C++ runtime.")

View File

@ -49,9 +49,9 @@ def parse_requirements(filename: os.PathLike):
def sanity_check():
bindings_path = Path(
__file__).resolve().parent / "tensorrt_llm" / "bindings"
if not bindings_path.exists():
tensorrt_llm_path = Path(__file__).resolve().parent / "tensorrt_llm"
if not ((tensorrt_llm_path / "bindings").exists() or
(tensorrt_llm_path / "bindings.pyi").exists()):
raise ImportError(
'The `bindings` module does not exist. Please check the package integrity. '
'If you are attempting to use the pip development mode (editable installation), '