mirror of
https://github.com/NVIDIA/TensorRT-LLM.git
synced 2026-01-13 22:18:36 +08:00
feat: binding type build argument (pybind, nanobind) (#5802)
Signed-off-by: Linda-Stadter <57756729+Linda-Stadter@users.noreply.github.com>
This commit is contained in:
parent
2e3cf42e03
commit
4d071eb2d1
3
.gitmodules
vendored
3
.gitmodules
vendored
@ -20,3 +20,6 @@
|
||||
[submodule "3rdparty/xgrammar"]
|
||||
path = 3rdparty/xgrammar
|
||||
url = https://github.com/mlc-ai/xgrammar.git
|
||||
[submodule "3rdparty/nanobind"]
|
||||
path = 3rdparty/nanobind
|
||||
url = https://github.com/wjakob/nanobind
|
||||
|
||||
1
3rdparty/nanobind
vendored
Submodule
1
3rdparty/nanobind
vendored
Submodule
@ -0,0 +1 @@
|
||||
Subproject commit a0ed2587f1089ef7657e2ed49ad6756b01c74e9f
|
||||
@ -28,8 +28,6 @@ project(tensorrt_llm LANGUAGES CXX)
|
||||
|
||||
# Build options
|
||||
option(BUILD_PYT "Build in PyTorch TorchScript class mode" ON)
|
||||
option(BUILD_PYBIND "Build Python bindings for C++ runtime and batch manager"
|
||||
ON)
|
||||
option(BUILD_TESTS "Build Google tests" ON)
|
||||
option(BUILD_BENCHMARKS "Build benchmarks" ON)
|
||||
option(BUILD_MICRO_BENCHMARKS "Build C++ micro benchmarks" OFF)
|
||||
@ -68,6 +66,11 @@ endif()
|
||||
add_compile_definitions("TLLM_GEN_EXPORT_INTERFACE")
|
||||
add_compile_definitions("TLLM_ENABLE_CUDA")
|
||||
|
||||
# Python binding backend for the C++ runtime and batch manager. Only "pybind"
# and "nanobind" are acted upon by this build, so advertise them to
# cmake-gui/ccmake and diagnose any other value instead of silently
# configuring a build without bindings.
set(BINDING_TYPE
    "pybind"
    CACHE STRING
          "Binding type of Python bindings for C++ runtime and batch manager")
set_property(CACHE BINDING_TYPE PROPERTY STRINGS "pybind" "nanobind")
if(NOT "${BINDING_TYPE}" MATCHES "^(pybind|nanobind)$")
  # A warning (not a fatal error) keeps previously accepted configurations
  # working while making the misconfiguration visible.
  message(
    WARNING
      "Unrecognized BINDING_TYPE '${BINDING_TYPE}': expected 'pybind' or 'nanobind'; no Python bindings will be configured."
  )
endif()
|
||||
|
||||
set(INTERNAL_CUTLASS_KERNELS_PATH
|
||||
""
|
||||
CACHE
|
||||
@ -195,7 +198,14 @@ set(TRT_LIB TensorRT::NvInfer)
|
||||
get_filename_component(TRT_LLM_ROOT_DIR ${CMAKE_CURRENT_SOURCE_DIR} PATH)
|
||||
|
||||
set(3RDPARTY_DIR ${TRT_LLM_ROOT_DIR}/3rdparty)
|
||||
add_subdirectory(${3RDPARTY_DIR}/pybind11 ${CMAKE_CURRENT_BINARY_DIR}/pybind11)
|
||||
# Pull in only the third-party binding library selected through BINDING_TYPE.
# The two values are mutually exclusive, so a single if/elseif chain suffices.
if(BINDING_TYPE STREQUAL "pybind")
  add_subdirectory(${3RDPARTY_DIR}/pybind11
                   ${CMAKE_CURRENT_BINARY_DIR}/pybind11)
elseif(BINDING_TYPE STREQUAL "nanobind")
  add_subdirectory(${3RDPARTY_DIR}/nanobind
                   ${CMAKE_CURRENT_BINARY_DIR}/nanobind)
endif()
|
||||
|
||||
# include as system to suppress warnings
|
||||
include_directories(
|
||||
@ -206,8 +216,13 @@ include_directories(
|
||||
${3RDPARTY_DIR}/cutlass/include
|
||||
${3RDPARTY_DIR}/cutlass/tools/util/include
|
||||
${3RDPARTY_DIR}/NVTX/include
|
||||
${3RDPARTY_DIR}/json/include
|
||||
${3RDPARTY_DIR}/pybind11/include)
|
||||
${3RDPARTY_DIR}/json/include)
|
||||
# Add the headers of the selected binding library only.
if(BINDING_TYPE STREQUAL "pybind")
  include_directories(${3RDPARTY_DIR}/pybind11/include)
elseif(BINDING_TYPE STREQUAL "nanobind")
  include_directories(${3RDPARTY_DIR}/nanobind/include)
endif()
|
||||
|
||||
if(${CUDAToolkit_VERSION} VERSION_GREATER_EQUAL "11")
|
||||
add_definitions("-DENABLE_BF16")
|
||||
|
||||
@ -302,10 +302,14 @@ if(BUILD_PYT)
|
||||
add_subdirectory(thop)
|
||||
endif()
|
||||
|
||||
if(BUILD_PYBIND)
|
||||
if(BINDING_TYPE STREQUAL "pybind")
|
||||
add_subdirectory(pybind)
|
||||
endif()
|
||||
|
||||
if(BINDING_TYPE STREQUAL "nanobind")
|
||||
add_subdirectory(nanobind)
|
||||
endif()
|
||||
|
||||
if(BUILD_DEEP_EP)
|
||||
add_subdirectory(deep_ep)
|
||||
endif()
|
||||
|
||||
33
cpp/tensorrt_llm/nanobind/CMakeLists.txt
Executable file
33
cpp/tensorrt_llm/nanobind/CMakeLists.txt
Executable file
@ -0,0 +1,33 @@
|
||||
# Builds the nanobind-based Python extension module for the C++ runtime.

# Target name of the extension module; also published to the parent scope.
set(TRTLLM_NB_MODULE bindings)
set(TRTLLM_NB_MODULE
    ${TRTLLM_NB_MODULE}
    PARENT_SCOPE)

set(SRCS ../runtime/ipcNvlsMemory.cu bindings.cpp)

nanobind_add_module(${TRTLLM_NB_MODULE} ${SRCS})

# Attach the project headers to the module itself rather than to every target
# created in this directory.
target_include_directories(${TRTLLM_NB_MODULE}
                           PRIVATE ${PROJECT_SOURCE_DIR}/include)

set_property(TARGET ${TRTLLM_NB_MODULE} PROPERTY POSITION_INDEPENDENT_CODE ON)

target_link_directories(${TRTLLM_NB_MODULE} PUBLIC
                        "${TORCH_INSTALL_PREFIX}/lib")

target_link_libraries(
  ${TRTLLM_NB_MODULE}
  PUBLIC ${SHARED_TARGET} ${UNDEFINED_FLAG} ${NO_AS_NEEDED_FLAG}
         ${Python3_LIBRARIES} ${TORCH_LIBRARIES} torch_python)

# TRTLLM_NB_MODULE is passed to the sources as a compile definition carrying
# the module name.
target_compile_definitions(
  ${TRTLLM_NB_MODULE} PUBLIC TRTLLM_NB_MODULE=${TRTLLM_NB_MODULE}
                             NB_DETAILED_ERROR_MESSAGES=1)

if(NOT WIN32)
  # Embed run-time search paths that are relative to the module's own location.
  set_target_properties(
    ${TRTLLM_NB_MODULE}
    PROPERTIES
      LINK_FLAGS
      "-Wl,-rpath,'$ORIGIN/libs' -Wl,-rpath,'$ORIGIN/../nvidia/nccl/lib' ${AS_NEEDED_FLAG} ${UNDEFINED_FLAG}"
  )
endif()
|
||||
28
cpp/tensorrt_llm/nanobind/bindings.cpp
Normal file
28
cpp/tensorrt_llm/nanobind/bindings.cpp
Normal file
@ -0,0 +1,28 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: Copyright (c) 2022-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include <nanobind/nanobind.h>
|
||||
|
||||
#if not defined(TRTLLM_NB_MODULE)
|
||||
#error "TRTLLM_NB_MODULE must be defined"
|
||||
#endif
|
||||
|
||||
NB_MODULE(TRTLLM_NB_MODULE, m)
|
||||
{
|
||||
m.doc() = "TensorRT-LLM Python bindings for C++ runtime";
|
||||
m.attr("binding_type") = "nanobind";
|
||||
}
|
||||
@ -70,6 +70,7 @@ tr::SamplingConfig makeSamplingConfig(std::vector<tr::SamplingConfig> const& con
|
||||
PYBIND11_MODULE(TRTLLM_PYBIND_MODULE, m)
|
||||
{
|
||||
m.doc() = "TensorRT-LLM Python bindings for C++ runtime";
|
||||
m.attr("binding_type") = "pybind";
|
||||
|
||||
// Create MpiComm binding first since it's used in the executor bindings
|
||||
py::classh<tensorrt_llm::mpi::MpiComm>(m, "MpiComm")
|
||||
|
||||
@ -299,6 +299,7 @@ def main(*,
|
||||
skip_building_wheel: bool = False,
|
||||
linking_install_binary: bool = False,
|
||||
python_bindings: bool = True,
|
||||
binding_type: str = "pybind",
|
||||
benchmarks: bool = False,
|
||||
micro_benchmarks: bool = False,
|
||||
nvtx: bool = False,
|
||||
@ -396,6 +397,39 @@ def main(*,
|
||||
clear_folder(build_dir) # Keep the folder in case it is mounted.
|
||||
build_dir.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
def get_binding_type_from_cache():
    """Return the binding type recorded in the build directory's CMakeCache.txt.

    Scans the cache for ``BINDING_TYPE:STRING=`` entries and returns the first
    value that is either ``'pybind'`` or ``'nanobind'``. Returns ``None`` when
    the cache file does not exist or holds no such entry.
    """
    cache_path = build_dir / "CMakeCache.txt"
    if not cache_path.exists():
        return None

    with open(cache_path, 'r') as cache:
        for entry in cache:
            if not entry.startswith("BINDING_TYPE:STRING="):
                continue
            # Keep everything after the first '=' as the cached value.
            value = entry.split("=", 1)[1].strip()
            if value in ['pybind', 'nanobind']:
                return value
    return None
|
||||
|
||||
cached_binding_type = get_binding_type_from_cache()
|
||||
|
||||
if not first_build and cached_binding_type != binding_type:
|
||||
# Clean up of previous binding build artifacts
|
||||
nanobind_dir = build_dir / "tensorrt_llm" / "nanobind"
|
||||
if nanobind_dir.exists():
|
||||
rmtree(nanobind_dir)
|
||||
nanobind_stub_file = project_dir / "tensorrt_llm" / "bindings.pyi"
|
||||
if nanobind_stub_file.exists():
|
||||
nanobind_stub_file.unlink()
|
||||
|
||||
pybind_dir = build_dir / "tensorrt_llm" / "pybind"
|
||||
if pybind_dir.exists():
|
||||
rmtree(pybind_dir)
|
||||
pybind_stub_dir = project_dir / "tensorrt_llm" / "bindings"
|
||||
if pybind_stub_dir.exists():
|
||||
rmtree(pybind_stub_dir)
|
||||
|
||||
configure_cmake = True
|
||||
|
||||
if use_ccache:
|
||||
cmake_def_args.append(
|
||||
f"-DCMAKE_CXX_COMPILER_LAUNCHER=ccache -DCMAKE_CUDA_COMPILER_LAUNCHER=ccache"
|
||||
@ -411,12 +445,10 @@ def main(*,
|
||||
|
||||
if cpp_only:
|
||||
build_pyt = "OFF"
|
||||
build_pybind = "OFF"
|
||||
build_deep_ep = "OFF"
|
||||
else:
|
||||
targets.extend(["th_common", "bindings", "deep_ep"])
|
||||
build_pyt = "ON"
|
||||
build_pybind = "ON"
|
||||
build_deep_ep = "ON"
|
||||
|
||||
if benchmarks:
|
||||
@ -456,7 +488,7 @@ def main(*,
|
||||
)
|
||||
cmake_def_args = " ".join(cmake_def_args)
|
||||
cmake_configure_command = (
|
||||
f'cmake -DCMAKE_BUILD_TYPE="{build_type}" -DBUILD_PYT="{build_pyt}" -DBUILD_PYBIND="{build_pybind}" -DBUILD_DEEP_EP="{build_deep_ep}"'
|
||||
f'cmake -DCMAKE_BUILD_TYPE="{build_type}" -DBUILD_PYT="{build_pyt}" -DBINDING_TYPE="{binding_type}" -DBUILD_DEEP_EP="{build_deep_ep}"'
|
||||
f' -DNVTX_DISABLE="{disable_nvtx}" -DBUILD_MICRO_BENCHMARKS={build_micro_benchmarks}'
|
||||
f' -DBUILD_WHEEL_TARGETS="{";".join(targets)}"'
|
||||
f' -DPython_EXECUTABLE={venv_python} -DPython3_EXECUTABLE={venv_python}'
|
||||
@ -614,25 +646,26 @@ def main(*,
|
||||
|
||||
if not cpp_only:
|
||||
|
||||
def get_pybind_lib(subdirectory, name):
|
||||
pybind_build_dir = (build_dir / "tensorrt_llm" / subdirectory)
|
||||
def get_binding_lib(subdirectory, name):
|
||||
binding_build_dir = (build_dir / "tensorrt_llm" / subdirectory)
|
||||
if on_windows:
|
||||
pybind_lib = list(pybind_build_dir.glob(f"{name}.*.pyd"))
|
||||
binding_lib = list(binding_build_dir.glob(f"{name}.*.pyd"))
|
||||
else:
|
||||
pybind_lib = list(pybind_build_dir.glob(f"{name}.*.so"))
|
||||
binding_lib = list(binding_build_dir.glob(f"{name}.*.so"))
|
||||
|
||||
assert len(
|
||||
pybind_lib
|
||||
) == 1, f"Exactly one pybind library should be present: {pybind_lib}"
|
||||
return pybind_lib[0]
|
||||
binding_lib
|
||||
) == 1, f"Exactly one binding library should be present: {binding_lib}"
|
||||
return binding_lib[0]
|
||||
|
||||
install_file(get_pybind_lib("pybind", "bindings"), pkg_dir)
|
||||
install_file(get_binding_lib(binding_type, "bindings"), pkg_dir)
|
||||
|
||||
with (build_dir / "tensorrt_llm" / "deep_ep" /
|
||||
"cuda_architectures.txt").open() as f:
|
||||
deep_ep_cuda_architectures = f.read().strip().strip(";")
|
||||
if deep_ep_cuda_architectures:
|
||||
install_file(get_pybind_lib("deep_ep", "deep_ep_cpp_tllm"), pkg_dir)
|
||||
install_file(get_binding_lib("deep_ep", "deep_ep_cpp_tllm"),
|
||||
pkg_dir)
|
||||
install_tree(build_dir / "tensorrt_llm" / "deep_ep" / "python" /
|
||||
"deep_ep",
|
||||
deep_ep_dir,
|
||||
@ -651,30 +684,38 @@ def main(*,
|
||||
lib_dir / "nvshmem")
|
||||
if not skip_stubs:
|
||||
with working_directory(project_dir):
|
||||
build_run(f"\"{venv_python}\" -m pip install pybind11-stubgen")
|
||||
if binding_type == "nanobind":
|
||||
build_run(f"\"{venv_python}\" -m pip install nanobind")
|
||||
else:
|
||||
build_run(
|
||||
f"\"{venv_python}\" -m pip install pybind11-stubgen")
|
||||
with working_directory(pkg_dir):
|
||||
if on_windows:
|
||||
stubgen = "stubgen.py"
|
||||
stubgen_contents = """
|
||||
# Loading torch, trt before bindings is required to avoid import errors on windows.
|
||||
# isort: off
|
||||
import torch
|
||||
import tensorrt as trt
|
||||
# isort: on
|
||||
import os
|
||||
import platform
|
||||
if binding_type == "nanobind":
|
||||
print("Windows not yet supported for nanobind stubs")
|
||||
exit(1)
|
||||
else:
|
||||
stubgen = "stubgen.py"
|
||||
stubgen_contents = """
|
||||
# Loading torch, trt before bindings is required to avoid import errors on windows.
|
||||
# isort: off
|
||||
import torch
|
||||
import tensorrt as trt
|
||||
# isort: on
|
||||
import os
|
||||
import platform
|
||||
|
||||
from pybind11_stubgen import main
|
||||
from pybind11_stubgen import main
|
||||
|
||||
if __name__ == "__main__":
|
||||
# Load dlls from `libs` directory before launching bindings.
|
||||
if platform.system() == "Windows":
|
||||
os.add_dll_directory(r\"{lib_dir}\")
|
||||
main()
|
||||
""".format(lib_dir=lib_dir)
|
||||
(pkg_dir / stubgen).write_text(dedent(stubgen_contents))
|
||||
build_run(f"\"{venv_python}\" {stubgen} -o . bindings")
|
||||
(pkg_dir / stubgen).unlink()
|
||||
if __name__ == "__main__":
|
||||
# Load dlls from `libs` directory before launching bindings.
|
||||
if platform.system() == "Windows":
|
||||
os.add_dll_directory(r\"{lib_dir}\")
|
||||
main()
|
||||
""".format(lib_dir=lib_dir)
|
||||
(pkg_dir / stubgen).write_text(dedent(stubgen_contents))
|
||||
build_run(f"\"{venv_python}\" {stubgen} -o . bindings")
|
||||
(pkg_dir / stubgen).unlink()
|
||||
else:
|
||||
env_ld = os.environ.copy()
|
||||
|
||||
@ -702,14 +743,18 @@ def main(*,
|
||||
exit(1)
|
||||
|
||||
env_ld["LD_LIBRARY_PATH"] = new_library_path
|
||||
|
||||
build_run(
|
||||
f"\"{venv_python}\" -m pybind11_stubgen -o . bindings --exit-code",
|
||||
env=env_ld)
|
||||
if deep_ep_cuda_architectures:
|
||||
if binding_type == "nanobind":
|
||||
build_run(
|
||||
f"\"{venv_python}\" -m pybind11_stubgen -o . deep_ep_cpp_tllm --exit-code",
|
||||
f"\"{venv_python}\" -m nanobind.stubgen -m bindings -O .",
|
||||
env=env_ld)
|
||||
else:
|
||||
build_run(
|
||||
f"\"{venv_python}\" -m pybind11_stubgen -o . bindings --exit-code",
|
||||
env=env_ld)
|
||||
if deep_ep_cuda_architectures:
|
||||
build_run(
|
||||
f"\"{venv_python}\" -m pybind11_stubgen -o . deep_ep_cpp_tllm --exit-code",
|
||||
env=env_ld)
|
||||
|
||||
if not skip_building_wheel:
|
||||
if dist_dir is None:
|
||||
@ -820,6 +865,10 @@ def add_arguments(parser: ArgumentParser):
|
||||
"-p",
|
||||
action="store_true",
|
||||
help="(deprecated) Build the python bindings for the C++ runtime.")
|
||||
parser.add_argument("--binding_type",
|
||||
choices=["pybind", "nanobind"],
|
||||
default="pybind",
|
||||
help="Which binding type to build: pybind, nanobind")
|
||||
parser.add_argument("--benchmarks",
|
||||
action="store_true",
|
||||
help="Build the benchmarks for the C++ runtime.")
|
||||
|
||||
6
setup.py
6
setup.py
@ -49,9 +49,9 @@ def parse_requirements(filename: os.PathLike):
|
||||
|
||||
|
||||
def sanity_check():
|
||||
bindings_path = Path(
|
||||
__file__).resolve().parent / "tensorrt_llm" / "bindings"
|
||||
if not bindings_path.exists():
|
||||
tensorrt_llm_path = Path(__file__).resolve().parent / "tensorrt_llm"
|
||||
if not ((tensorrt_llm_path / "bindings").exists() or
|
||||
(tensorrt_llm_path / "bindings.pyi").exists()):
|
||||
raise ImportError(
|
||||
'The `bindings` module does not exist. Please check the package integrity. '
|
||||
'If you are attempting to use the pip development mode (editable installation), '
|
||||
|
||||
Loading…
Reference in New Issue
Block a user