fix: Ensure that Python stub generation works against libnvidia-ml stubs (#6188)

Signed-off-by: Martin Marciniszyn Mehringer <11665257+MartinMarciniszyn@users.noreply.github.com>
This commit is contained in:
Martin Marciniszyn Mehringer 2025-08-11 00:18:17 -07:00 committed by GitHub
parent d6ad4a9d5b
commit 9a8195ef88
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 113 additions and 81 deletions

View File

@ -52,6 +52,6 @@ if(NOT WIN32)
${TRTLLM_NB_MODULE}
PROPERTIES
LINK_FLAGS
"-Wl,-rpath,'$ORIGIN/libs' -Wl,-rpath,'$ORIGIN/../nvidia/nccl/lib' -Wl,-rpath,'${CUDA_TOOLKIT_ROOT_DIR}/targets/x86_64-linux/lib/stubs' ${AS_NEEDED_FLAG} ${UNDEFINED_FLAG}"
"-Wl,-rpath,'$ORIGIN/libs' -Wl,-rpath,'$ORIGIN/../nvidia/nccl/lib' ${AS_NEEDED_FLAG} ${UNDEFINED_FLAG}"
)
endif()

View File

@ -53,6 +53,6 @@ if(NOT WIN32)
${TRTLLM_PYBIND_MODULE}
PROPERTIES
LINK_FLAGS
"-Wl,-rpath,'$ORIGIN/libs' -Wl,-rpath,'$ORIGIN/../nvidia/nccl/lib' -Wl,-rpath,'${CUDA_TOOLKIT_ROOT_DIR}/targets/x86_64-linux/lib/stubs' ${AS_NEEDED_FLAG} ${UNDEFINED_FLAG}"
"-Wl,-rpath,'$ORIGIN/libs' -Wl,-rpath,'$ORIGIN/../nvidia/nccl/lib' ${AS_NEEDED_FLAG} ${UNDEFINED_FLAG}"
)
endif()

View File

@ -71,8 +71,9 @@ RUN bash ./install_pytorch.sh $TORCH_INSTALL_TYPE && rm install_pytorch.sh
ENV PYTORCH_CUDA_ALLOC_CONF="garbage_collection_threshold:0.99999"
# Install OpenCV with FFMPEG support
RUN pip3 uninstall -y opencv && rm -rf /usr/local/lib/python3*/dist-packages/cv2/
RUN pip3 install opencv-python-headless --force-reinstall --no-deps --no-cache-dir
RUN pip3 uninstall -y opencv && \
rm -rf /usr/local/lib/python3*/dist-packages/cv2/ && \
pip3 install opencv-python-headless --force-reinstall --no-deps --no-cache-dir
# WARs against security issues inherited from pytorch:25.06
# * https://github.com/advisories/GHSA-8qvm-5x2c-j2w7

View File

@ -27,7 +27,7 @@ from pathlib import Path
from shutil import copy, copytree, rmtree
from subprocess import DEVNULL, CalledProcessError, check_output, run
from textwrap import dedent
from typing import List
from typing import Sequence
try:
from packaging.requirements import Requirement
@ -120,7 +120,8 @@ def create_venv(project_dir: Path):
return venv_prefix
def setup_venv(project_dir: Path, requirements_file: Path, no_venv: bool):
def setup_venv(project_dir: Path, requirements_file: Path,
no_venv: bool) -> tuple[Path, Path]:
"""Creates/updates a venv and installs requirements.
Args:
@ -279,6 +280,103 @@ def generate_fmha_cu(project_dir, venv_python):
os.chdir(project_dir)
def create_cuda_stub_links(cuda_stub_dir: str):
    """Ensure versioned ``.so.1`` links exist for the CUDA stub libraries.

    For ``libcuda.so`` and ``libnvidia-ml.so``, a sibling ``lib<name>.so.1``
    symbolic link pointing at the stub is created inside ``cuda_stub_dir``
    when the stub is present and the versioned name does not already resolve
    to a file. A dangling ``.so.1`` link is replaced. If the directory is not
    writable, creation is retried through ``sudo ln``; a failure of that
    fallback is reported on stdout and otherwise ignored (best effort).

    Args:
        cuda_stub_dir (str): Path to the directory containing CUDA stubs.

    Raises:
        RuntimeError: If ``cuda_stub_dir`` does not exist.
    """
    # Local import keeps this function independent of the file-level imports.
    from shlex import quote

    cuda_stub_path = Path(cuda_stub_dir)
    if not cuda_stub_path.exists():
        raise RuntimeError(
            f"CUDA stub directory '{cuda_stub_dir}' does not exist.")

    # Base names of the shared objects that need a versioned alias.
    for lib_name in ("cuda.so", "nvidia-ml.so"):
        so = cuda_stub_path / f"lib{lib_name}"  # e.g., libcuda.so
        so_versioned = cuda_stub_path / f"lib{lib_name}.1"  # e.g., libcuda.so.1

        # Nothing to do when the stub is absent or the versioned name already
        # resolves to something.
        if not so.exists() or so_versioned.exists():
            continue

        # exists() follows symlinks, so a dangling lib*.so.1 link reaches this
        # point. It has to be removed first, otherwise symlink_to() raises
        # FileExistsError.
        dangling = so_versioned.is_symlink()
        try:
            if dangling:
                so_versioned.unlink()
            so_versioned.symlink_to(so)
        except FileExistsError:
            # Another process created the link between the check and the call;
            # the desired end state is already reached.
            continue
        except PermissionError:
            # Retry with elevated rights. Paths are shell-quoted because the
            # command is handed to build_run as a single string.
            ln_flags = "-sf" if dangling else "-s"
            try:
                build_run(
                    f"sudo ln {ln_flags} {quote(str(so))} {quote(str(so_versioned))}"
                )
            except CalledProcessError as sudo_error:
                print(
                    f"Failed to create symbolic link even with sudo: {sudo_error}"
                )
def generate_python_stubs_linux(binding_type: str, venv_python: Path,
deep_ep: bool):
"""Install the stub generator and run it for the binding modules on Linux.

Args:
    binding_type: "nanobind" selects nanobind's stub generator; any other
        value selects pybind11-stubgen.
    venv_python: Python interpreter used for both ``pip install`` and the
        stub generator invocations.
    deep_ep: When true, stubs are also generated for ``deep_ep_cpp_tllm``.

Stub output is written to "." (the current working directory) - presumably
the caller has already changed into the package directory; verify against
the call site.

NOTE(review): indentation is not visible in this view, so it cannot be told
from here whether the ``deep_gemm_cpp_tllm`` / ``deep_ep_cpp_tllm`` calls are
nested under the ``else`` (pybind) branch or run for both binding types -
confirm against the real file.
"""
is_nanobind = binding_type == "nanobind"
# Only the generator matching the binding type is installed.
package = "nanobind" if is_nanobind else "pybind11-stubgen"
build_run(f"\"{venv_python}\" -m pip install {package}")
# Work on a copy so the current process environment is left untouched.
env_stub_gen = os.environ.copy()
# CUDA root: CUDA_HOME, then CUDA_PATH, then the default install prefix.
# `or` is used, so an empty-string value also falls through to the next choice.
cuda_home_dir = env_stub_gen.get("CUDA_HOME") or env_stub_gen.get(
"CUDA_PATH") or "/usr/local/cuda"
cuda_stub_dir = f"{cuda_home_dir}/lib64/stubs"
ld_library_path = env_stub_gen.get("LD_LIBRARY_PATH")
# The stub directory is optional: when it is missing, LD_LIBRARY_PATH is passed through unchanged.
if Path(cuda_stub_dir).exists():
# Create symbolic links for the CUDA stubs
create_cuda_stub_links(cuda_stub_dir)
# Append the stub directory after any existing LD_LIBRARY_PATH entries.
env_stub_gen[
"LD_LIBRARY_PATH"] = f"{ld_library_path}:{cuda_stub_dir}" if ld_library_path else cuda_stub_dir
# Generate stubs for the main `bindings` module with the matching tool.
if is_nanobind:
build_run(f"\"{venv_python}\" -m nanobind.stubgen -m bindings -O .",
env=env_stub_gen)
else:
build_run(
f"\"{venv_python}\" -m pybind11_stubgen -o . bindings --exit-code",
env=env_stub_gen)
# The auxiliary modules below are always processed with pybind11_stubgen.
build_run(
f"\"{venv_python}\" -m pybind11_stubgen -o . deep_gemm_cpp_tllm --exit-code",
env=env_stub_gen)
if deep_ep:
build_run(
f"\"{venv_python}\" -m pybind11_stubgen -o . deep_ep_cpp_tllm --exit-code",
env=env_stub_gen)
def generate_python_stubs_windows(binding_type: str, venv_python: Path,
pkg_dir: Path, lib_dir: Path):
"""Generate stubs for the `bindings` module on Windows via pybind11-stubgen.

A temporary ``stubgen.py`` launcher is written into ``pkg_dir``, executed with
``venv_python`` and then deleted. The launcher imports torch and tensorrt
before the bindings, registers ``lib_dir`` as a DLL directory and calls
pybind11_stubgen's ``main``.

Args:
    binding_type: "nanobind" is not supported here; a message is printed and
        the process exits with status 1.
    venv_python: Python interpreter used for ``pip install`` and to run the
        launcher script.
    pkg_dir: Directory the launcher script is written to and removed from.
    lib_dir: Directory substituted into the launcher's
        ``os.add_dll_directory`` call.
"""
if binding_type == "nanobind":
print("Windows not yet supported for nanobind stubs")
exit(1)
else:
build_run(f"\"{venv_python}\" -m pip install pybind11-stubgen")
# The launcher is invoked below by its bare file name, so the working
# directory is presumably pkg_dir at this point - verify against the caller.
stubgen = "stubgen.py"
# Source of the launcher; `{lib_dir}` is filled in by str.format below.
stubgen_contents = """
# Loading torch, trt before bindings is required to avoid import errors on windows.
# isort: off
import torch
import tensorrt as trt
# isort: on
import os
import platform
from pybind11_stubgen import main
if __name__ == "__main__":
# Load dlls from `libs` directory before launching bindings.
if platform.system() == "Windows":
os.add_dll_directory(r\"{lib_dir}\")
main()
""".format(lib_dir=lib_dir)
# dedent strips the common leading whitespace of the template before it is written.
(pkg_dir / stubgen).write_text(dedent(stubgen_contents))
build_run(f"\"{venv_python}\" {stubgen} -o . bindings")
# NOTE(review): there is no try/finally, so the launcher is left behind if the
# build_run call above raises.
(pkg_dir / stubgen).unlink()
def main(*,
build_type: str = "Release",
generator: str = "",
@ -286,7 +384,7 @@ def main(*,
dist_dir: Path = None,
cuda_architectures: str = None,
job_count: int = None,
extra_cmake_vars: List[str] = list(),
extra_cmake_vars: Sequence[str] = tuple(),
extra_make_targets: str = "",
trt_root: str = '/usr/local/tensorrt',
nccl_root: str = None,
@ -361,7 +459,7 @@ def main(*,
if on_windows:
# Windows does not support multi-device currently.
extra_cmake_vars.extend(["ENABLE_MULTI_DEVICE=0"])
extra_cmake_vars += ["ENABLE_MULTI_DEVICE=0"]
# The Ninja CMake generator is used for our Windows build
# (Easier than MSBuild to make compatible with our Docker image)
@ -703,81 +801,14 @@ def main(*,
dirs_exist_ok=True)
if not skip_stubs:
with working_directory(project_dir):
if binding_type == "nanobind":
build_run(f"\"{venv_python}\" -m pip install nanobind")
else:
build_run(
f"\"{venv_python}\" -m pip install pybind11-stubgen")
with working_directory(pkg_dir):
if on_windows:
if binding_type == "nanobind":
print("Windows not yet supported for nanobind stubs")
exit(1)
else:
stubgen = "stubgen.py"
stubgen_contents = """
# Loading torch, trt before bindings is required to avoid import errors on windows.
# isort: off
import torch
import tensorrt as trt
# isort: on
import os
import platform
from pybind11_stubgen import main
if __name__ == "__main__":
# Load dlls from `libs` directory before launching bindings.
if platform.system() == "Windows":
os.add_dll_directory(r\"{lib_dir}\")
main()
""".format(lib_dir=lib_dir)
(pkg_dir / stubgen).write_text(dedent(stubgen_contents))
build_run(f"\"{venv_python}\" {stubgen} -o . bindings")
(pkg_dir / stubgen).unlink()
else:
env_ld = os.environ.copy()
new_library_path = "/usr/local/cuda/compat:/usr/local/cuda/compat/lib:/usr/local/cuda/compat/lib.real"
if 'LD_LIBRARY_PATH' in env_ld:
new_library_path += f":{env_ld['LD_LIBRARY_PATH']}"
result = build_run("find /usr -name *libnvidia-ml.so*",
capture_output=True,
text=True)
assert result.returncode == 0, f"Failed to run find *libnvidia-ml.so*: {result.stderr}"
# Build containers only contain stub version of libnvidia-ml.so and not the real version.
# If real version not in system, we need to create symbolic link to stub version to prevent import errors.
if "libnvidia-ml.so.1" not in result.stdout:
if "libnvidia-ml.so" in result.stdout:
line = result.stdout.splitlines()[0]
path = os.path.dirname(line)
new_library_path += f":{path}"
build_run(f"ln -s {line} {path}/libnvidia-ml.so.1")
else:
print(
f"Failed to find libnvidia-ml.so: {result.stderr}",
file=sys.stderr)
exit(1)
env_ld["LD_LIBRARY_PATH"] = new_library_path
if binding_type == "nanobind":
build_run(
f"\"{venv_python}\" -m nanobind.stubgen -m bindings -O .",
env=env_ld)
else:
build_run(
f"\"{venv_python}\" -m pybind11_stubgen -o . bindings --exit-code",
env=env_ld)
if deep_ep_cuda_architectures:
build_run(
f"\"{venv_python}\" -m pybind11_stubgen -o . deep_ep_cpp_tllm --exit-code",
env=env_ld)
build_run(
f"\"{venv_python}\" -m pybind11_stubgen -o . deep_gemm_cpp_tllm --exit-code",
env=env_ld)
generate_python_stubs_windows(binding_type, venv_python,
pkg_dir, lib_dir)
else: # on linux
generate_python_stubs_linux(
binding_type, venv_python,
bool(deep_ep_cuda_architectures))
if not skip_building_wheel:
if dist_dir is None: