[None][fix] Clean up linking to CUDA stub libraries in build_wheel.py (#6823)

Signed-off-by: Linda-Stadter <57756729+Linda-Stadter@users.noreply.github.com>
Signed-off-by: Martin Marciniszyn Mehringer <11665257+MartinMarciniszyn@users.noreply.github.com>
Co-authored-by: Linda-Stadter <57756729+Linda-Stadter@users.noreply.github.com>
This commit is contained in:
Martin Marciniszyn Mehringer 2025-08-18 08:20:51 -07:00 committed by GitHub
parent 1ce23545fc
commit 425dad01fd
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 153 additions and 81 deletions

View File

@ -43,6 +43,7 @@ target_link_libraries(
${Python3_LIBRARIES}
${TORCH_LIBRARIES}
torch_python
CUDA::cuda_driver
${CUDA_NVML_LIB}
th_common)
target_compile_definitions(
@ -54,6 +55,6 @@ if(NOT WIN32)
${TRTLLM_NB_MODULE}
PROPERTIES
LINK_FLAGS
"-Wl,-rpath,'$ORIGIN/libs' -Wl,-rpath,'$ORIGIN/../nvidia/nccl/lib' -Wl,-rpath,'${CUDA_TOOLKIT_ROOT_DIR}/targets/x86_64-linux/lib/stubs' ${AS_NEEDED_FLAG} ${UNDEFINED_FLAG}"
"-Wl,-rpath,'$ORIGIN/libs' -Wl,-rpath,'$ORIGIN/../nvidia/nccl/lib' ${AS_NEEDED_FLAG} ${UNDEFINED_FLAG}"
)
endif()

View File

@ -44,6 +44,7 @@ target_link_libraries(
${Python3_LIBRARIES}
${TORCH_LIBRARIES}
torch_python
CUDA::cuda_driver
${CUDA_NVML_LIB}
th_common)
target_compile_definitions(
@ -55,6 +56,6 @@ if(NOT WIN32)
${TRTLLM_PYBIND_MODULE}
PROPERTIES
LINK_FLAGS
"-Wl,-rpath,'$ORIGIN/libs' -Wl,-rpath,'$ORIGIN/../nvidia/nccl/lib' -Wl,-rpath,'${CUDA_TOOLKIT_ROOT_DIR}/targets/x86_64-linux/lib/stubs' ${AS_NEEDED_FLAG} ${UNDEFINED_FLAG}"
"-Wl,-rpath,'$ORIGIN/libs' -Wl,-rpath,'$ORIGIN/../nvidia/nccl/lib' ${AS_NEEDED_FLAG} ${UNDEFINED_FLAG}"
)
endif()

View File

@ -71,8 +71,9 @@ RUN bash ./install_pytorch.sh $TORCH_INSTALL_TYPE && rm install_pytorch.sh
ENV PYTORCH_CUDA_ALLOC_CONF="garbage_collection_threshold:0.99999"
# Install OpenCV with FFMPEG support
RUN pip3 uninstall -y opencv && rm -rf /usr/local/lib/python3*/dist-packages/cv2/
RUN pip3 install opencv-python-headless --force-reinstall --no-deps --no-cache-dir
RUN pip3 uninstall -y opencv && \
rm -rf /usr/local/lib/python3*/dist-packages/cv2/ && \
pip3 install opencv-python-headless --force-reinstall --no-deps --no-cache-dir
# WARs against security issues inherited from pytorch:25.06
# * https://github.com/advisories/GHSA-8qvm-5x2c-j2w7

View File

@ -16,8 +16,10 @@
import os
import platform
import re
import sys
import sysconfig
import tempfile
import warnings
from argparse import ArgumentParser
from contextlib import contextmanager
@ -27,7 +29,7 @@ from pathlib import Path
from shutil import copy, copytree, rmtree
from subprocess import DEVNULL, CalledProcessError, check_output, run
from textwrap import dedent
from typing import List
from typing import Sequence
try:
from packaging.requirements import Requirement
@ -120,7 +122,8 @@ def create_venv(project_dir: Path):
return venv_prefix
def setup_venv(project_dir: Path, requirements_file: Path, no_venv: bool):
def setup_venv(project_dir: Path, requirements_file: Path,
no_venv: bool) -> tuple[Path, Path]:
"""Creates/updates a venv and installs requirements.
Args:
@ -279,6 +282,139 @@ def generate_fmha_cu(project_dir, venv_python):
os.chdir(project_dir)
def create_cuda_stub_links(cuda_stub_dir: str, missing_libs: list[str]) -> str:
    """Create versioned symlinks to CUDA stub libraries in a temporary directory.

    For every entry of ``missing_libs`` that ends in a numeric version suffix
    (e.g. ``libcuda.so.1``), a symlink with that exact name is created in a
    fresh temporary directory, pointing at the unversioned stub of the same
    base name (e.g. ``libcuda.so``) inside ``cuda_stub_dir``.

    Args:
        cuda_stub_dir (str): Path to the directory containing CUDA stubs.
        missing_libs: Versioned names of the missing libraries. Entries
            without a trailing version suffix are ignored; duplicates are
            processed only once.

    Returns:
        str: Path to the temporary directory where links were created. The
        caller owns this directory and is responsible for removing it.

    Raises:
        RuntimeError: If ``cuda_stub_dir`` does not exist, or if a symlink
            cannot be created (the temporary directory is removed first).
    """
    cuda_stub_path = Path(cuda_stub_dir)
    if not cuda_stub_path.exists():
        raise RuntimeError(
            f"CUDA stub directory '{cuda_stub_dir}' does not exist.")

    # Strip only the *trailing* version suffix (".1", ".12.4", ...). An
    # unanchored pattern would also eat version-like parts in the middle of a
    # name ("libfoo3.12.so.1" -> "libfoo3.so") and miss the real stub.
    version_suffix = re.compile(r'(?:\.\d+)+$')

    # Create a temporary directory for the symbolic links
    temp_dir = tempfile.mkdtemp(prefix="cuda_stub_links_")
    temp_dir_path = Path(temp_dir)

    # dict.fromkeys() de-duplicates while preserving order, so a repeated
    # entry cannot fail with FileExistsError on the second symlink attempt.
    for missing_lib in dict.fromkeys(missing_libs):
        so_name, stripped = version_suffix.subn('', missing_lib)
        if not stripped:
            # Not a versioned name; nothing to map to a stub.
            continue

        so = cuda_stub_path / so_name
        if not so.exists():
            warnings.warn(
                f"Warning: Source library '{so}' does not exist and was skipped."
            )
            continue

        try:
            # Link to the resolved (absolute) stub so the link stays valid
            # even when `cuda_stub_dir` was given as a relative path.
            (temp_dir_path / missing_lib).symlink_to(so.resolve())
        except OSError as e:
            # Clean up the temporary directory on error; never let a cleanup
            # failure mask the original problem.
            rmtree(temp_dir, ignore_errors=True)
            raise RuntimeError(
                f"Failed to create symbolic link for '{missing_lib}' in temporary directory '{temp_dir}': {e}"
            ) from e

    # Return the path to the temporary directory where the links were created
    return str(temp_dir_path)
def check_missing_libs(so_prefix: str) -> list[str]:
    """List the shared libraries that ``ldd`` cannot resolve for a module.

    Runs ``ldd`` on ``{so_prefix}.cpython*.so`` and collects the first token
    of every output line containing "not found".

    Args:
        so_prefix: Module file name up to (not including) the ``.cpython``
            part of the extension-module suffix.

    Returns:
        The unresolved library names in order of first appearance, without
        duplicates.
    """
    ldd_output = build_run(f"ldd {so_prefix}.cpython*.so",
                           capture_output=True,
                           text=True).stdout
    # The library name is the token before "=> not found".
    unresolved = (entry.split()[0] for entry in ldd_output.splitlines()
                  if "not found" in entry)
    # dict.fromkeys() drops repeats while keeping first-seen order.
    return list(dict.fromkeys(unresolved))
# Generate Python stub files for the compiled binding modules on Linux.
#
# Parameters:
#   binding_type: compared against "nanobind" to choose the stub generator.
#   venv_python:  interpreter used for the `pip install` and stubgen commands.
#   deep_ep:      when true, stubs for `deep_ep_cpp_tllm` are generated too.
#
# Commands are run through `build_run` with paths relative to the current
# directory ("bindings", "-o ." / "-O ."), so the caller is presumably
# expected to have changed into the package directory first -- verify
# against the call site.
#
# NOTE(review): leading indentation is not visible in this view, so the exact
# nesting of the statements below (in particular which stubgen calls sit
# inside the `else:` branch of the `try`) should be confirmed against the
# real file.
def generate_python_stubs_linux(binding_type: str, venv_python: Path,
deep_ep: bool):
is_nanobind = binding_type == "nanobind"
# Install the stub generator packages into the venv.
if is_nanobind:
build_run(f"\"{venv_python}\" -m pip install nanobind")
build_run(f"\"{venv_python}\" -m pip install pybind11-stubgen")
# Work on a copy so the LD_LIBRARY_PATH change below does not leak into
# this process's own environment.
env_stub_gen = os.environ.copy()
# CUDA root: CUDA_HOME, then CUDA_PATH, then the default install location.
cuda_home_dir = env_stub_gen.get("CUDA_HOME") or env_stub_gen.get(
"CUDA_PATH") or "/usr/local/cuda"
# Libraries that `ldd` reports as "not found" for bindings.cpython*.so.
missing_libs = check_missing_libs("bindings")
cuda_stub_dir = f"{cuda_home_dir}/lib64/stubs"
if missing_libs and Path(cuda_stub_dir).exists():
# Create symbolic links for the CUDA stubs in a temporary directory and
# put that directory (and the stub directory) in front of any existing
# LD_LIBRARY_PATH; filter(None, ...) drops the entry when it is unset.
link_dir = create_cuda_stub_links(cuda_stub_dir, missing_libs)
ld_library_path = env_stub_gen.get("LD_LIBRARY_PATH")
env_stub_gen["LD_LIBRARY_PATH"] = ":".join(
filter(None, [link_dir, cuda_stub_dir, ld_library_path]))
else:
# Nothing to link; also tells the `finally` below there is nothing to remove.
link_dir = None
try:
# Run the stub generators with the adjusted environment.
if is_nanobind:
build_run(f"\"{venv_python}\" -m nanobind.stubgen -m bindings -O .",
env=env_stub_gen)
else:
build_run(
f"\"{venv_python}\" -m pybind11_stubgen -o . bindings --exit-code",
env=env_stub_gen)
build_run(
f"\"{venv_python}\" -m pybind11_stubgen -o . deep_gemm_cpp_tllm --exit-code",
env=env_stub_gen)
if deep_ep:
build_run(
f"\"{venv_python}\" -m pybind11_stubgen -o . deep_ep_cpp_tllm --exit-code",
env=env_stub_gen)
finally:
# Always remove the temporary symlink directory, even if stubgen failed.
if link_dir:
rmtree(link_dir)
def generate_python_stubs_windows(binding_type: str, venv_python: Path,
                                  pkg_dir: Path, lib_dir: Path):
    """Generate Python stubs for the `bindings` module on Windows.

    Writes a small launcher script (``stubgen.py``) into ``pkg_dir`` that
    imports torch and tensorrt, registers ``lib_dir`` as a DLL directory and
    then invokes ``pybind11_stubgen``; runs it with ``venv_python``; and
    removes the launcher again, even when stub generation fails.

    Args:
        binding_type: Binding flavour; "nanobind" is not supported here and
            terminates the process with exit status 1.
        venv_python: Interpreter used to install and run pybind11-stubgen.
        pkg_dir: Directory the launcher script is written to. The launcher is
            invoked by its bare file name, so the current working directory is
            presumably expected to be ``pkg_dir`` -- verify against the caller.
        lib_dir: Directory containing the DLLs the bindings depend on.

    Raises:
        SystemExit: If ``binding_type`` is "nanobind".
    """
    if binding_type == "nanobind":
        print("Windows not yet supported for nanobind stubs", file=sys.stderr)
        # sys.exit() rather than the `site`-provided exit(), which is meant
        # for interactive use and is absent under `python -S`.
        sys.exit(1)

    build_run(f"\"{venv_python}\" -m pip install pybind11-stubgen")
    stubgen = "stubgen.py"
    stubgen_contents = """
    # Loading torch, trt before bindings is required to avoid import errors on windows.
    # isort: off
    import torch
    import tensorrt as trt
    # isort: on
    import os
    import platform

    from pybind11_stubgen import main

    if __name__ == "__main__":
        # Load dlls from `libs` directory before launching bindings.
        if platform.system() == "Windows":
            os.add_dll_directory(r\"{lib_dir}\")
        main()
    """.format(lib_dir=lib_dir)

    stubgen_path = pkg_dir / stubgen
    try:
        stubgen_path.write_text(dedent(stubgen_contents))
        build_run(f"\"{venv_python}\" {stubgen} -o . bindings")
    finally:
        # Never leave the temporary launcher behind in the package directory,
        # where it could end up inside the wheel.
        stubgen_path.unlink(missing_ok=True)
def main(*,
build_type: str = "Release",
generator: str = "",
@ -286,7 +422,7 @@ def main(*,
dist_dir: Path = None,
cuda_architectures: str = None,
job_count: int = None,
extra_cmake_vars: List[str] = list(),
extra_cmake_vars: Sequence[str] = tuple(),
extra_make_targets: str = "",
trt_root: str = '/usr/local/tensorrt',
nccl_root: str = None,
@ -361,7 +497,7 @@ def main(*,
if on_windows:
# Windows does not support multi-device currently.
extra_cmake_vars.extend(["ENABLE_MULTI_DEVICE=0"])
extra_cmake_vars = list(extra_cmake_vars) + ["ENABLE_MULTI_DEVICE=0"]
# The Ninja CMake generator is used for our Windows build
# (Easier than MSBuild to make compatible with our Docker image)
@ -703,81 +839,14 @@ def main(*,
dirs_exist_ok=True)
if not skip_stubs:
with working_directory(project_dir):
if binding_type == "nanobind":
build_run(f"\"{venv_python}\" -m pip install nanobind")
else:
build_run(
f"\"{venv_python}\" -m pip install pybind11-stubgen")
with working_directory(pkg_dir):
if on_windows:
if binding_type == "nanobind":
print("Windows not yet supported for nanobind stubs")
exit(1)
else:
stubgen = "stubgen.py"
stubgen_contents = """
# Loading torch, trt before bindings is required to avoid import errors on windows.
# isort: off
import torch
import tensorrt as trt
# isort: on
import os
import platform
from pybind11_stubgen import main
if __name__ == "__main__":
# Load dlls from `libs` directory before launching bindings.
if platform.system() == "Windows":
os.add_dll_directory(r\"{lib_dir}\")
main()
""".format(lib_dir=lib_dir)
(pkg_dir / stubgen).write_text(dedent(stubgen_contents))
build_run(f"\"{venv_python}\" {stubgen} -o . bindings")
(pkg_dir / stubgen).unlink()
else:
env_ld = os.environ.copy()
new_library_path = "/usr/local/cuda/compat:/usr/local/cuda/compat/lib:/usr/local/cuda/compat/lib.real"
if 'LD_LIBRARY_PATH' in env_ld:
new_library_path += f":{env_ld['LD_LIBRARY_PATH']}"
result = build_run("find /usr -name *libnvidia-ml.so*",
capture_output=True,
text=True)
assert result.returncode == 0, f"Failed to run find *libnvidia-ml.so*: {result.stderr}"
# Build containers only contain stub version of libnvidia-ml.so and not the real version.
# If real version not in system, we need to create symbolic link to stub version to prevent import errors.
if "libnvidia-ml.so.1" not in result.stdout:
if "libnvidia-ml.so" in result.stdout:
line = result.stdout.splitlines()[0]
path = os.path.dirname(line)
new_library_path += f":{path}"
build_run(f"ln -s {line} {path}/libnvidia-ml.so.1")
else:
print(
f"Failed to find libnvidia-ml.so: {result.stderr}",
file=sys.stderr)
exit(1)
env_ld["LD_LIBRARY_PATH"] = new_library_path
if binding_type == "nanobind":
build_run(
f"\"{venv_python}\" -m nanobind.stubgen -m bindings -O .",
env=env_ld)
else:
build_run(
f"\"{venv_python}\" -m pybind11_stubgen -o . bindings --exit-code",
env=env_ld)
if deep_ep_cuda_architectures:
build_run(
f"\"{venv_python}\" -m pybind11_stubgen -o . deep_ep_cpp_tllm --exit-code",
env=env_ld)
build_run(
f"\"{venv_python}\" -m pybind11_stubgen -o . deep_gemm_cpp_tllm --exit-code",
env=env_ld)
generate_python_stubs_windows(binding_type, venv_python,
pkg_dir, lib_dir)
else: # on linux
generate_python_stubs_linux(
binding_type, venv_python,
bool(deep_ep_cuda_architectures))
if not skip_building_wheel:
if dist_dir is None: