[TRTLLM-7989][infra] Bundle UCX and NIXL libs in the TRTLLM python package (#7766)

Signed-off-by: Bo Deng <deemod@nvidia.com>
This commit is contained in:
Bo Deng 2025-09-22 16:43:35 +08:00 committed by GitHub
parent d330d0005c
commit 8cf95681e6
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
11 changed files with 59 additions and 20 deletions

View File

@@ -558,6 +558,7 @@ if(ENABLE_UCX)
find_package(ucxx REQUIRED PATHS ${CMAKE_BINARY_DIR}/ucxx/build
NO_DEFAULT_PATH)
endif()
find_package(NIXL)
endif()
if(ENABLE_UCX)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DENABLE_UCX=1")

View File

@@ -23,6 +23,16 @@ endif()
find_package(ucx REQUIRED)
# Set default NIXL_ROOT if not provided
if(NOT NIXL_ROOT)
set(NIXL_ROOT
"/opt/nvidia/nvda_nixl"
CACHE PATH "NIXL installation directory" FORCE)
message(STATUS "NIXL_ROOT not set, using default: ${NIXL_ROOT}")
else()
message(STATUS "Using provided NIXL_ROOT: ${NIXL_ROOT}")
endif()
find_path(NIXL_INCLUDE_DIR nixl.h HINTS ${NIXL_ROOT}/include)
if(CMAKE_SYSTEM_PROCESSOR MATCHES "x86_64")
@@ -69,5 +79,5 @@ else()
message(STATUS "NIXL_LIBRARY: ${NIXL_LIBRARY}")
message(STATUS "NIXL_BUILD_LIBRARY: ${NIXL_BUILD_LIBRARY}")
message(STATUS "SERDES_LIBRARY: ${SERDES_LIBRARY}")
message(FATAL_ERROR "NIXL not found after installation attempt.")
unset(NIXL_ROOT CACHE)
endif()

View File

@@ -9,13 +9,6 @@
# license agreement from NVIDIA CORPORATION or its affiliates is strictly
# prohibited.
if(NOT CMAKE_SYSTEM_PROCESSOR MATCHES "x86_64")
message(
STATUS
"The NIXL backend is temporarily unavailable on the aarch64 platform.")
unset(NIXL_ROOT)
endif()
if(NIXL_ROOT)
find_package(NIXL REQUIRED)
# Check if all required packages were found

View File

@@ -16,8 +16,8 @@ AARCH64_TRIPLE = "aarch64-linux-gnu"
LLM_DOCKER_IMAGE = env.dockerImage
LLM_DOCKER_IMAGE_12_9 = "urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:pytorch-25.06-py3-x86_64-ubuntu24.04-trt10.11.0.33-skip-tritondevel-202508051130-6090"
LLM_SBSA_DOCKER_IMAGE_12_9 = "urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:pytorch-25.06-py3-aarch64-ubuntu24.04-trt10.11.0.33-skip-tritondevel-202508051130-6090"
LLM_DOCKER_IMAGE_12_9 = "urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:pytorch-25.06-py3-x86_64-ubuntu24.04-trt10.11.0.33-skip-tritondevel-202509091430-7383"
LLM_SBSA_DOCKER_IMAGE_12_9 = "urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:pytorch-25.06-py3-aarch64-ubuntu24.04-trt10.11.0.33-skip-tritondevel-202509091430-7383"
// Always use x86_64 image for agent
AGENT_IMAGE = env.dockerImage.replace("aarch64", "x86_64")

View File

@@ -37,11 +37,11 @@ LLM_DOCKER_IMAGE = env.dockerImage
LLM_ROCKYLINUX8_PY310_DOCKER_IMAGE = env.wheelDockerImagePy310
LLM_ROCKYLINUX8_PY312_DOCKER_IMAGE = env.wheelDockerImagePy312
LLM_ROCKYLINUX8_PY310_DOCKER_IMAGE_12_9="urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:cuda-12.9.1-devel-rocky8-x86_64-rocky8-py310-trt10.11.0.33-skip-tritondevel-202508051130-6090"
LLM_ROCKYLINUX8_PY312_DOCKER_IMAGE_12_9="urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:cuda-12.9.1-devel-rocky8-x86_64-rocky8-py312-trt10.11.0.33-skip-tritondevel-202508051130-6090"
LLM_ROCKYLINUX8_PY310_DOCKER_IMAGE_12_9="urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:cuda-12.9.1-devel-rocky8-x86_64-rocky8-py310-trt10.11.0.33-skip-tritondevel-202509091430-7383"
LLM_ROCKYLINUX8_PY312_DOCKER_IMAGE_12_9="urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:cuda-12.9.1-devel-rocky8-x86_64-rocky8-py312-trt10.11.0.33-skip-tritondevel-202509091430-7383"
LLM_DOCKER_IMAGE_12_9 = "urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:pytorch-25.06-py3-x86_64-ubuntu24.04-trt10.11.0.33-skip-tritondevel-202508051130-6090"
LLM_SBSA_DOCKER_IMAGE_12_9 = "urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:pytorch-25.06-py3-aarch64-ubuntu24.04-trt10.11.0.33-skip-tritondevel-202508051130-6090"
LLM_DOCKER_IMAGE_12_9 = "urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:pytorch-25.06-py3-x86_64-ubuntu24.04-trt10.11.0.33-skip-tritondevel-202509091430-7383"
LLM_SBSA_DOCKER_IMAGE_12_9 = "urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:pytorch-25.06-py3-aarch64-ubuntu24.04-trt10.11.0.33-skip-tritondevel-202509091430-7383"
DLFW_IMAGE_12_9 = "urm.nvidia.com/docker/nvidia/pytorch:25.06-py3"

View File

@@ -56,6 +56,7 @@ uvicorn
setuptools<80
ordered-set
peft
patchelf==0.18.0
einops
flashinfer-python>=0.3.0
opencv-python-headless

View File

@@ -749,6 +749,17 @@ def main(*,
build_dir /
"tensorrt_llm/executor/cache_transmission/ucx_utils/libtensorrt_llm_ucx_wrapper.so",
lib_dir / "libtensorrt_llm_ucx_wrapper.so")
build_run(
f'patchelf --set-rpath \'$ORIGIN/ucx/\' {lib_dir / "libtensorrt_llm_ucx_wrapper.so"}'
)
if os.path.exists("/usr/local/ucx"):
ucx_dir = lib_dir / "ucx"
if ucx_dir.exists():
clear_folder(ucx_dir)
install_tree("/usr/local/ucx/lib", ucx_dir, dirs_exist_ok=True)
build_run(
f"find {ucx_dir} -type f -name '*.so*' -exec patchelf --set-rpath \'$ORIGIN:$ORIGIN/ucx:$ORIGIN/../\' {{}} \\;"
)
if os.path.exists(
build_dir /
"tensorrt_llm/executor/cache_transmission/nixl_utils/libtensorrt_llm_nixl_wrapper.so"
@@ -757,6 +768,22 @@ def main(*,
build_dir /
"tensorrt_llm/executor/cache_transmission/nixl_utils/libtensorrt_llm_nixl_wrapper.so",
lib_dir / "libtensorrt_llm_nixl_wrapper.so")
build_run(
f'patchelf --set-rpath \'$ORIGIN/nixl/\' {lib_dir / "libtensorrt_llm_nixl_wrapper.so"}'
)
if os.path.exists("/opt/nvidia/nvda_nixl"):
nixl_dir = lib_dir / "nixl"
if nixl_dir.exists():
clear_folder(nixl_dir)
nixl_lib_path = "/opt/nvidia/nvda_nixl/lib/x86_64-linux-gnu"
if not os.path.exists(nixl_lib_path):
nixl_lib_path = "/opt/nvidia/nvda_nixl/lib/aarch64-linux-gnu"
if not os.path.exists(nixl_lib_path):
nixl_lib_path = "/opt/nvidia/nvda_nixl/lib64"
install_tree(nixl_lib_path, nixl_dir, dirs_exist_ok=True)
build_run(
f"find {nixl_dir} -type f -name '*.so*' -exec patchelf --set-rpath \'$ORIGIN:$ORIGIN/plugins:$ORIGIN/../:$ORIGIN/../ucx/:$ORIGIN/../../ucx/\' {{}} \\;"
)
install_file(
build_dir /
"tensorrt_llm/kernels/decoderMaskedMultiheadAttention/libdecoder_attention_0.so",

View File

@@ -103,9 +103,9 @@ else:
'bin/executorWorker', 'libs/libtensorrt_llm.so', 'libs/libth_common.so',
'libs/libnvinfer_plugin_tensorrt_llm.so',
'libs/libtensorrt_llm_ucx_wrapper.so', 'libs/libdecoder_attention_0.so',
'libs/libtensorrt_llm_nixl_wrapper.so',
'libs/libdecoder_attention_1.so', 'libs/nvshmem/License.txt',
'libs/nvshmem/nvshmem_bootstrap_uid.so.3',
'libs/libtensorrt_llm_nixl_wrapper.so', 'libs/nixl/**/*',
'libs/ucx/**/*', 'libs/libdecoder_attention_1.so',
'libs/nvshmem/License.txt', 'libs/nvshmem/nvshmem_bootstrap_uid.so.3',
'libs/nvshmem/nvshmem_transport_ibgda.so.103', 'bindings.*.so',
'deep_ep/LICENSE', 'deep_ep_cpp_tllm.*.so', "include/**/*",
'deep_gemm/LICENSE', 'deep_gemm/include/**/*', 'deep_gemm_cpp_tllm.*.so'

View File

@@ -32,3 +32,5 @@ l0_sanity_check:
- llmapi/test_llm_examples.py::test_llmapi_runtime
- llmapi/test_llm_examples.py::test_llmapi_tensorrt_engine
- examples/test_llm_api_with_mpi.py::test_llm_api_single_gpu_with_mpirun[TinyLlama-1.1B-Chat-v1.0]
- unittest/others/test_kv_cache_transceiver.py::test_kv_cache_transceiver_single_process[NIXL-mha-ctx_fp16_gen_fp16]
- unittest/others/test_kv_cache_transceiver.py::test_kv_cache_transceiver_single_process[UCX-mha-ctx_fp16_gen_fp16]

View File

@@ -64,8 +64,13 @@ def ctx_gen_kv_cache_dtype(request):
@pytest.mark.parametrize("attention_type",
[AttentionTypeCpp.DEFAULT, AttentionTypeCpp.MLA],
ids=["mha", "mla"])
@pytest.mark.parametrize("backend", [
trtllm.CacheTransceiverBackendType.NIXL,
trtllm.CacheTransceiverBackendType.UCX
],
ids=["NIXL", "UCX"])
def test_kv_cache_transceiver_single_process(ctx_gen_kv_cache_dtype,
attention_type):
attention_type, backend):
# Init kv_cache manager and cache transceiver
mapping = Mapping(world_size=1, rank=0)
ctx_kv_cache_dtype, gen_kv_cache_dtype = ctx_gen_kv_cache_dtype
@@ -73,8 +78,7 @@ def test_kv_cache_transceiver_single_process(ctx_gen_kv_cache_dtype,
kv_cache_manager_gen = create_kv_cache_manager(mapping, gen_kv_cache_dtype)
cache_transceiver_config = trtllm.CacheTransceiverConfig(
backend=trtllm.CacheTransceiverBackendType.DEFAULT,
max_tokens_in_buffer=512)
backend=backend, max_tokens_in_buffer=512)
dist = MPIDist(mapping=mapping)
kv_cache_transceiver_ctx = create_kv_cache_transceiver(
mapping, dist, kv_cache_manager_ctx, attention_type,

View File

@@ -55,6 +55,7 @@ def test_pip_install():
if not os.path.exists("/usr/local/mpi/bin/mpicc"):
subprocess.check_call("apt-get -y install libopenmpi-dev", shell=True)
subprocess.check_call("apt-get -y install libzmq3-dev", shell=True)
subprocess.check_call("apt-get -y install python3-pip", shell=True)
subprocess.check_call("pip3 install --upgrade pip || true", shell=True)
subprocess.check_call("pip3 install --upgrade setuptools || true",