Mirror of https://github.com/NVIDIA/TensorRT-LLM.git, synced 2026-01-13 22:18:36 +08:00
[TRTLLM-7989][infra] Bundle UCX and NIXL libs in the TRTLLM python package (#7766)
Signed-off-by: Bo Deng <deemod@nvidia.com>
parent d330d0005c
commit 8cf95681e6
@@ -558,6 +558,7 @@ if(ENABLE_UCX)
     find_package(ucxx REQUIRED PATHS ${CMAKE_BINARY_DIR}/ucxx/build
                  NO_DEFAULT_PATH)
   endif()
+  find_package(NIXL)
 endif()
 if(ENABLE_UCX)
   set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DENABLE_UCX=1")
@@ -23,6 +23,16 @@ endif()

 find_package(ucx REQUIRED)

+# Set default NIXL_ROOT if not provided
+if(NOT NIXL_ROOT)
+  set(NIXL_ROOT
+      "/opt/nvidia/nvda_nixl"
+      CACHE PATH "NIXL installation directory" FORCE)
+  message(STATUS "NIXL_ROOT not set, using default: ${NIXL_ROOT}")
+else()
+  message(STATUS "Using provided NIXL_ROOT: ${NIXL_ROOT}")
+endif()
+
 find_path(NIXL_INCLUDE_DIR nixl.h HINTS ${NIXL_ROOT}/include)

 if(CMAKE_SYSTEM_PROCESSOR MATCHES "x86_64")
@@ -69,5 +79,5 @@ else()
   message(STATUS "NIXL_LIBRARY: ${NIXL_LIBRARY}")
   message(STATUS "NIXL_BUILD_LIBRARY: ${NIXL_BUILD_LIBRARY}")
   message(STATUS "SERDES_LIBRARY: ${SERDES_LIBRARY}")
-  message(FATAL_ERROR "NIXL not found after installation attempt.")
+  unset(NIXL_ROOT CACHE)
 endif()
@@ -9,13 +9,6 @@
 # license agreement from NVIDIA CORPORATION or its affiliates is strictly
 # prohibited.

-if(NOT CMAKE_SYSTEM_PROCESSOR MATCHES "x86_64")
-  message(
-    STATUS
-      "The NIXL backend is temporarily unavailable on the aarch64 platform.")
-  unset(NIXL_ROOT)
-endif()
-
 if(NIXL_ROOT)
   find_package(NIXL REQUIRED)
   # Check if all required packages were found
@@ -16,8 +16,8 @@ AARCH64_TRIPLE = "aarch64-linux-gnu"

 LLM_DOCKER_IMAGE = env.dockerImage

-LLM_DOCKER_IMAGE_12_9 = "urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:pytorch-25.06-py3-x86_64-ubuntu24.04-trt10.11.0.33-skip-tritondevel-202508051130-6090"
-LLM_SBSA_DOCKER_IMAGE_12_9 = "urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:pytorch-25.06-py3-aarch64-ubuntu24.04-trt10.11.0.33-skip-tritondevel-202508051130-6090"
+LLM_DOCKER_IMAGE_12_9 = "urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:pytorch-25.06-py3-x86_64-ubuntu24.04-trt10.11.0.33-skip-tritondevel-202509091430-7383"
+LLM_SBSA_DOCKER_IMAGE_12_9 = "urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:pytorch-25.06-py3-aarch64-ubuntu24.04-trt10.11.0.33-skip-tritondevel-202509091430-7383"

 // Always use x86_64 image for agent
 AGENT_IMAGE = env.dockerImage.replace("aarch64", "x86_64")
@@ -37,11 +37,11 @@ LLM_DOCKER_IMAGE = env.dockerImage
 LLM_ROCKYLINUX8_PY310_DOCKER_IMAGE = env.wheelDockerImagePy310
 LLM_ROCKYLINUX8_PY312_DOCKER_IMAGE = env.wheelDockerImagePy312

-LLM_ROCKYLINUX8_PY310_DOCKER_IMAGE_12_9="urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:cuda-12.9.1-devel-rocky8-x86_64-rocky8-py310-trt10.11.0.33-skip-tritondevel-202508051130-6090"
-LLM_ROCKYLINUX8_PY312_DOCKER_IMAGE_12_9="urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:cuda-12.9.1-devel-rocky8-x86_64-rocky8-py312-trt10.11.0.33-skip-tritondevel-202508051130-6090"
+LLM_ROCKYLINUX8_PY310_DOCKER_IMAGE_12_9="urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:cuda-12.9.1-devel-rocky8-x86_64-rocky8-py310-trt10.11.0.33-skip-tritondevel-202509091430-7383"
+LLM_ROCKYLINUX8_PY312_DOCKER_IMAGE_12_9="urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:cuda-12.9.1-devel-rocky8-x86_64-rocky8-py312-trt10.11.0.33-skip-tritondevel-202509091430-7383"

-LLM_DOCKER_IMAGE_12_9 = "urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:pytorch-25.06-py3-x86_64-ubuntu24.04-trt10.11.0.33-skip-tritondevel-202508051130-6090"
-LLM_SBSA_DOCKER_IMAGE_12_9 = "urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:pytorch-25.06-py3-aarch64-ubuntu24.04-trt10.11.0.33-skip-tritondevel-202508051130-6090"
+LLM_DOCKER_IMAGE_12_9 = "urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:pytorch-25.06-py3-x86_64-ubuntu24.04-trt10.11.0.33-skip-tritondevel-202509091430-7383"
+LLM_SBSA_DOCKER_IMAGE_12_9 = "urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:pytorch-25.06-py3-aarch64-ubuntu24.04-trt10.11.0.33-skip-tritondevel-202509091430-7383"

 DLFW_IMAGE_12_9 = "urm.nvidia.com/docker/nvidia/pytorch:25.06-py3"
@@ -56,6 +56,7 @@ uvicorn
 setuptools<80
 ordered-set
 peft
+patchelf==0.18.0
 einops
 flashinfer-python>=0.3.0
 opencv-python-headless
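The patchelf==0.18.0 pin makes the patchelf CLI available in the wheel-build environment; the wheel-packaging hunks below shell out to it to rewrite rpaths. A minimal pre-flight check, sketched here for illustration (not part of the commit):

# Sketch: the bundling steps shell out to patchelf, so fail early
# if the CLI is missing from PATH.
import shutil

assert shutil.which("patchelf") is not None, "patchelf not found on PATH"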
@@ -749,6 +749,17 @@ def main(*,
                 build_dir /
                 "tensorrt_llm/executor/cache_transmission/ucx_utils/libtensorrt_llm_ucx_wrapper.so",
                 lib_dir / "libtensorrt_llm_ucx_wrapper.so")
+            build_run(
+                f'patchelf --set-rpath \'$ORIGIN/ucx/\' {lib_dir / "libtensorrt_llm_ucx_wrapper.so"}'
+            )
+            if os.path.exists("/usr/local/ucx"):
+                ucx_dir = lib_dir / "ucx"
+                if ucx_dir.exists():
+                    clear_folder(ucx_dir)
+                install_tree("/usr/local/ucx/lib", ucx_dir, dirs_exist_ok=True)
+                build_run(
+                    f"find {ucx_dir} -type f -name '*.so*' -exec patchelf --set-rpath \'$ORIGIN:$ORIGIN/ucx:$ORIGIN/../\' {{}} \\;"
+                )
         if os.path.exists(
                 build_dir /
                 "tensorrt_llm/executor/cache_transmission/nixl_utils/libtensorrt_llm_nixl_wrapper.so"
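Setting the rpath to '$ORIGIN/ucx/' lets the wrapper library resolve the bundled UCX copies relative to its own location inside the installed package, wherever the wheel is unpacked. A small sketch (not part of the commit; assumes patchelf is on PATH and lib_dir points at the package's libs/ directory) to inspect what the build wrote:

# Sketch: print the rpath of every bundled shared object so the
# '$ORIGIN...' entries written by patchelf can be checked by eye.
import pathlib
import subprocess

def print_rpaths(lib_dir: str) -> None:
    for so in sorted(pathlib.Path(lib_dir).rglob("*.so*")):
        if so.is_file():
            out = subprocess.run(["patchelf", "--print-rpath", str(so)],
                                 capture_output=True, text=True, check=False)
            print(f"{so}: {out.stdout.strip() or '<none>'}")

print_rpaths("tensorrt_llm/libs")  # path is an assumption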
@@ -757,6 +768,22 @@ def main(*,
                 build_dir /
                 "tensorrt_llm/executor/cache_transmission/nixl_utils/libtensorrt_llm_nixl_wrapper.so",
                 lib_dir / "libtensorrt_llm_nixl_wrapper.so")
+            build_run(
+                f'patchelf --set-rpath \'$ORIGIN/nixl/\' {lib_dir / "libtensorrt_llm_nixl_wrapper.so"}'
+            )
+            if os.path.exists("/opt/nvidia/nvda_nixl"):
+                nixl_dir = lib_dir / "nixl"
+                if nixl_dir.exists():
+                    clear_folder(nixl_dir)
+                nixl_lib_path = "/opt/nvidia/nvda_nixl/lib/x86_64-linux-gnu"
+                if not os.path.exists(nixl_lib_path):
+                    nixl_lib_path = "/opt/nvidia/nvda_nixl/lib/aarch64-linux-gnu"
+                if not os.path.exists(nixl_lib_path):
+                    nixl_lib_path = "/opt/nvidia/nvda_nixl/lib64"
+                install_tree(nixl_lib_path, nixl_dir, dirs_exist_ok=True)
+                build_run(
+                    f"find {nixl_dir} -type f -name '*.so*' -exec patchelf --set-rpath \'$ORIGIN:$ORIGIN/plugins:$ORIGIN/../:$ORIGIN/../ucx/:$ORIGIN/../../ucx/\' {{}} \\;"
+                )
         install_file(
             build_dir /
             "tensorrt_llm/kernels/decoderMaskedMultiheadAttention/libdecoder_attention_0.so",
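NIXL installs its libraries under an architecture-dependent subdirectory, hence the probe chain above (x86_64-linux-gnu, then aarch64-linux-gnu, then lib64). The same fallback factored into a standalone helper, for illustration only (the name find_nixl_lib_dir is hypothetical):

# Sketch: return the first existing NIXL library directory, mirroring
# the fallback chain above; None if nothing is installed.
import os

def find_nixl_lib_dir(root: str = "/opt/nvidia/nvda_nixl"):
    for sub in ("lib/x86_64-linux-gnu", "lib/aarch64-linux-gnu", "lib64"):
        candidate = os.path.join(root, sub)
        if os.path.exists(candidate):
            return candidate
    return None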
setup.py (6 lines changed)
@@ -103,9 +103,9 @@ else:
     'bin/executorWorker', 'libs/libtensorrt_llm.so', 'libs/libth_common.so',
     'libs/libnvinfer_plugin_tensorrt_llm.so',
     'libs/libtensorrt_llm_ucx_wrapper.so', 'libs/libdecoder_attention_0.so',
-    'libs/libtensorrt_llm_nixl_wrapper.so',
-    'libs/libdecoder_attention_1.so', 'libs/nvshmem/License.txt',
-    'libs/nvshmem/nvshmem_bootstrap_uid.so.3',
+    'libs/libtensorrt_llm_nixl_wrapper.so', 'libs/nixl/**/*',
+    'libs/ucx/**/*', 'libs/libdecoder_attention_1.so',
+    'libs/nvshmem/License.txt', 'libs/nvshmem/nvshmem_bootstrap_uid.so.3',
     'libs/nvshmem/nvshmem_transport_ibgda.so.103', 'bindings.*.so',
     'deep_ep/LICENSE', 'deep_ep_cpp_tllm.*.so', "include/**/*",
     'deep_gemm/LICENSE', 'deep_gemm/include/**/*', 'deep_gemm_cpp_tllm.*.so'
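The new 'libs/nixl/**/*' and 'libs/ucx/**/*' globs make setuptools ship the entire bundled library trees as package data. One way to confirm they landed in a built wheel, sketched here (the wheel filename is a placeholder):

# Sketch: list the bundled UCX/NIXL files inside a built wheel.
import zipfile

with zipfile.ZipFile("tensorrt_llm-<version>.whl") as whl:  # placeholder name
    for name in whl.namelist():
        if "/libs/ucx/" in name or "/libs/nixl/" in name:
            print(name)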
@@ -32,3 +32,5 @@ l0_sanity_check:
   - llmapi/test_llm_examples.py::test_llmapi_runtime
   - llmapi/test_llm_examples.py::test_llmapi_tensorrt_engine
   - examples/test_llm_api_with_mpi.py::test_llm_api_single_gpu_with_mpirun[TinyLlama-1.1B-Chat-v1.0]
+  - unittest/others/test_kv_cache_transceiver.py::test_kv_cache_transceiver_single_process[NIXL-mha-ctx_fp16_gen_fp16]
+  - unittest/others/test_kv_cache_transceiver.py::test_kv_cache_transceiver_single_process[UCX-mha-ctx_fp16_gen_fp16]
@@ -64,8 +64,13 @@ def ctx_gen_kv_cache_dtype(request):
 @pytest.mark.parametrize("attention_type",
                          [AttentionTypeCpp.DEFAULT, AttentionTypeCpp.MLA],
                          ids=["mha", "mla"])
+@pytest.mark.parametrize("backend", [
+    trtllm.CacheTransceiverBackendType.NIXL,
+    trtllm.CacheTransceiverBackendType.UCX
+],
+                         ids=["NIXL", "UCX"])
 def test_kv_cache_transceiver_single_process(ctx_gen_kv_cache_dtype,
-                                             attention_type):
+                                             attention_type, backend):
     # Init kv_cache manager and cache transceiver
     mapping = Mapping(world_size=1, rank=0)
     ctx_kv_cache_dtype, gen_kv_cache_dtype = ctx_gen_kv_cache_dtype
@@ -73,8 +78,7 @@ def test_kv_cache_transceiver_single_process(ctx_gen_kv_cache_dtype,
     kv_cache_manager_gen = create_kv_cache_manager(mapping, gen_kv_cache_dtype)

     cache_transceiver_config = trtllm.CacheTransceiverConfig(
-        backend=trtllm.CacheTransceiverBackendType.DEFAULT,
-        max_tokens_in_buffer=512)
+        backend=backend, max_tokens_in_buffer=512)
     dist = MPIDist(mapping=mapping)
     kv_cache_transceiver_ctx = create_kv_cache_transceiver(
         mapping, dist, kv_cache_manager_ctx, attention_type,
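Stacked parametrize decorators multiply out, and pytest joins their ids with '-', the decorator closest to the function first; combined with the parametrized ctx_gen_kv_cache_dtype fixture this produces the ids referenced in l0_sanity_check above, e.g. [NIXL-mha-ctx_fp16_gen_fp16]. A minimal standalone illustration (not from the commit):

# Sketch: ids compose as test_ids[NIXL-mha], test_ids[UCX-mha], ...
import pytest

@pytest.mark.parametrize("attention", ["mha", "mla"])
@pytest.mark.parametrize("backend", ["NIXL", "UCX"])
def test_ids(backend, attention):
    assert backend in ("NIXL", "UCX")
    assert attention in ("mha", "mla")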
@@ -55,6 +55,7 @@ def test_pip_install():
     if not os.path.exists("/usr/local/mpi/bin/mpicc"):
         subprocess.check_call("apt-get -y install libopenmpi-dev", shell=True)

+    subprocess.check_call("apt-get -y install libzmq3-dev", shell=True)
     subprocess.check_call("apt-get -y install python3-pip", shell=True)
     subprocess.check_call("pip3 install --upgrade pip || true", shell=True)
     subprocess.check_call("pip3 install --upgrade setuptools || true",