[TRTLLM-7964][infra] Set nixl to default cache transceiver backend (#7926)

Signed-off-by: Bo Deng <deemod@nvidia.com>
Bo Deng authored 2025-10-19 19:24:43 +08:00, committed by GitHub
parent e185173240
commit dd25595ae8
7 changed files with 23 additions and 15 deletions

View File

@@ -89,7 +89,7 @@ std::unique_ptr<BaseCacheTransceiver> CacheTransceiverFactory::createCacheTransc
         }
         else
         {
-            backendType = executor::CacheTransceiverConfig::BackendType::UCX;
+            backendType = executor::CacheTransceiverConfig::BackendType::NIXL;
         }
     }
     cacheTransceiverConfig.value().setBackendType(backendType);
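
With this change, leaving the backend unset now resolves to NIXL rather than UCX. Callers that construct the executor config directly can keep the old behavior by pinning the backend explicitly. A minimal sketch, assuming the `tensorrt_llm::executor` namespace and the public `executor.h` header; only `CacheTransceiverConfig::setBackendType` and the `BackendType` enum are taken from this diff:

```cpp
#include "tensorrt_llm/executor/executor.h"  // assumed header for executor types

namespace texec = tensorrt_llm::executor;

texec::CacheTransceiverConfig makeCacheTransceiverConfig()
{
    texec::CacheTransceiverConfig config;
    // Pin UCX explicitly; an unset backend now falls back to NIXL
    // instead of UCX in CacheTransceiverFactory::createCacheTransceiver.
    config.setBackendType(texec::CacheTransceiverConfig::BackendType::UCX);
    return config;
}
```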

View File

@@ -91,6 +91,17 @@ RUN if [ -f /etc/redhat-release ]; then \
         grep "^Name:" METADATA; \
     fi
+# Install UCX first
+COPY docker/common/install_ucx.sh install_ucx.sh
+RUN bash ./install_ucx.sh && rm install_ucx.sh
+# Install NIXL
+COPY docker/common/install_nixl.sh install_nixl.sh
+RUN bash ./install_nixl.sh && rm install_nixl.sh
+# Install etcd
+COPY docker/common/install_etcd.sh install_etcd.sh
+RUN bash ./install_etcd.sh && rm install_etcd.sh
 FROM ${TRITON_IMAGE}:${TRITON_BASE_TAG} AS triton
@@ -111,15 +122,6 @@ COPY docker/common/install_triton.sh \
 RUN bash ./install_triton.sh && rm install_triton.sh
-# Install UCX first
-RUN bash ./install_ucx.sh && rm install_ucx.sh
-# Install NIXL
-RUN bash ./install_nixl.sh && rm install_nixl.sh
-# Install etcd
-RUN bash ./install_etcd.sh && rm install_etcd.sh
 FROM ${DEVEL_IMAGE} AS wheel
 WORKDIR /src/tensorrt_llm
 COPY benchmarks benchmarks

View File

@@ -106,7 +106,7 @@ cache_transceiver_config:
   max_tokens_in_buffer: <int>
 ```
-`backend` specifies the communication backend for transferring the kvCache, valid options include `DEFAULT`,`UCX`, `NIXL`, and `MPI`, the default backend is UCX.
+`backend` specifies the communication backend for transferring the kvCache, valid options include `DEFAULT`,`UCX`, `NIXL`, and `MPI`, the default backend is NIXL.
 `max_tokens_in_buffer` defines the buffer size for kvCache transfers, it is recommended to set this value greater than or equal to the maximum ISL (Input Sequence Length) of all requests for optimal performance.
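
Deployments that depended on the old default can pin the backend explicitly instead of relying on `DEFAULT`. A hedged example using only the two keys documented above; the buffer value is an illustrative placeholder, not a recommendation:

```yaml
cache_transceiver_config:
  backend: UCX                # pin the pre-change default explicitly
  max_tokens_in_buffer: 8192  # placeholder; size it >= your maximum ISL
```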

View File

@@ -22,6 +22,9 @@
 pip3 install torch==2.7.1 torchvision torchaudio --index-url https://download.pytorch.org/whl/cu128
 sudo apt-get -y install libopenmpi-dev
+# Optional step: Only required for disagg-serving
+sudo apt-get -y install libzmq3-dev
 ```
 PyTorch CUDA 12.8 package is required for supporting NVIDIA Blackwell GPUs and SBSA platform. On prior GPUs or Linux x86_64 platform, this extra installation is not required.

View File

@@ -12,7 +12,7 @@ The `trtllm-serve` command supports the `extra-llm-config.yaml` parameter. In th
 ```yaml
 cache_transceiver_config:
-  # KV cache transmission backend. Valid options include `DEFAULT` (i.e., UCX), `UCX`, `NIXL`.
+  # KV cache transmission backend. Valid options include `DEFAULT` (i.e., NIXL), `UCX`, `NIXL`.
   backend: <str>
   # KV cache buffer size. Set it ≥ the maximum ISL (Input Sequence Length) for best performance.
   max_tokens_in_buffer: <int>
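
The file is then passed to `trtllm-serve` when launching each worker. A sketch with a placeholder model path, assuming the `--extra_llm_api_options` flag (check `trtllm-serve --help` for the exact spelling in your version):

```bash
# Placeholder model path; the flag name is an assumption, not taken from this diff.
trtllm-serve <model> --extra_llm_api_options ./extra-llm-config.yaml
```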

View File

@@ -38,10 +38,10 @@ def create_kv_cache_transceiver(
     if cache_transceiver_config.backend == BackendTypeCpp.DEFAULT:
         # When cache_transceiver_config.backend is not set, fallback to env_vars settings
-        # UCX is the default backend
-        cache_transceiver_config.backend = BackendTypeCpp.UCX
+        # NIXL is the default backend
+        cache_transceiver_config.backend = BackendTypeCpp.NIXL
         # Ordered by priority
-        env_vars = [("TRTLLM_USE_NIXL_KVCACHE", BackendTypeCpp.NIXL),
+        env_vars = [("TRTLLM_USE_UCX_KVCACHE", BackendTypeCpp.UCX),
                     ("TRTLLM_USE_MPI_KVCACHE", BackendTypeCpp.MPI)]
         for env_var, be_type in env_vars:
             if getenv(env_var) == "1":
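
Net effect of this hunk: an unset (`DEFAULT`) backend now resolves to NIXL, while the `TRTLLM_USE_UCX_KVCACHE` and `TRTLLM_USE_MPI_KVCACHE` environment variables can still force UCX or MPI. A self-contained sketch of that resolution order, using a stand-in enum rather than the real `BackendTypeCpp` bindings and assuming the first matching env var wins:

```python
from enum import Enum
from os import getenv


class Backend(Enum):  # stand-in for the real BackendTypeCpp bindings
    DEFAULT = "DEFAULT"
    UCX = "UCX"
    NIXL = "NIXL"
    MPI = "MPI"


def resolve_backend(configured: Backend) -> Backend:
    """Mirror the post-change fallback: explicit choice > env vars > NIXL."""
    if configured != Backend.DEFAULT:
        return configured
    # Ordered by priority; the first env var set to "1" is assumed to win.
    for env_var, backend in [("TRTLLM_USE_UCX_KVCACHE", Backend.UCX),
                             ("TRTLLM_USE_MPI_KVCACHE", Backend.MPI)]:
        if getenv(env_var) == "1":
            return backend
    return Backend.NIXL  # new default after this change
```

For example, setting `TRTLLM_USE_UCX_KVCACHE=1` restores the old UCX behavior without touching the YAML config.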

View File

@@ -426,6 +426,9 @@ def run_disaggregated_test(example_dir,
             config_file
         ]
     else:
+        pytest.skip(
+            "https://nvbugs/5584607 Ray orchestrator is not supported with NIXL(DEFAULT) cache transceiver backend."
+        )
     with open(config_file, 'r') as f:
         config = yaml.safe_load(f)