# Mirror of https://github.com/NVIDIA/TensorRT-LLM.git
# Synced 2026-01-14 06:27:45 +08:00
# Original listing: 7 lines, 245 B, YAML
# The overlap scheduler for context servers is currently disabled, as it is
# not yet supported in disaggregated context server architectures.
disable_overlap_scheduler: true

# Cache transceiver settings. `backend` and `max_tokens_in_buffer` belong to
# this mapping: the parent key carries no scalar value of its own.
# NOTE(review): nesting was reconstructed from a flattened listing; confirm
# against the upstream file.
cache_transceiver_config:
  backend: UCX
  max_tokens_in_buffer: 2048