[KV Connector] Update lmcache kv_offloading_backend to use LMCacheMPConnector (#42865)

Signed-off-by: baoloongmao <baoloongmao@tencent.com>
This commit is contained in:
maobaolong
2026-06-04 10:23:55 +08:00
committed by GitHub
parent 0c1e6f63f5
commit b58e082d95
2 changed files with 47 additions and 18 deletions
+41 -8
View File
@@ -6,25 +6,56 @@
import pytest
from vllm.config import CacheConfig, KVTransferConfig, ParallelConfig, VllmConfig
from vllm.distributed.kv_transfer.kv_connector.factory import KVConnectorFactory
pytestmark = pytest.mark.cpu_test
class _StubLMCacheMPConnector:
    """Stand-in for LMCacheMPConnector used in config-translation tests.

    The real connector module hard-imports the optional ``lmcache`` package
    at module load time, which is not installed in the cpu_test image. This
    test only asserts on the connector *name* and the ``extra_config`` dict
    produced by ``VllmConfig``, never instantiates the connector, so a bare
    placeholder class is sufficient. Not subclassing ``SupportsHMA`` mirrors
    the real connector's HMA support (it does not support HMA either).
    """
@pytest.fixture
def stub_lmcache_mp_connector(monkeypatch):
    """Register a stub loader for ``LMCacheMPConnector`` for one test.

    The ``"LMCacheMPConnector"`` entry of ``KVConnectorFactory._registry`` is
    swapped for a zero-argument loader that returns
    ``_StubLMCacheMPConnector``. The intent is that
    ``VllmConfig.__post_init__`` does not import ``lmcache_mp_connector``
    (and thus ``lmcache``) during config tests. The entry is set through
    ``monkeypatch.setitem``, so pytest undoes it at test teardown.
    """

    def _load_stub():
        # Same contract as the replaced registry entry: call it to get the
        # connector class.
        return _StubLMCacheMPConnector

    monkeypatch.setitem(
        KVConnectorFactory._registry,
        "LMCacheMPConnector",
        _load_stub,
    )
@pytest.mark.parametrize(
"kv_offloading_backend,kv_offloading_size,tp,pp,expected_backend,expected_bytes",
[
("native", 4.0, 1, 1, "OffloadingConnector", 4.0 * (1 << 30)),
# expected_bytes is the full 8.0 GiB here; it is not divided by tp * pp (2 * 2)
("native", 8.0, 2, 2, "OffloadingConnector", 8.0 * (1 << 30)),
("lmcache", 4.0, 1, 1, "LMCacheConnectorV1", 4.0),
# size per rank: 8.0 GiB / (2 * 2) = 2.0 GiB
("lmcache", 8.0, 2, 2, "LMCacheConnectorV1", 2.0),
# ``lmcache`` backend now defaults to LMCacheMPConnector. The KV
# storage capacity is owned by the standalone LMCache server, so
# ``kv_offloading_size`` is intentionally not propagated.
("lmcache", 4.0, 1, 1, "LMCacheMPConnector", None),
("lmcache", 8.0, 2, 2, "LMCacheMPConnector", None),
# When kv_offloading_size is None, offloading is disabled (backend is ignored)
("native", None, 1, 1, None, None),
],
)
def test_kv_connector(
kv_offloading_backend, kv_offloading_size, tp, pp, expected_backend, expected_bytes
stub_lmcache_mp_connector,
kv_offloading_backend,
kv_offloading_size,
tp,
pp,
expected_backend,
expected_bytes,
):
kv_transfer_config = (
KVTransferConfig(kv_connector_extra_config={"existing_key": "existing_value"})
@@ -59,10 +90,12 @@ def test_kv_connector(
# Existing config should be preserved
assert kv_connector_extra_config["existing_key"] == "existing_value"
elif kv_offloading_backend == "lmcache":
assert kv_connector_extra_config["lmcache.local_cpu"] is True
assert kv_connector_extra_config["lmcache.max_local_cpu_size"] == expected_bytes
# Existing config should be replaced
assert "existing_key" not in kv_connector_extra_config
# MP mode does not push lmcache.local_cpu / max_local_cpu_size into
# extra config (the LMCache server owns capacity). Pre-existing
# extra config entries are preserved as-is.
assert "lmcache.local_cpu" not in kv_connector_extra_config
assert "lmcache.max_local_cpu_size" not in kv_connector_extra_config
assert kv_connector_extra_config["existing_key"] == "existing_value"
def _build_config(