[KV Connector] Update lmcache kv_offloading_backend to use LMCacheMPConnector (#42865)

Signed-off-by: baoloongmao <baoloongmao@tencent.com>
2026-06-06 00:16:14 +00:00 · 2026-06-04 10:23:55 +08:00
parent 0c1e6f63f5
commit b58e082d95
2 changed files with 47 additions and 18 deletions
@@ -6,25 +6,56 @@
 import pytest

 from vllm.config import CacheConfig, KVTransferConfig, ParallelConfig, VllmConfig
+from vllm.distributed.kv_transfer.kv_connector.factory import KVConnectorFactory

 pytestmark = pytest.mark.cpu_test


+class _StubLMCacheMPConnector:
+    """Stand-in for LMCacheMPConnector used in config-translation tests.
+
+    The real connector module hard-imports the optional ``lmcache`` package
+    at module load time, which is not installed in the cpu_test image. This
+    test only asserts on the connector *name* and the ``extra_config`` dict
+    produced by ``VllmConfig``, never instantiates the connector, so a bare
+    placeholder class is sufficient. Not subclassing ``SupportsHMA`` mirrors
+    the real connector's HMA support (it does not support HMA either)."""
+
+
+@pytest.fixture
+def stub_lmcache_mp_connector(monkeypatch):
+    """Replace the lazy loader so VllmConfig.__post_init__ does not import
+    ``lmcache_mp_connector`` (and thus ``lmcache``) during config tests."""
+    monkeypatch.setitem(
+        KVConnectorFactory._registry,
+        "LMCacheMPConnector",
+        lambda: _StubLMCacheMPConnector,
+    )
+
+
@pytest.mark.parametrize(
    "kv_offloading_backend,kv_offloading_size,tp,pp,expected_backend,expected_bytes",
    [
        ("native", 4.0, 1, 1, "OffloadingConnector", 4.0 * (1 << 30)),
        # bytes per rank: 8.0 GiB / (2 * 2) = 2.0 GiB
        ("native", 8.0, 2, 2, "OffloadingConnector", 8.0 * (1 << 30)),
-        ("lmcache", 4.0, 1, 1, "LMCacheConnectorV1", 4.0),
-        # size per rank: 8.0 GiB / (2 * 2) = 2.0 GiB
-        ("lmcache", 8.0, 2, 2, "LMCacheConnectorV1", 2.0),
+        # ``lmcache`` backend now defaults to LMCacheMPConnector. The KV
+        # storage capacity is owned by the standalone LMCache server, so
+        # ``kv_offloading_size`` is intentionally not propagated.
+        ("lmcache", 4.0, 1, 1, "LMCacheMPConnector", None),
+        ("lmcache", 8.0, 2, 2, "LMCacheMPConnector", None),
        # When kv_offloading_size is None, offloading is disabled (backend is ignored)
        ("native", None, 1, 1, None, None),
    ],
 )
 def test_kv_connector(
-    kv_offloading_backend, kv_offloading_size, tp, pp, expected_backend, expected_bytes
+    stub_lmcache_mp_connector,
+    kv_offloading_backend,
+    kv_offloading_size,
+    tp,
+    pp,
+    expected_backend,
+    expected_bytes,
 ):
    kv_transfer_config = (
        KVTransferConfig(kv_connector_extra_config={"existing_key": "existing_value"})
@@ -59,10 +90,12 @@ def test_kv_connector(
        # Existing config should be preserved
        assert kv_connector_extra_config["existing_key"] == "existing_value"
    elif kv_offloading_backend == "lmcache":
-        assert kv_connector_extra_config["lmcache.local_cpu"] is True
-        assert kv_connector_extra_config["lmcache.max_local_cpu_size"] == expected_bytes
-        # Existing config should be replaced
-        assert "existing_key" not in kv_connector_extra_config
+        # MP mode does not push lmcache.local_cpu / max_local_cpu_size into
+        # extra config (the LMCache server owns capacity). Pre-existing
+        # extra config entries are preserved as-is.
+        assert "lmcache.local_cpu" not in kv_connector_extra_config
+        assert "lmcache.max_local_cpu_size" not in kv_connector_extra_config
+        assert kv_connector_extra_config["existing_key"] == "existing_value"


 def _build_config(