mirror of
https://github.com/vllm-project/vllm.git
synced 2026-06-06 00:16:14 +00:00
[KV Connector] Update lmcache kv_offloading_backend to use LMCacheMPConnector (#42865)
Signed-off-by: baoloongmao <baoloongmao@tencent.com>
This commit is contained in:
@@ -6,25 +6,56 @@
|
||||
import pytest
|
||||
|
||||
from vllm.config import CacheConfig, KVTransferConfig, ParallelConfig, VllmConfig
|
||||
from vllm.distributed.kv_transfer.kv_connector.factory import KVConnectorFactory
|
||||
|
||||
pytestmark = pytest.mark.cpu_test
|
||||
|
||||
|
||||
class _StubLMCacheMPConnector:
|
||||
"""Stand-in for LMCacheMPConnector used in config-translation tests.
|
||||
|
||||
The real connector module hard-imports the optional ``lmcache`` package
|
||||
at module load time, which is not installed in the cpu_test image. This
|
||||
test only asserts on the connector *name* and the ``extra_config`` dict
|
||||
produced by ``VllmConfig``, never instantiates the connector, so a bare
|
||||
placeholder class is sufficient. Not subclassing ``SupportsHMA`` mirrors
|
||||
the real connector's HMA support (it does not support HMA either)."""
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def stub_lmcache_mp_connector(monkeypatch):
|
||||
"""Replace the lazy loader so VllmConfig.__post_init__ does not import
|
||||
``lmcache_mp_connector`` (and thus ``lmcache``) during config tests."""
|
||||
monkeypatch.setitem(
|
||||
KVConnectorFactory._registry,
|
||||
"LMCacheMPConnector",
|
||||
lambda: _StubLMCacheMPConnector,
|
||||
)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"kv_offloading_backend,kv_offloading_size,tp,pp,expected_backend,expected_bytes",
|
||||
[
|
||||
("native", 4.0, 1, 1, "OffloadingConnector", 4.0 * (1 << 30)),
|
||||
# bytes per rank: 8.0 GiB / (2 * 2) = 2.0 GiB
|
||||
("native", 8.0, 2, 2, "OffloadingConnector", 8.0 * (1 << 30)),
|
||||
("lmcache", 4.0, 1, 1, "LMCacheConnectorV1", 4.0),
|
||||
# size per rank: 8.0 GiB / (2 * 2) = 2.0 GiB
|
||||
("lmcache", 8.0, 2, 2, "LMCacheConnectorV1", 2.0),
|
||||
# ``lmcache`` backend now defaults to LMCacheMPConnector. The KV
|
||||
# storage capacity is owned by the standalone LMCache server, so
|
||||
# ``kv_offloading_size`` is intentionally not propagated.
|
||||
("lmcache", 4.0, 1, 1, "LMCacheMPConnector", None),
|
||||
("lmcache", 8.0, 2, 2, "LMCacheMPConnector", None),
|
||||
# When kv_offloading_size is None, offloading is disabled (backend is ignored)
|
||||
("native", None, 1, 1, None, None),
|
||||
],
|
||||
)
|
||||
def test_kv_connector(
|
||||
kv_offloading_backend, kv_offloading_size, tp, pp, expected_backend, expected_bytes
|
||||
stub_lmcache_mp_connector,
|
||||
kv_offloading_backend,
|
||||
kv_offloading_size,
|
||||
tp,
|
||||
pp,
|
||||
expected_backend,
|
||||
expected_bytes,
|
||||
):
|
||||
kv_transfer_config = (
|
||||
KVTransferConfig(kv_connector_extra_config={"existing_key": "existing_value"})
|
||||
@@ -59,10 +90,12 @@ def test_kv_connector(
|
||||
# Existing config should be preserved
|
||||
assert kv_connector_extra_config["existing_key"] == "existing_value"
|
||||
elif kv_offloading_backend == "lmcache":
|
||||
assert kv_connector_extra_config["lmcache.local_cpu"] is True
|
||||
assert kv_connector_extra_config["lmcache.max_local_cpu_size"] == expected_bytes
|
||||
# Existing config should be replaced
|
||||
assert "existing_key" not in kv_connector_extra_config
|
||||
# MP mode does not push lmcache.local_cpu / max_local_cpu_size into
|
||||
# extra config (the LMCache server owns capacity). Pre-existing
|
||||
# extra config entries are preserved as-is.
|
||||
assert "lmcache.local_cpu" not in kv_connector_extra_config
|
||||
assert "lmcache.max_local_cpu_size" not in kv_connector_extra_config
|
||||
assert kv_connector_extra_config["existing_key"] == "existing_value"
|
||||
|
||||
|
||||
def _build_config(
|
||||
|
||||
Reference in New Issue
Block a user