From 0c1e6f63f5608f76f459355cb6e164f093bb807b Mon Sep 17 00:00:00 2001
From: Ted Mostly
Date: Thu, 4 Jun 2026 10:22:03 +0800
Subject: [PATCH] =?UTF-8?q?[Bugfix]=20Fix=20VLLMNotFoundError=20when=20usi?=
 =?UTF-8?q?ng=20LoRA=20adapter=20name=20in=20poolin=E2=80=A6=20(#44410)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Ted Mostly
---
 .../serve/lora/test_serving_models.py    | 61 +++++++++++++++++++
 vllm/entrypoints/pooling/base/serving.py |  1 +
 2 files changed, 62 insertions(+)

diff --git a/tests/entrypoints/serve/lora/test_serving_models.py b/tests/entrypoints/serve/lora/test_serving_models.py
index ce9fdcc2bfb..0cab3fd42cf 100644
--- a/tests/entrypoints/serve/lora/test_serving_models.py
+++ b/tests/entrypoints/serve/lora/test_serving_models.py
@@ -6,6 +6,7 @@ from unittest.mock import MagicMock
 
 import pytest
 
+from vllm import PoolingParams
 from vllm.config import ModelConfig
 from vllm.engine.protocol import EngineClient
 from vllm.entrypoints.openai.engine.protocol import (
@@ -13,10 +14,13 @@ from vllm.entrypoints.openai.engine.protocol import (
 )
 from vllm.entrypoints.openai.models.protocol import BaseModelPath
 from vllm.entrypoints.openai.models.serving import OpenAIServingModels
+from vllm.entrypoints.pooling.base.serving import PoolingServingBase
+from vllm.entrypoints.pooling.typing import PoolingServeContext
 from vllm.entrypoints.serve.lora.protocol import (
     LoadLoRAAdapterRequest,
     UnloadLoRAAdapterRequest,
 )
+from vllm.exceptions import VLLMNotFoundError
 from vllm.lora.request import LoRARequest
 
 MODEL_NAME = "hmellor/tiny-random-LlamaForCausalLM"
@@ -130,3 +134,60 @@ async def test_unload_lora_adapter_not_found():
     assert isinstance(response, ErrorResponse)
     assert response.error.type == "NotFoundError"
     assert response.error.code == HTTPStatus.NOT_FOUND
+
+
+class _ConcretePoolingServing(PoolingServingBase):
+    """Minimal concrete subclass used only in these unit tests."""
+
+    request_id_prefix = "test"
+
+    def get_io_processor(self, request):
+        raise NotImplementedError
+
+    def _build_response(self, ctx):
+        raise NotImplementedError
+
+
+def _make_pooling_serving(lora_name: str) -> _ConcretePoolingServing:
+    lora_request = LoRARequest(
+        lora_name=lora_name, lora_int_id=1, lora_path="/path/to/lora"
+    )
+    mock_models = MagicMock()
+    mock_models.lora_requests = {lora_name: lora_request}
+    mock_models.is_base_model.side_effect = lambda name: name == MODEL_NAME
+
+    serving = object.__new__(_ConcretePoolingServing)
+    serving.models = mock_models
+    return serving
+
+
+def _make_pooling_ctx(model_name: str) -> PoolingServeContext:
+    mock_request = MagicMock()
+    mock_request.model = model_name
+    return PoolingServeContext(
+        request=mock_request,
+        model_name=MODEL_NAME,
+        request_id="test-id",
+        pooling_params=PoolingParams(),
+    )
+
+
+def test_pooling_maybe_get_adapters_lora_name_sets_lora_request():
+    """LoRA adapter name must populate ctx.lora_request without raising."""
+    lora_name = "bot-embed-lora"
+    serving = _make_pooling_serving(lora_name)
+    ctx = _make_pooling_ctx(lora_name)
+
+    serving._maybe_get_adapters(ctx)
+
+    assert ctx.lora_request is not None
+    assert ctx.lora_request.lora_name == lora_name
+
+
+def test_pooling_maybe_get_adapters_unknown_model_raises():
+    """An unrecognised model name must still raise VLLMNotFoundError."""
+    serving = _make_pooling_serving("some-lora")
+    ctx = _make_pooling_ctx("unknown-model")
+
+    with pytest.raises(VLLMNotFoundError):
+        serving._maybe_get_adapters(ctx)
diff --git a/vllm/entrypoints/pooling/base/serving.py b/vllm/entrypoints/pooling/base/serving.py
index 4a9ef4a0628..d44d5f7f734 100644
--- a/vllm/entrypoints/pooling/base/serving.py
+++ b/vllm/entrypoints/pooling/base/serving.py
@@ -283,6 +283,7 @@ class PoolingServingBase(ABC):
         request = ctx.request
         if request.model in self.models.lora_requests:
             ctx.lora_request = self.models.lora_requests[request.model]
+            return None
 
         # Currently only support default modality specific loras
         # if we have exactly one lora matched on the request.