"""Tests for the OpenAI-compatible multimodal (MM) encoder server."""
import os
import tempfile

import openai
import pytest
import requests
import yaml
from utils.llm_data import llm_models_root

from ..test_llm import get_model_path
from .openai_server import RemoteMMEncoderServer

pytestmark = pytest.mark.threadleak(enabled=False)


@pytest.fixture(scope="module", ids=["Qwen2.5-VL-3B-Instruct"])
def model_name():
    return "Qwen2.5-VL-3B-Instruct"


@pytest.fixture(scope="module",
                params=[True, False],
                ids=["extra_options", "no_extra_options"])
def extra_encoder_options(request):
    return request.param


@pytest.fixture(scope="module")
def temp_extra_encoder_options_file():
    """Write a temporary YAML file with extra encoder options; remove it on teardown."""
    temp_dir = tempfile.gettempdir()
    temp_file_path = os.path.join(temp_dir, "extra_encoder_options.yaml")
    try:
        extra_encoder_options_dict = {
            "max_batch_size": 8,
            "max_num_tokens": 16384
        }
        with open(temp_file_path, 'w') as f:
            yaml.dump(extra_encoder_options_dict, f)
        yield temp_file_path
    finally:
        if os.path.exists(temp_file_path):
            os.remove(temp_file_path)


@pytest.fixture(scope="module")
def server(model_name: str, extra_encoder_options: bool,
           temp_extra_encoder_options_file: str):
    model_path = get_model_path(model_name)
    args = ["--max_batch_size", "8"]
    if extra_encoder_options:
        args.extend(
            ["--extra_encoder_options", temp_extra_encoder_options_file])
    with RemoteMMEncoderServer(model_path, args) as remote_server:
        yield remote_server


@pytest.fixture(scope="module")
def client(server: RemoteMMEncoderServer):
    return server.get_client()


@pytest.fixture(scope="module")
def async_client(server: RemoteMMEncoderServer):
    return server.get_async_client()


def test_multimodal_content_mm_encoder(client: openai.OpenAI,
                                       model_name: str):
    content_text = "Describe the natural environment in the image."
    image_url = str(llm_models_root() / "multimodals" / "test_data" /
                    "seashore.png")
    messages = [{
        "role": "user",
        "content": [{
            "type": "text",
            "text": content_text
        }, {
            "type": "image_url",
            "image_url": {
                "url": image_url
            }
        }],
    }]

    chat_completion = client.chat.completions.create(
        model=model_name,
        messages=messages,
        temperature=0.0,
    )
    assert chat_completion.id is not None
    assert len(chat_completion.choices) == 1
    choice = chat_completion.choices[0]

    # Verify mm_embedding_handle is present
    assert hasattr(choice, 'mm_embedding_handle')
    assert choice.mm_embedding_handle is not None

    # Verify the handle contains tensor information
    mm_handle = choice.mm_embedding_handle
    assert "tensor_size" in mm_handle
    # Qwen2.5-VL produces 324 multimodal tokens for this image.
    assert mm_handle["tensor_size"][0] == 324
    # Qwen2.5-VL vision encoder hidden size.
    assert mm_handle["tensor_size"][1] == 2048


def test_health(server: RemoteMMEncoderServer):
    health_url = server.url_for("health")
    response = requests.get(health_url)
    assert response.status_code == 200


def test_models_endpoint(client: openai.OpenAI, model_name: str):
    models = client.models.list()
    assert len(models.data) >= 1
    model_names = [model.id for model in models.data]
    # The model name may be transformed by the server, so check whether any
    # served model contains our base name.
    expected_name = model_name.split('/')[-1]
    assert any(expected_name in name for name in model_names)
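

# NOTE: the async_client fixture above is not exercised by any test in this
# module. Below is a minimal sketch of an async counterpart to the chat test.
# It assumes pytest-asyncio is configured for this suite and that the server
# returns the same mm_embedding_handle extension through openai.AsyncOpenAI;
# the test name and assertions here are illustrative, not part of the
# original module.
@pytest.mark.asyncio
async def test_multimodal_content_mm_encoder_async(
        async_client: openai.AsyncOpenAI, model_name: str):
    messages = [{
        "role": "user",
        "content": [{
            "type": "text",
            "text": "Describe the natural environment in the image."
        }, {
            "type": "image_url",
            "image_url": {
                "url": str(llm_models_root() / "multimodals" / "test_data" /
                           "seashore.png")
            }
        }],
    }]
    chat_completion = await async_client.chat.completions.create(
        model=model_name,
        messages=messages,
        temperature=0.0,
    )
    # Same contract as the sync test: one choice carrying an embedding handle.
    assert len(chat_completion.choices) == 1
    assert chat_completion.choices[0].mm_embedding_handle is not None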