mirror of
https://github.com/vllm-project/vllm.git
synced 2026-06-06 00:16:14 +00:00
[Refactor] Extract extract_types_from_schema utility from Minimax M2 tool parser (#43025)
Signed-off-by: sfeng33 <4florafeng@gmail.com>
This commit is contained in:
@@ -3,7 +3,10 @@
|
||||
|
||||
import pytest
|
||||
|
||||
from vllm.tool_parsers.utils import coerce_to_schema_type
|
||||
from vllm.tool_parsers.utils import (
|
||||
coerce_to_schema_type,
|
||||
extract_types_from_schema,
|
||||
)
|
||||
|
||||
|
||||
class TestCoerceToSchemaType:
|
||||
@@ -146,3 +149,66 @@ class TestCoerceToSchemaType:
|
||||
|
||||
def test_unrecognized_type_falls_back_to_json(self):
|
||||
assert coerce_to_schema_type("42", "interval") == 42
|
||||
|
||||
|
||||
class TestExtractTypesFromSchema:
    """Checks for ``extract_types_from_schema``.

    Multi-type results are compared as sets because these tests do not pin
    the order of the returned list.
    """

    def test_direct_type_string(self):
        """A plain ``type`` string is returned as a one-element list."""
        schema = {"type": "string"}
        assert extract_types_from_schema(schema) == ["string"]

    def test_direct_type_integer(self):
        """Same as above for ``integer``."""
        schema = {"type": "integer"}
        assert extract_types_from_schema(schema) == ["integer"]

    def test_type_array(self):
        """A list-valued ``type`` contributes every entry."""
        schema = {"type": ["string", "null"]}
        assert set(extract_types_from_schema(schema)) == {"string", "null"}

    def test_anyof(self):
        """``anyOf`` branches are merged."""
        branches = [{"type": "object"}, {"type": "null"}]
        found = extract_types_from_schema({"anyOf": branches})
        assert set(found) == {"object", "null"}

    def test_oneof(self):
        """``oneOf`` branches are merged."""
        branches = [{"type": "integer"}, {"type": "string"}]
        found = extract_types_from_schema({"oneOf": branches})
        assert set(found) == {"integer", "string"}

    def test_allof(self):
        """A single ``allOf`` branch yields that branch's type."""
        found = extract_types_from_schema({"allOf": [{"type": "object"}]})
        assert found == ["object"]

    def test_enum_infers_types(self):
        """Types are inferred from the Python values listed in ``enum``."""
        found = extract_types_from_schema({"enum": [1, "a", None]})
        assert set(found) == {"integer", "string", "null"}

    def test_enum_with_bool(self):
        """Booleans map to ``boolean``, not ``integer``."""
        found = extract_types_from_schema({"enum": [True, False]})
        assert found == ["boolean"]

    def test_enum_with_float(self):
        """Floats map to ``number``."""
        found = extract_types_from_schema({"enum": [1.5, 2.5]})
        assert found == ["number"]

    def test_enum_with_list_and_dict(self):
        """Lists map to ``array`` and dicts to ``object``."""
        found = extract_types_from_schema({"enum": [[1, 2], {"a": 1}]})
        assert set(found) == {"array", "object"}

    def test_none_schema_defaults_to_string(self):
        """``None`` carries no type information."""
        assert extract_types_from_schema(None) == ["string"]

    def test_non_dict_schema_defaults_to_string(self):
        """A bare string is not a schema dict, so the default applies."""
        assert extract_types_from_schema("string") == ["string"]

    def test_empty_dict_defaults_to_string(self):
        """An empty schema falls back to the default."""
        assert extract_types_from_schema({}) == ["string"]

    def test_nested_anyof(self):
        """Nested ``anyOf`` branches are flattened recursively."""
        inner = {"anyOf": [{"type": "integer"}, {"type": "null"}]}
        outer = {"anyOf": [inner, {"type": "string"}]}
        found = extract_types_from_schema(outer)
        assert set(found) == {"integer", "null", "string"}
|
||||
|
||||
@@ -4,7 +4,6 @@
|
||||
import json
|
||||
import uuid
|
||||
from collections.abc import Sequence
|
||||
from typing import Any
|
||||
|
||||
import regex as re
|
||||
|
||||
@@ -25,7 +24,11 @@ from vllm.tool_parsers.abstract_tool_parser import (
|
||||
Tool,
|
||||
ToolParser,
|
||||
)
|
||||
from vllm.tool_parsers.utils import coerce_to_schema_type
|
||||
from vllm.tool_parsers.utils import (
|
||||
coerce_to_schema_type,
|
||||
extract_types_from_schema,
|
||||
find_tool_properties,
|
||||
)
|
||||
|
||||
logger = init_logger(__name__)
|
||||
|
||||
@@ -87,89 +90,6 @@ class MinimaxM2ToolParser(ToolParser):
|
||||
return name_str[1:-1]
|
||||
return name_str
|
||||
|
||||
def _extract_types_from_schema(self, schema: Any) -> list[str]:
    """
    Extract all possible types from a JSON schema definition.
    Handles anyOf, oneOf, allOf, type arrays, and enum fields.

    Args:
        schema: The JSON schema definition for a parameter

    Returns:
        List of distinct type strings (e.g., ["string", "integer", "null"])
        in first-seen order: the "type" field, then types inferred from
        "enum" values, then anyOf/oneOf/allOf branches. Returns ["string"]
        when the schema is not a dict or yields no type at all.
    """
    if not isinstance(schema, dict):
        return ["string"]

    # A dict is used as an insertion-ordered set so the returned order is
    # the same on every run rather than following string-hash order.
    seen: dict[str, None] = {}

    # Handle direct "type" field (a single name or a list of names)
    declared = schema.get("type")
    if isinstance(declared, str):
        seen[declared] = None
    elif isinstance(declared, list):
        for entry in declared:
            if isinstance(entry, str):
                seen[entry] = None

    # Handle enum - infer types from enum values
    enum_values = schema.get("enum")
    if isinstance(enum_values, list):
        for value in enum_values:
            if value is None:
                seen["null"] = None
            # bool must be tested before int: bool is a subclass of int.
            elif isinstance(value, bool):
                seen["boolean"] = None
            elif isinstance(value, int):
                seen["integer"] = None
            elif isinstance(value, float):
                seen["number"] = None
            elif isinstance(value, str):
                seen["string"] = None
            elif isinstance(value, list):
                seen["array"] = None
            elif isinstance(value, dict):
                seen["object"] = None

    # Handle anyOf, oneOf, allOf - recursively extract types
    for choice_field in ("anyOf", "oneOf", "allOf"):
        choices = schema.get(choice_field)
        if isinstance(choices, list):
            for choice in choices:
                for name in self._extract_types_from_schema(choice):
                    seen[name] = None

    # If no types found, default to string
    return list(seen) if seen else ["string"]
|
||||
|
||||
def _get_param_types_from_config(
    self, param_name: str, param_config: dict
) -> list[str]:
    """
    Look up the schema types declared for a single tool parameter.

    Args:
        param_name: The name of the parameter
        param_config: The properties dict from the tool schema

    Returns:
        ["string"] when the parameter is missing from param_config or its
        entry is not a dict; otherwise the result of
        self._extract_types_from_schema for that entry.
    """
    entry = param_config.get(param_name)
    if isinstance(entry, dict):
        return self._extract_types_from_schema(entry)

    # Missing key or a non-dict entry: there is no usable schema.
    return ["string"]
|
||||
|
||||
def _parse_single_invoke(
|
||||
self, invoke_str: str, tools: list | None
|
||||
) -> ToolCall | None:
|
||||
@@ -180,20 +100,7 @@ class MinimaxM2ToolParser(ToolParser):
|
||||
return None
|
||||
|
||||
function_name = self._extract_name(name_match.group(1))
|
||||
|
||||
# Get parameter configuration
|
||||
param_config = {}
|
||||
if tools:
|
||||
for tool in tools:
|
||||
if (
|
||||
hasattr(tool, "function")
|
||||
and tool.function.name == function_name
|
||||
and hasattr(tool.function, "parameters")
|
||||
):
|
||||
params = tool.function.parameters
|
||||
if isinstance(params, dict) and "properties" in params:
|
||||
param_config = params["properties"]
|
||||
break
|
||||
tool_properties = find_tool_properties(tools, function_name)
|
||||
|
||||
# Extract parameters
|
||||
param_dict = {}
|
||||
@@ -202,12 +109,10 @@ class MinimaxM2ToolParser(ToolParser):
|
||||
if param_match:
|
||||
param_name = self._extract_name(param_match.group(1))
|
||||
param_value = param_match.group(2).strip()
|
||||
|
||||
# Get parameter types (supports anyOf/oneOf/allOf)
|
||||
param_type = self._get_param_types_from_config(param_name, param_config)
|
||||
|
||||
# Convert value
|
||||
param_dict[param_name] = coerce_to_schema_type(param_value, param_type)
|
||||
param_types = extract_types_from_schema(
|
||||
tool_properties.get(param_name, {})
|
||||
)
|
||||
param_dict[param_name] = coerce_to_schema_type(param_value, param_types)
|
||||
|
||||
return ToolCall(
|
||||
type="function",
|
||||
|
||||
@@ -450,6 +450,52 @@ def make_valid_python(text: str) -> tuple[str, str] | None:
|
||||
return candidate, added_text
|
||||
|
||||
|
||||
def extract_types_from_schema(schema: Any) -> list[str]:
    """Extract all possible type strings from a JSON Schema definition.

    Handles ``type`` (string or list), ``enum`` value inference, and
    recursive ``anyOf``/``oneOf``/``allOf``. Returns ``["string"]``
    when no type information can be determined.

    Args:
        schema: A JSON Schema fragment. Anything that is not a ``dict``
            (including ``None``) is treated as carrying no type info.

    Returns:
        The distinct type names in first-seen order: the ``type`` keyword
        first, then types inferred from ``enum`` values, then the
        ``anyOf``, ``oneOf`` and ``allOf`` branches in that order.
    """
    if not isinstance(schema, dict):
        return ["string"]

    # A dict is used as an insertion-ordered set so the returned order is
    # the same on every run rather than following string-hash order.
    found: dict[str, None] = {}

    declared = schema.get("type")
    if isinstance(declared, str):
        found[declared] = None
    elif isinstance(declared, list):
        for entry in declared:
            if isinstance(entry, str):
                found[entry] = None

    enum_values = schema.get("enum")
    if isinstance(enum_values, list):
        for value in enum_values:
            if value is None:
                found["null"] = None
            # bool must be tested before int: bool is a subclass of int.
            elif isinstance(value, bool):
                found["boolean"] = None
            elif isinstance(value, int):
                found["integer"] = None
            elif isinstance(value, float):
                found["number"] = None
            elif isinstance(value, str):
                found["string"] = None
            elif isinstance(value, list):
                found["array"] = None
            elif isinstance(value, dict):
                found["object"] = None

    for keyword in ("anyOf", "oneOf", "allOf"):
        branches = schema.get(keyword)
        if isinstance(branches, list):
            for branch in branches:
                for name in extract_types_from_schema(branch):
                    found[name] = None

    return list(found) if found else ["string"]
|
||||
|
||||
|
||||
_TYPE_ALIASES: dict[str, str] = {
|
||||
"str": "string",
|
||||
"text": "string",
|
||||
|
||||
Reference in New Issue
Block a user