[Refactor] Extract extract_types_from_schema utility from Minimax M2 tool parser (#43025)

Signed-off-by: sfeng33 <4florafeng@gmail.com>
This commit is contained in:
Flora Feng
2026-05-19 11:21:12 -04:00
committed by GitHub
parent 1c6158083a
commit 42b4f1fdf7
3 changed files with 123 additions and 106 deletions
+67 -1
View File
@@ -3,7 +3,10 @@
import pytest
from vllm.tool_parsers.utils import coerce_to_schema_type
from vllm.tool_parsers.utils import (
coerce_to_schema_type,
extract_types_from_schema,
)
class TestCoerceToSchemaType:
@@ -146,3 +149,66 @@ class TestCoerceToSchemaType:
def test_unrecognized_type_falls_back_to_json(self):
    """Coercing "42" with the unknown type name "interval" yields the int 42."""
    coerced = coerce_to_schema_type("42", "interval")
    assert coerced == 42
class TestExtractTypesFromSchema:
    """Unit tests for ``extract_types_from_schema``.

    Results that may contain several type names are compared as sets, so
    these tests do not depend on the order of the returned list.
    """

    # --- direct "type" keyword -------------------------------------------

    def test_direct_type_string(self):
        schema = {"type": "string"}
        assert extract_types_from_schema(schema) == ["string"]

    def test_direct_type_integer(self):
        schema = {"type": "integer"}
        assert extract_types_from_schema(schema) == ["integer"]

    def test_type_array(self):
        schema = {"type": ["string", "null"]}
        found = set(extract_types_from_schema(schema))
        assert found == {"string", "null"}

    # --- anyOf / oneOf / allOf -------------------------------------------

    def test_anyof(self):
        alternatives = [{"type": "object"}, {"type": "null"}]
        found = set(extract_types_from_schema({"anyOf": alternatives}))
        assert found == {"object", "null"}

    def test_oneof(self):
        alternatives = [{"type": "integer"}, {"type": "string"}]
        found = set(extract_types_from_schema({"oneOf": alternatives}))
        assert found == {"integer", "string"}

    def test_allof(self):
        found = extract_types_from_schema({"allOf": [{"type": "object"}]})
        assert found == ["object"]

    # --- type inference from "enum" values -------------------------------

    def test_enum_infers_types(self):
        found = set(extract_types_from_schema({"enum": [1, "a", None]}))
        assert found == {"integer", "string", "null"}

    def test_enum_with_bool(self):
        found = extract_types_from_schema({"enum": [True, False]})
        assert found == ["boolean"]

    def test_enum_with_float(self):
        found = extract_types_from_schema({"enum": [1.5, 2.5]})
        assert found == ["number"]

    def test_enum_with_list_and_dict(self):
        found = set(extract_types_from_schema({"enum": [[1, 2], {"a": 1}]}))
        assert found == {"array", "object"}

    # --- fallbacks when no type information is present -------------------

    def test_none_schema_defaults_to_string(self):
        found = extract_types_from_schema(None)
        assert found == ["string"]

    def test_non_dict_schema_defaults_to_string(self):
        found = extract_types_from_schema("string")
        assert found == ["string"]

    def test_empty_dict_defaults_to_string(self):
        found = extract_types_from_schema({})
        assert found == ["string"]

    # --- nesting ----------------------------------------------------------

    def test_nested_anyof(self):
        inner = {"anyOf": [{"type": "integer"}, {"type": "null"}]}
        outer = {"anyOf": [inner, {"type": "string"}]}
        found = set(extract_types_from_schema(outer))
        assert found == {"integer", "null", "string"}
+10 -105
View File
@@ -4,7 +4,6 @@
import json
import uuid
from collections.abc import Sequence
from typing import Any
import regex as re
@@ -25,7 +24,11 @@ from vllm.tool_parsers.abstract_tool_parser import (
Tool,
ToolParser,
)
from vllm.tool_parsers.utils import coerce_to_schema_type
from vllm.tool_parsers.utils import (
coerce_to_schema_type,
extract_types_from_schema,
find_tool_properties,
)
logger = init_logger(__name__)
@@ -87,89 +90,6 @@ class MinimaxM2ToolParser(ToolParser):
return name_str[1:-1]
return name_str
def _extract_types_from_schema(self, schema: Any) -> list[str]:
"""
Extract all possible types from a JSON schema definition.
Handles anyOf, oneOf, allOf, type arrays, and enum fields.
Args:
schema: The JSON schema definition for a parameter
Returns:
List of type strings (e.g., ["string", "integer", "null"])
"""
if schema is None:
return ["string"]
if not isinstance(schema, dict):
return ["string"]
types: set[str] = set()
# Handle direct "type" field
if "type" in schema:
type_value = schema["type"]
if isinstance(type_value, str):
types.add(type_value)
elif isinstance(type_value, list):
for t in type_value:
if isinstance(t, str):
types.add(t)
# Handle enum - infer types from enum values
if "enum" in schema and isinstance(schema["enum"], list) and schema["enum"]:
for value in schema["enum"]:
if value is None:
types.add("null")
elif isinstance(value, bool):
types.add("boolean")
elif isinstance(value, int):
types.add("integer")
elif isinstance(value, float):
types.add("number")
elif isinstance(value, str):
types.add("string")
elif isinstance(value, list):
types.add("array")
elif isinstance(value, dict):
types.add("object")
# Handle anyOf, oneOf, allOf - recursively extract types
for choice_field in ("anyOf", "oneOf", "allOf"):
if choice_field in schema and isinstance(schema[choice_field], list):
for choice in schema[choice_field]:
extracted = self._extract_types_from_schema(choice)
types.update(extracted)
# If no types found, default to string
if not types:
return ["string"]
return list(types)
def _get_param_types_from_config(
self, param_name: str, param_config: dict
) -> list[str]:
"""
Get parameter types from parameter configuration.
Handles anyOf, oneOf, allOf, and direct type definitions.
Args:
param_name: The name of the parameter
param_config: The properties dict from the tool schema
Returns:
List of type strings
"""
if param_name not in param_config:
return ["string"]
param_schema = param_config[param_name]
if not isinstance(param_schema, dict):
return ["string"]
return self._extract_types_from_schema(param_schema)
def _parse_single_invoke(
self, invoke_str: str, tools: list | None
) -> ToolCall | None:
@@ -180,20 +100,7 @@ class MinimaxM2ToolParser(ToolParser):
return None
function_name = self._extract_name(name_match.group(1))
# Get parameter configuration
param_config = {}
if tools:
for tool in tools:
if (
hasattr(tool, "function")
and tool.function.name == function_name
and hasattr(tool.function, "parameters")
):
params = tool.function.parameters
if isinstance(params, dict) and "properties" in params:
param_config = params["properties"]
break
tool_properties = find_tool_properties(tools, function_name)
# Extract parameters
param_dict = {}
@@ -202,12 +109,10 @@ class MinimaxM2ToolParser(ToolParser):
if param_match:
param_name = self._extract_name(param_match.group(1))
param_value = param_match.group(2).strip()
# Get parameter types (supports anyOf/oneOf/allOf)
param_type = self._get_param_types_from_config(param_name, param_config)
# Convert value
param_dict[param_name] = coerce_to_schema_type(param_value, param_type)
param_types = extract_types_from_schema(
tool_properties.get(param_name, {})
)
param_dict[param_name] = coerce_to_schema_type(param_value, param_types)
return ToolCall(
type="function",
+46
View File
@@ -450,6 +450,52 @@ def make_valid_python(text: str) -> tuple[str, str] | None:
return candidate, added_text
def extract_types_from_schema(schema: Any) -> list[str]:
    """Extract all possible type strings from a JSON Schema definition.

    Handles ``type`` (string or list), ``enum`` value inference, and
    recursive ``anyOf``/``oneOf``/``allOf``.

    Args:
        schema: A JSON Schema fragment. Any value that is not a dict
            (including ``None``) is treated as having no type information.

    Returns:
        The distinct type names in sorted order, or ``["string"]`` when
        no type information can be determined.
    """
    # isinstance already rejects None, so no separate None check is needed.
    if not isinstance(schema, dict):
        return ["string"]

    types: set[str] = set()

    # Direct "type" keyword: a single name or a list of names.
    type_value = schema.get("type")
    if isinstance(type_value, str):
        types.add(type_value)
    elif isinstance(type_value, list):
        types.update(t for t in type_value if isinstance(t, str))

    # "enum": infer a JSON type name from each listed Python value.
    # bool must be tested before int because bool is a subclass of int.
    enum_type_names = (
        (bool, "boolean"),
        (int, "integer"),
        (float, "number"),
        (str, "string"),
        (list, "array"),
        (dict, "object"),
    )
    enum_values = schema.get("enum")
    if isinstance(enum_values, list):
        for value in enum_values:
            if value is None:
                types.add("null")
                continue
            for python_type, type_name in enum_type_names:
                if isinstance(value, python_type):
                    types.add(type_name)
                    break

    # Combinators: merge the types found in every sub-schema.
    for choice_field in ("anyOf", "oneOf", "allOf"):
        choices = schema.get(choice_field)
        if isinstance(choices, list):
            for choice in choices:
                types.update(extract_types_from_schema(choice))

    # Sort so the result is deterministic: iteration order of a set of
    # strings can differ between processes because of hash randomization.
    return sorted(types) if types else ["string"]
_TYPE_ALIASES: dict[str, str] = {
"str": "string",
"text": "string",