feat(types): add Audio class and audio field to Message for multimodal support

This commit is contained in:
Ghraven
2026-04-29 12:23:29 +08:00
parent dbccf192ac
commit 75fb01034b
+40
View File
@@ -186,6 +186,34 @@ class Image(BaseModel):
raise ValueError('Invalid image data, expected base64 string or path to image file') from Exception raise ValueError('Invalid image data, expected base64 string or path to image file') from Exception
class Audio(BaseModel):
  """Audio payload for multimodal messages; serializes to a base64 string.

  `value` may be a `Path` to an audio file, raw audio `bytes`, or a `str`
  holding either a file path or audio data that is already base64-encoded.
  """

  value: Union[str, bytes, Path]

  @model_serializer
  def serialize_model(self):
    """Return the audio data as base64 text.

    Returns:
      The base64-encoded file contents for a `Path` or an existing `str` path,
      the base64 encoding of raw `bytes`, or the string itself when it already
      decodes as base64.

    Raises:
      ValueError: if a string names a known audio extension but no such file
        exists, or if it is neither an existing path nor decodable base64.
    """
    source = self.value

    if isinstance(source, Path):
      return b64encode(source.read_bytes()).decode()
    if isinstance(source, bytes):
      return b64encode(source).decode()
    if not isinstance(source, str):
      # Mirrors the original fall-through for values outside the declared union.
      return None

    try:
      candidate = Path(source)
      if candidate.exists():
        return b64encode(candidate.read_bytes()).decode()
    except (OSError, ValueError):
      # A long base64 string is not a usable file name (e.g. "name too long");
      # fall through and treat the value as base64 instead.
      pass

    # The string may still be a path to a file that does not exist. Compare the
    # suffix case-insensitively: b64decode ignores '.', so a missing "clip.MP3"
    # could otherwise slip through below and be returned as if it were audio data.
    audio_suffixes = ('mp3', 'mp4', 'mpeg', 'mpga', 'm4a', 'ogg', 'wav', 'webm')
    if source.rsplit('.', 1)[-1].lower() in audio_suffixes:
      raise ValueError(f'File {source} does not exist')

    try:
      # Decode only to check that the string is already base64.
      b64decode(source)
    except ValueError as err:
      # binascii.Error is a ValueError subclass; chain the real cause rather
      # than the bare Exception class so the decode failure stays visible.
      raise ValueError('Invalid audio data, expected base64 string or path to audio file') from err
    return source
class GenerateRequest(BaseGenerateRequest): class GenerateRequest(BaseGenerateRequest):
prompt: Optional[str] = None prompt: Optional[str] = None
'Prompt to generate response from.' 'Prompt to generate response from.'
@@ -327,6 +355,18 @@ class Message(SubscriptableBaseModel):
Valid image formats depend on the model. See the model card for more information. Valid image formats depend on the model. See the model card for more information.
""" """
audio: Optional[Sequence[Audio]] = None
"""
Optional list of audio data for multimodal models.
Valid input types are:
- `str` or path-like object: path to audio file (a `str` may also be audio data that is already base64-encoded)
- `bytes` or bytes-like object: raw audio data
Valid audio formats depend on the model. See the model card for more information.
"""
tool_name: Optional[str] = None tool_name: Optional[str] = None
'Name of the executed tool.' 'Name of the executed tool.'