feat(types): add Audio class and audio field to Message for multimodal support

This commit is contained in:
Ghraven 2026-04-29 12:23:29 +08:00
parent dbccf192ac
commit 75fb01034b

View File

@ -186,6 +186,34 @@ class Image(BaseModel):
raise ValueError('Invalid image data, expected base64 string or path to image file') from Exception
class Audio(BaseModel):
    """Audio payload that serializes to a base64-encoded string.

    ``value`` may be a ``Path`` to an audio file, raw audio ``bytes``, or a
    ``str`` holding either the path of an existing file or data that is
    already base64-encoded.
    """

    value: Union[str, bytes, Path]

    @model_serializer
    def serialize_model(self):
        """Return the audio content as a base64 string.

        Raises:
            ValueError: If a string value ends in a known audio extension but
                no such file exists, or if it is neither an existing path nor
                decodable as base64.
        """
        if isinstance(self.value, Path):
            return b64encode(self.value.read_bytes()).decode()
        if isinstance(self.value, bytes):
            return b64encode(self.value).decode()

        if isinstance(self.value, str):
            try:
                if Path(self.value).exists():
                    return b64encode(Path(self.value).read_bytes()).decode()
            except (OSError, ValueError):
                # A long base64 string is not a usable file name (the OS
                # rejects it), so fall through and treat it as base64 data.
                pass

            # The string might be a file path that simply does not exist.
            if self.value.split('.')[-1] in ('mp3', 'mp4', 'mpeg', 'mpga', 'm4a', 'ogg', 'wav', 'webm'):
                raise ValueError(f'File {self.value} does not exist')

            try:
                # Decoding is only a check that the value is already base64;
                # the original string is what gets returned.
                b64decode(self.value)
            except ValueError as err:
                # binascii.Error is a ValueError subclass. Chain the real
                # cause instead of the bare Exception class.
                raise ValueError('Invalid audio data, expected base64 string or path to audio file') from err
            return self.value
class GenerateRequest(BaseGenerateRequest):
prompt: Optional[str] = None
'Prompt to generate response from.'
@ -327,6 +355,18 @@ class Message(SubscriptableBaseModel):
Valid image formats depend on the model. See the model card for more information.
"""
audio: Optional[Sequence[Audio]] = None
"""
Optional list of audio data for multimodal models.
Valid input types are:
- `str` or path-like object: path to audio file
- `bytes` or bytes-like object: raw audio data
Valid audio formats depend on the model. See the model card for more information.
"""
tool_name: Optional[str] = None
'Name of the executed tool.'