feat(types): add Audio class and audio field to Message for multimodal support

2026-05-01 11:48:17 +08:00 · 2026-04-29 12:23:29 +08:00 · 2026-04-29 12:23:29 +08:00 · 75fb01034b
commit 75fb01034b
parent dbccf192ac
1 changed files with 40 additions and 0 deletions
--- a/ollama/_types.py
+++ b/ollama/_types.py
@ -186,6 +186,34 @@ class Image(BaseModel):
        raise ValueError('Invalid image data, expected base64 string or path to image file') from Exception


+class Audio(BaseModel):
+  'Audio input for multimodal models; serializes to a base64-encoded string.'
+  value: Union[str, bytes, Path]
+  'Raw audio bytes, a path to an audio file, or an already base64-encoded string.'
+
+  @model_serializer
+  def serialize_model(self):
+    # bytes are encoded as-is; a Path is read from disk first (read errors propagate to the caller)
+    if isinstance(self.value, (Path, bytes)):
+      return b64encode(self.value.read_bytes() if isinstance(self.value, Path) else self.value).decode()
+
+    if isinstance(self.value, str):
+      try:
+        if Path(self.value).exists():
+          return b64encode(Path(self.value).read_bytes()).decode()
+      except Exception:
+        pass  # Long base64 string can't be wrapped in Path, so fall through and treat as base64 string
+
+      # Looks like an audio file path (extension compared case-insensitively) but no such file exists
+      if self.value.split('.')[-1].lower() in ('mp3', 'mp4', 'mpeg', 'mpga', 'm4a', 'ogg', 'wav', 'webm'):
+        raise ValueError(f'File {self.value} does not exist')
+      try:
+        b64decode(self.value)  # only checks that the string decodes; the original string is returned
+        return self.value
+      except Exception as e:  # chain the real decode error instead of a bare Exception class
+        raise ValueError('Invalid audio data, expected base64 string or path to audio file') from e
+
+
 class GenerateRequest(BaseGenerateRequest):
  prompt: Optional[str] = None
  'Prompt to generate response from.'
@ -327,6 +355,18 @@ class Message(SubscriptableBaseModel):
  Valid image formats depend on the model. See the model card for more information.
  """

+  audio: Optional[Sequence[Audio]] = None
+  """
+  Optional list of audio data for multimodal models.
+
+  Valid input types are:
+
+  - `str` or path-like object: path to audio file (a `str` may also be base64-encoded audio data)
+  - `bytes` or bytes-like object: raw audio data
+
+  Valid audio formats depend on the model. See the model card for more information.
+  """
+
  tool_name: Optional[str] = None
  'Name of the executed tool.'