Merge pull request #16 from jmorganca/ollama-host

fix: update OLLAMA_HOST parsing to match ollama CLI
s/base_url/host/
2026-06-19 06:33:24 +00:00 · 2024-01-19 14:09:47 -08:00 · 2024-01-19 14:00:49 -08:00 · 2024-01-19 13:17:42 -08:00 · 2024-01-18 14:01:00 -08:00 · 2024-01-18 11:20:54 -08:00
14 changed files with 154 additions and 9 deletions
@@ -11,6 +11,7 @@ jobs:
    environment: release
    permissions:
      id-token: write
+      contents: write
    steps:
      - uses: actions/checkout@v4
      - run: pipx install poetry
@@ -0,0 +1,3 @@
+# async-chat-stream
+
+This example demonstrates how to build up a conversation history using an asynchronous Ollama client and the chat endpoint. The streamed response is written to `stdout` and, when `--speak` is passed and a text-to-speech (TTS) program is available, also spoken aloud. Supported TTS programs are `say` on macOS and `espeak` (or `espeak-ng`) on Linux.
@@ -0,0 +1,59 @@
+import shutil
+import asyncio
+import argparse
+
+import ollama
+
+
+async def speak(speaker, content):
+  if speaker:
+    p = await asyncio.create_subprocess_exec(speaker, content)
+    await p.communicate()
+
+
+async def main():
+  parser = argparse.ArgumentParser()
+  parser.add_argument('--speak', default=False, action='store_true')
+  args = parser.parse_args()
+
+  speaker = None
+  if not args.speak:
+    ...
+  elif say := shutil.which('say'):
+    speaker = say
+  elif (espeak := shutil.which('espeak')) or (espeak := shutil.which('espeak-ng')):
+    speaker = espeak
+
+  client = ollama.AsyncClient()
+
+  messages = []
+
+  while True:
+    if content_in := input('>>> '):
+      messages.append({'role': 'user', 'content': content_in})
+
+      content_out = ''
+      message = {'role': 'assistant', 'content': ''}
+      async for response in await client.chat(model='mistral', messages=messages, stream=True):
+        if response['done']:
+          messages.append(message)
+
+        content = response['message']['content']
+        print(content, end='', flush=True)
+
+        content_out += content
+        if content in ['.', '!', '?', '\n']:
+          await speak(speaker, content_out)
+          content_out = ''
+
+        message['content'] += content
+
+      if content_out:
+        await speak(speaker, content_out)
+      print()
+
+
+try:
+  asyncio.run(main())
+except (KeyboardInterrupt, EOFError):
+  ...
@@ -0,0 +1,9 @@
+# pull-progress
+
+This example emulates `ollama pull` using the Python library and [`tqdm`](https://tqdm.github.io/).
+
+## Setup
+
+```shell
+pip install -r requirements.txt
+```
@@ -0,0 +1,21 @@
+from tqdm import tqdm
+from ollama import pull
+
+
+current_digest, bars = '', {}
+for progress in pull('mistral', stream=True):
+  digest = progress.get('digest', '')
+  if digest != current_digest and current_digest in bars:
+    bars[current_digest].close()
+
+  if not digest:
+    print(progress.get('status'))
+    continue
+
+  if digest not in bars and (total := progress.get('total')):
+    bars[digest] = tqdm(total=total, desc=f'pushing {digest[7:19]}', unit='B', unit_scale=True)
+
+  if completed := progress.get('completed'):
+    bars[digest].update(completed - bars[digest].n)
+
+  current_digest = digest
@@ -0,0 +1 @@
+tqdm==4.66.1
@@ -2,6 +2,7 @@ import os
 import io
 import json
 import httpx
+import urllib.parse
 from os import PathLike
 from pathlib import Path
 from hashlib import sha256
@@ -23,7 +24,7 @@ class BaseClient:
  def __init__(
    self,
    client,
-    base_url: Optional[str] = None,
+    host: Optional[str] = None,
    follow_redirects: bool = True,
    timeout: Any = None,
    **kwargs,
@@ -31,15 +32,12 @@ class BaseClient:
    """
    Creates a httpx client. Default parameters are the same as those defined in httpx
    except for the following:
-
-    - `base_url`: http://127.0.0.1:11434
    - `follow_redirects`: True
    - `timeout`: None
-
    `kwargs` are passed to the httpx client.
    """
    self._client = client(
-      base_url=base_url or os.getenv('OLLAMA_HOST', 'http://127.0.0.1:11434'),
+      base_url=_parse_host(host or os.getenv('OLLAMA_HOST')),
      follow_redirects=follow_redirects,
      timeout=timeout,
      **kwargs,
@@ -47,8 +45,8 @@ class BaseClient:


 class Client(BaseClient):
-  def __init__(self, base_url: Optional[str] = None, **kwargs) -> None:
-    super().__init__(httpx.Client, base_url, **kwargs)
+  def __init__(self, host: Optional[str] = None, **kwargs) -> None:
+    super().__init__(httpx.Client, host, **kwargs)

  def _request(self, method: str, url: str, **kwargs) -> httpx.Response:
    response = self._client.request(method, url, **kwargs)
@@ -308,8 +306,8 @@ class Client(BaseClient):


 class AsyncClient(BaseClient):
-  def __init__(self, base_url: Optional[str] = None, **kwargs) -> None:
-    super().__init__(httpx.AsyncClient, base_url, **kwargs)
+  def __init__(self, host: Optional[str] = None, **kwargs) -> None:
+    super().__init__(httpx.AsyncClient, host, **kwargs)

  async def _request(self, method: str, url: str, **kwargs) -> httpx.Response:
    response = await self._client.request(method, url, **kwargs)
@@ -607,3 +605,53 @@ def _as_bytesio(s: Any) -> Union[io.BytesIO, None]:
  elif isinstance(s, bytes):
    return io.BytesIO(s)
  return None
+
+
+def _parse_host(host: Optional[str]) -> str:
+  """
+  >>> _parse_host(None)
+  'http://127.0.0.1:11434'
+  >>> _parse_host('')
+  'http://127.0.0.1:11434'
+  >>> _parse_host('1.2.3.4')
+  'http://1.2.3.4:11434'
+  >>> _parse_host(':56789')
+  'http://127.0.0.1:56789'
+  >>> _parse_host('1.2.3.4:56789')
+  'http://1.2.3.4:56789'
+  >>> _parse_host('http://1.2.3.4')
+  'http://1.2.3.4:80'
+  >>> _parse_host('https://1.2.3.4')
+  'https://1.2.3.4:443'
+  >>> _parse_host('https://1.2.3.4:56789')
+  'https://1.2.3.4:56789'
+  >>> _parse_host('example.com')
+  'http://example.com:11434'
+  >>> _parse_host('example.com:56789')
+  'http://example.com:56789'
+  >>> _parse_host('http://example.com')
+  'http://example.com:80'
+  >>> _parse_host('https://example.com')
+  'https://example.com:443'
+  >>> _parse_host('https://example.com:56789')
+  'https://example.com:56789'
+  >>> _parse_host('example.com/')
+  'http://example.com:11434'
+  >>> _parse_host('example.com:56789/')
+  'http://example.com:56789'
+  """
+
+  host, port = host or '', 11434
+  scheme, _, hostport = host.partition('://')
+  if not hostport:
+    scheme, hostport = 'http', host
+  elif scheme == 'http':
+    port = 80
+  elif scheme == 'https':
+    port = 443
+
+  split = urllib.parse.urlsplit('://'.join([scheme, hostport]))
+  host = split.hostname or '127.0.0.1'
+  port = split.port or port
+
+  return f'{scheme}://{host}:{port}'
@@ -35,3 +35,6 @@ indent-style = "space"
 [tool.ruff.lint]
 select = ["E", "F", "B"]
 ignore = ["E501"]
+
+[tool.pytest.ini_options]
+addopts = '--doctest-modules --ignore examples'
Author	SHA1	Message	Date
Michael Yang	43d36489b2	Merge pull request #16 from jmorganca/ollama-host fix: update OLLAMA_HOST parsing to match ollama CLI	2024-01-19 14:09:47 -08:00
Michael Yang	3119736eda	s/base_url/host/	2024-01-19 14:00:49 -08:00
Michael Yang	bae245ea63	update pytest configs	2024-01-19 13:17:42 -08:00
Michael Yang	bf46f31936	Merge pull request #15 from jmorganca/mxyng/examples examples	2024-01-18 14:01:00 -08:00
Michael Yang	81e253997d	examples: add pull progress readme	2024-01-18 11:20:54 -08:00
Michael Yang	89c4b54316	examples: add async chat	2024-01-18 11:20:48 -08:00
Michael Yang	8497ffa3d4	examples: rename directories	2024-01-18 11:20:36 -08:00
Michael Yang	af02cb3fba	Merge pull request #13 from jmorganca/mxyng/pull-progress add example for pulling with progress bar	2024-01-17 14:40:22 -08:00
Michael Yang	a01b48814a	add example for pulling with progress bar	2024-01-17 14:37:11 -08:00
Michael Yang	f00c93eab3	Merge pull request #11 from jmorganca/mxyng/ci-publish-perms ci: give publish job content perms	2024-01-16 12:52:30 -08:00
Michael Yang	516fb19076	ci: give publish job content perms	2024-01-16 12:35:39 -08:00