Update examples and readmes

2026-03-14 12:24:36 +08:00 · 2024-11-19 14:31:18 -08:00 · 2024-11-19 14:31:18 -08:00 · ffe7588093
commit ffe7588093
parent 238f142a5c
7 changed files with 143 additions and 107 deletions
--- a/README.md
+++ b/README.md
@ -4,8 +4,9 @@ The Ollama Python library provides the easiest way to integrate Python 3.8+ proj
 ## Prerequisites
- Install [Ollama](https://ollama.com/download)
+- [Ollama](https://ollama.com/download) should be installed and running
- Pull a model: `ollama pull <model>` See [Ollama models](https://ollama.com/models)
+- Pull a model to use with the library: `ollama pull <model>`, e.g. `ollama pull llama3.1`
  - See [Ollama models](https://ollama.com/models) for more information on the models available.
 ## Install
@ -16,24 +17,32 @@ pip install ollama
 ## Usage
 ```python
-import ollama
+from ollama import chat
-response = ollama.chat(model='llama3.1', messages=[
+from ollama._types import ChatResponse
 response: ChatResponse = chat(model='llama3.1', messages=[
  {
    'role': 'user',
    'content': 'Why is the sky blue?',
  },
 ])
 print(response['message']['content'])
 # or access fields directly from the response object
 print(response.message.content)
 ```
 See [_types.py](ollama/_types.py) for more information on the response types.
 ## Streaming responses
-Response streaming can be enabled by setting `stream=True`, modifying function calls to return a Python generator where each part is an object in the stream.
+Response streaming can be enabled by setting `stream=True`.
 Streaming Tool/Function calling is not yet supported.
 ```python
-import ollama
+from ollama import chat
-stream = ollama.chat(
+stream = chat(
    model='llama3.1',
    messages=[{'role': 'user', 'content': 'Why is the sky blue?'}],
    stream=True,
@ -43,6 +52,62 @@ for chunk in stream:
  print(chunk['message']['content'], end='', flush=True)
 ```
 ## Custom client
 A custom client can be created with the following fields:
 - `host`: The Ollama host (default: `http://localhost:11434`)
 - `timeout`: The timeout for requests (default: `None`)
 - `follow_redirects`: Whether to follow redirects (default: `True`)
 - `headers`: Additional headers to send with requests (default: `{}`)
 ```python
 from ollama import Client
 client = Client()
 # or 
 client = Client(
  host='http://localhost:11434',
  timeout=None,
  follow_redirects=True,
  headers={'x-some-header': 'some-value'}
 )
 response = client.chat(model='llama3.1', messages=[
  {
    'role': 'user',
    'content': 'Why is the sky blue?',
  },
 ])
 ```
 ## Async client
 The `AsyncClient` class is used to make asynchronous requests. It can be configured with the same fields as the `Client` class.
 ```python
 import asyncio
 from ollama import AsyncClient
 async def chat():
  message = {'role': 'user', 'content': 'Why is the sky blue?'}
  response = await AsyncClient().chat(model='llama3.1', messages=[message])
 asyncio.run(chat())
 ```
 Setting `stream=True` modifies functions to return a Python asynchronous generator:
 ```python
 import asyncio
 from ollama import AsyncClient
 async def chat():
  message = {'role': 'user', 'content': 'Why is the sky blue?'}
  async for part in await AsyncClient().chat(model='llama3.1', messages=[message], stream=True):
    print(part['message']['content'], end='', flush=True)
 asyncio.run(chat())
 ```
 ## API
 The Ollama Python library's API is designed around the [Ollama REST API](https://github.com/ollama/ollama/blob/main/docs/api.md).
@ -124,50 +189,6 @@ ollama.embed(model='llama3.1', input=['The sky is blue because of rayleigh scatt
 ollama.ps()
 ```
 ## Custom client
 A custom client can be created with the following fields:
 - `host`: The Ollama host to connect to
 - `timeout`: The timeout for requests
 ```python
 from ollama import Client
 client = Client(host='http://localhost:11434')
 response = client.chat(model='llama3.1', messages=[
  {
    'role': 'user',
    'content': 'Why is the sky blue?',
  },
 ])
 ```
 ## Async client
 ```python
 import asyncio
 from ollama import AsyncClient
 async def chat():
  message = {'role': 'user', 'content': 'Why is the sky blue?'}
  response = await AsyncClient().chat(model='llama3.1', messages=[message])
 asyncio.run(chat())
 ```
 Setting `stream=True` modifies functions to return a Python asynchronous generator:
 ```python
 import asyncio
 from ollama import AsyncClient
 async def chat():
  message = {'role': 'user', 'content': 'Why is the sky blue?'}
  async for part in await AsyncClient().chat(model='llama3.1', messages=[message], stream=True):
    print(part['message']['content'], end='', flush=True)
 asyncio.run(chat())
 ```
 ## Errors
--- a/examples/README.md
+++ b/examples/README.md
@ -9,53 +9,47 @@ cd examples/
 python3 <example>.py
 ```
-### Chat
+### Chat - Chat with a model
- [chat.py](chat.py) - Basic chat with model
+- [chat.py](chat.py)
- [chat-stream.py](chat-stream.py) - Stream chat with model
+- [async-chat.py](async-chat.py)
- [async-chat.py](async-chat.py) - Async chat with model
+- [chat-stream.py](chat-stream.py) - Streamed outputs
-### Generate
+### Generate - Generate text with a model
- [generate.py](generate.py) - Generate text with model
+- [generate.py](generate.py)
- [generate-stream.py](generate-stream.py) - Stream generate text with model
+- [async-generate.py](async-generate.py)
- [async-generate.py](async-generate.py) - Async generate text with model
+- [generate-stream.py](generate-stream.py) - Streamed outputs
-### List
+### Tools/Function Calling - Call a function with a model
- [list.py](list.py) - List all downloaded models and their properties
+- [tools.py](tools.py) - Simple example of Tools/Function Calling
- [async-list.py](async-list.py) - Async list all downloaded models and their properties
+- [async-tools.py](async-tools.py)
-### Fill in the middle
+### Multimodal - Chat with a multimodal model
- [fill-in-middle.py](fill-in-middle.py) - Fill in the middle with model
+- [multimodal_chat.py](multimodal_chat.py)
 - [multimodal_generate.py](multimodal_generate.py)
 ### Multimodal
 - [multimodal.py](multimodal.py) - Multimodal chat with model
-### Pull Progress
+### Ollama List - List all downloaded models and their properties
 - [list.py](list.py)
 - [async-list.py](async-list.py)
 ### Ollama Pull - Pull a model from Ollama
 Requirement: `pip install tqdm`
- [pull-progress.py](pull-progress.py) - Pull progress with model
+- [pull.py](pull.py) 
-### Ollama create (create a model)
+### Ollama Create - Create a model from a Modelfile
 - [create.py](create.py) - Create a model
 ### Ollama ps (show model status - cpu/gpu usage)
 - [ollama-ps.py](ollama-ps.py) - Ollama ps
 ### Tools/Function Calling
 - [tools.py](tools.py) - Simple example of Tools/Function Calling
 - [async-tools.py](async-tools.py) - Async example of Tools/Function Calling
 ## Configuring Clients
 Custom parameters can be passed to the client when initializing:
 ```python
-import ollama
+python create.py <model> <modelfile>
 client = ollama.Client(
  host='http://localhost:11434',
  timeout=10.0, # Default: None
  follow_redirects=True, # Default: True
  headers={'x-some-header': 'some-value'}
 )
 ```
 - [create.py](create.py) 
 See [ollama/docs/modelfile.md](https://github.com/ollama/ollama/blob/main/docs/modelfile.md) for more information on the Modelfile format.
 ### Fill in the middle
 - [fill-in-middle.py](fill-in-middle.py) - Given a prefix and suffix, fill in the middle
 ### Ollama ps - Show model status with CPU/GPU usage
 - [ps.py](ps.py)
 Similarly, the `AsyncClient` class can be configured with the same parameters.
--- a/examples/create.py
+++ b/examples/create.py
@ -8,13 +8,23 @@ if len(args) == 2:
  # create from local file
  path = args[1]
 else:
-  print('usage: python main.py <name> <filepath>')
+  print('usage: python create.py <name> <filepath>')
  sys.exit(1)
 # TODO: update to real Modelfile values
 modelfile = f"""
 FROM {path}
 """
 example_modelfile = """
 FROM llama3.2
 # sets the temperature to 1 [higher is more creative, lower is more coherent]
 PARAMETER temperature 1
 # sets the context window size to 4096, this controls how many tokens the LLM can use as context to generate the next token
 PARAMETER num_ctx 4096
 # sets a custom system message to specify the behavior of the chat assistant
 SYSTEM You are Mario from super mario bros, acting as an assistant.
 """
 for response in create(model=args[0], modelfile=modelfile, stream=True):
  print(response['status'])
--- a/examples/multimodal_chat.py
+++ b/examples/multimodal_chat.py
@ -0,0 +1,17 @@
 from ollama import Client
 client = Client()
 path = ''
 # NOTE: the error produced when an invalid image path is passed in is not very helpful
 response = client.chat(
  model='llama3.2-vision',
  messages=[
    {
      'role': 'user',
      'content': 'What is in this image? Be concise. Respond with the structure {"focal": "...", "subject": "...", "background": "..."}',
      'images': [path],
    }
  ],
 )
 print(response.message.content)
--- a/examples/multimodal_generate.py
+++ b/examples/multimodal_generate.py
--- a/examples/ps.py
+++ b/examples/ps.py
@ -1,5 +1,7 @@
-from ollama import ps, pull, chat
+from ollama import ps, pull
 from ollama._types import ProcessResponse
 # Ensure at least one model is loaded
 response = pull('llama3.1', stream=True)
 progress_states = set()
 for progress in response:
@ -10,22 +12,14 @@ for progress in response:
 print('\n')
 response = chat('llama3.1', messages=[{'role': 'user', 'content': 'Hello!'}])
 print(response['message']['content'])
-print('\n')
+response: ProcessResponse = ps()
 for model in response.models:
  print(f'Model: {model.model}')
  print(f'Digest: {model.digest}')
  print(f'Expires at: {model.expires_at}')
  print(f'Size: {model.size}')
  print(f'Size vram: {model.size_vram}')
  print(f'Details: {model.details}')
-response = ps()
+  print('---' * 10)
 name = response['models'][0]['name']
 size = response['models'][0]['size']
 size_vram = response['models'][0]['size_vram']
 if size == size_vram:
  print(f'{name}: 100% GPU')
 elif not size_vram:
  print(f'{name}: 100% CPU')
 else:
  size_cpu = size - size_vram
  cpu_percent = round(size_cpu / size * 100)
  print(f'{name}: {cpu_percent}% CPU/{100 - cpu_percent}% GPU')
--- a/examples/pull-progress.py
+++ b/examples/pull-progress.py