From 64c1eb78fff4b7594398e3e8e993f27ffde137b2 Mon Sep 17 00:00:00 2001 From: Parth Sareen Date: Thu, 21 Nov 2024 15:14:59 -0800 Subject: [PATCH] Examples refactor (#329) * Examples and README updates --------- Co-authored-by: fujitatomoya Co-authored-by: Michael Yang --- README.md | 209 ++++++++++-------- examples/README.md | 57 +++++ examples/async-chat-stream/README.md | 3 - examples/async-chat-stream/main.py | 59 ----- examples/async-chat.py | 19 ++ examples/async-generate.py | 15 ++ examples/async-tools.py | 78 +++++++ .../{chat-stream/main.py => chat-stream.py} | 3 +- examples/chat-with-history.py | 38 ++++ examples/{chat/main.py => chat.py} | 3 +- examples/create.py | 30 +++ examples/create/main.py | 20 -- examples/embed.py | 4 + .../main.py => fill-in-middle.py} | 0 .../main.py => generate-stream.py} | 2 +- examples/{generate/main.py => generate.py} | 2 +- examples/list.py | 14 ++ examples/multimodal-chat.py | 23 ++ .../main.py => multimodal-generate.py} | 0 examples/ps.py | 27 +++ examples/ps/main.py | 31 --- examples/pull-progress/README.md | 9 - examples/pull-progress/requirements.txt | 1 - examples/{pull-progress/main.py => pull.py} | 2 +- examples/tools.py | 66 ++++++ examples/tools/README.md | 3 - examples/tools/main.py | 87 -------- ollama/_client.py | 4 +- 28 files changed, 492 insertions(+), 317 deletions(-) create mode 100644 examples/README.md delete mode 100644 examples/async-chat-stream/README.md delete mode 100644 examples/async-chat-stream/main.py create mode 100644 examples/async-chat.py create mode 100644 examples/async-generate.py create mode 100644 examples/async-tools.py rename examples/{chat-stream/main.py => chat-stream.py} (68%) create mode 100644 examples/chat-with-history.py rename examples/{chat/main.py => chat.py} (75%) create mode 100644 examples/create.py delete mode 100644 examples/create/main.py create mode 100644 examples/embed.py rename examples/{fill-in-middle/main.py => fill-in-middle.py} (100%) rename examples/{generate-stream/main.py => generate-stream.py} (51%) rename examples/{generate/main.py => generate.py} (50%) create mode 100644 examples/list.py create mode 100644 examples/multimodal-chat.py rename examples/{multimodal/main.py => multimodal-generate.py} (100%) create mode 100644 examples/ps.py delete mode 100644 examples/ps/main.py delete mode 100644 examples/pull-progress/README.md delete mode 100644 examples/pull-progress/requirements.txt rename examples/{pull-progress/main.py => pull.py} (92%) create mode 100644 examples/tools.py delete mode 100644 examples/tools/README.md delete mode 100644 examples/tools/main.py diff --git a/README.md b/README.md index e03ea00..454c159 100644 --- a/README.md +++ b/README.md @@ -2,6 +2,12 @@ The Ollama Python library provides the easiest way to integrate Python 3.8+ projects with [Ollama](https://github.com/ollama/ollama). +## Prerequisites + +- [Ollama](https://ollama.com/download) should be installed and running +- Pull a model to use with the library: `ollama pull ` e.g. `ollama pull llama3.2` + - See [Ollama.com](https://ollama.com/search) for more information on the models available. 
+ ## Install ```sh @@ -11,25 +17,34 @@ pip install ollama ## Usage ```python -import ollama -response = ollama.chat(model='llama3.1', messages=[ +from ollama import chat +from ollama import ChatResponse + +response: ChatResponse = chat(model='llama3.2', messages=[ { 'role': 'user', 'content': 'Why is the sky blue?', }, ]) print(response['message']['content']) +# or access fields directly from the response object +print(response.message.content) ``` +See [_types.py](ollama/_types.py) for more information on the response types. + ## Streaming responses -Response streaming can be enabled by setting `stream=True`, modifying function calls to return a Python generator where each part is an object in the stream. +Response streaming can be enabled by setting `stream=True`. + +> [!NOTE] +> Streaming Tool/Function calling is not yet supported. ```python -import ollama +from ollama import chat -stream = ollama.chat( - model='llama3.1', +stream = chat( + model='llama3.2', messages=[{'role': 'user', 'content': 'Why is the sky blue?'}], stream=True, ) @@ -38,98 +53,18 @@ for chunk in stream: print(chunk['message']['content'], end='', flush=True) ``` -## API - -The Ollama Python library's API is designed around the [Ollama REST API](https://github.com/ollama/ollama/blob/main/docs/api.md) - -### Chat - -```python -ollama.chat(model='llama3.1', messages=[{'role': 'user', 'content': 'Why is the sky blue?'}]) -``` - -### Generate - -```python -ollama.generate(model='llama3.1', prompt='Why is the sky blue?') -``` - -### List - -```python -ollama.list() -``` - -### Show - -```python -ollama.show('llama3.1') -``` - -### Create - -```python -modelfile=''' -FROM llama3.1 -SYSTEM You are mario from super mario bros. -''' - -ollama.create(model='example', modelfile=modelfile) -``` - -### Copy - -```python -ollama.copy('llama3.1', 'user/llama3.1') -``` - -### Delete - -```python -ollama.delete('llama3.1') -``` - -### Pull - -```python -ollama.pull('llama3.1') -``` - -### Push - -```python -ollama.push('user/llama3.1') -``` - -### Embed - -```python -ollama.embed(model='llama3.1', input='The sky is blue because of rayleigh scattering') -``` - -### Embed (batch) - -```python -ollama.embed(model='llama3.1', input=['The sky is blue because of rayleigh scattering', 'Grass is green because of chlorophyll']) -``` - -### Ps - -```python -ollama.ps() -``` - ## Custom client +A custom client can be created by instantiating `Client` or `AsyncClient` from `ollama`. -A custom client can be created with the following fields: - -- `host`: The Ollama host to connect to -- `timeout`: The timeout for requests +All extra keyword arguments are passed into the [`httpx.Client`](https://www.python-httpx.org/api/#client). ```python from ollama import Client -client = Client(host='http://localhost:11434') -response = client.chat(model='llama3.1', messages=[ +client = Client( + host='http://localhost:11434', + headers={'x-some-header': 'some-value'} +) +response = client.chat(model='llama3.2', messages=[ { 'role': 'user', 'content': 'Why is the sky blue?', @@ -139,13 +74,15 @@ response = client.chat(model='llama3.1', messages=[ ## Async client +The `AsyncClient` class is used to make asynchronous requests. It can be configured with the same fields as the `Client` class. 
+ ```python import asyncio from ollama import AsyncClient async def chat(): message = {'role': 'user', 'content': 'Why is the sky blue?'} - response = await AsyncClient().chat(model='llama3.1', messages=[message]) + response = await AsyncClient().chat(model='llama3.2', messages=[message]) asyncio.run(chat()) ``` @@ -158,12 +95,94 @@ from ollama import AsyncClient async def chat(): message = {'role': 'user', 'content': 'Why is the sky blue?'} - async for part in await AsyncClient().chat(model='llama3.1', messages=[message], stream=True): + async for part in await AsyncClient().chat(model='llama3.2', messages=[message], stream=True): print(part['message']['content'], end='', flush=True) asyncio.run(chat()) ``` +## API + +The Ollama Python library's API is designed around the [Ollama REST API](https://github.com/ollama/ollama/blob/main/docs/api.md) + +### Chat + +```python +ollama.chat(model='llama3.2', messages=[{'role': 'user', 'content': 'Why is the sky blue?'}]) +``` + +### Generate + +```python +ollama.generate(model='llama3.2', prompt='Why is the sky blue?') +``` + +### List + +```python +ollama.list() +``` + +### Show + +```python +ollama.show('llama3.2') +``` + +### Create + +```python +modelfile=''' +FROM llama3.2 +SYSTEM You are mario from super mario bros. +''' + +ollama.create(model='example', modelfile=modelfile) +``` + +### Copy + +```python +ollama.copy('llama3.2', 'user/llama3.2') +``` + +### Delete + +```python +ollama.delete('llama3.2') +``` + +### Pull + +```python +ollama.pull('llama3.2') +``` + +### Push + +```python +ollama.push('user/llama3.2') +``` + +### Embed + +```python +ollama.embed(model='llama3.2', input='The sky is blue because of rayleigh scattering') +``` + +### Embed (batch) + +```python +ollama.embed(model='llama3.2', input=['The sky is blue because of rayleigh scattering', 'Grass is green because of chlorophyll']) +``` + +### Ps + +```python +ollama.ps() +``` + + ## Errors Errors are raised if requests return an error status or if an error is detected while streaming. 
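For reference, handling such an error from this library might look like the following minimal sketch (it assumes the package's `ResponseError` exception and its `error`/`status_code` attributes, which are not shown in this patch):

```python
import ollama

model = 'does-not-yet-exist'

try:
  ollama.chat(model=model, messages=[{'role': 'user', 'content': 'Why is the sky blue?'}])
except ollama.ResponseError as e:
  # The server reports the failure and its HTTP status code
  print('Error:', e.error)
  if e.status_code == 404:
    # Model is not available locally, so pull it and try again
    ollama.pull(model)
```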
diff --git a/examples/README.md b/examples/README.md
new file mode 100644
index 0000000..a455c60
--- /dev/null
+++ b/examples/README.md
@@ -0,0 +1,57 @@
+# Running Examples
+
+Run the examples in this directory with:
+```sh
+# Run example
+python3 examples/<example>.py
+```
+
+### Chat - Chat with a model
+- [chat.py](chat.py)
+- [async-chat.py](async-chat.py)
+- [chat-stream.py](chat-stream.py) - Streamed outputs
+- [chat-with-history.py](chat-with-history.py) - Chat with model and maintain history of the conversation
+
+
+### Generate - Generate text with a model
+- [generate.py](generate.py)
+- [async-generate.py](async-generate.py)
+- [generate-stream.py](generate-stream.py) - Streamed outputs
+- [fill-in-middle.py](fill-in-middle.py) - Given a prefix and suffix, fill in the middle
+
+
+### Tools/Function Calling - Call a function with a model
+- [tools.py](tools.py) - Simple example of Tools/Function Calling
+- [async-tools.py](async-tools.py)
+
+
+### Multimodal with Images - Chat with a multimodal (image chat) model
+- [multimodal-chat.py](multimodal-chat.py)
+- [multimodal-generate.py](multimodal-generate.py)
+
+
+### Ollama List - List all downloaded models and their properties
+- [list.py](list.py)
+
+
+### Ollama ps - Show model status with CPU/GPU usage
+- [ps.py](ps.py)
+
+
+### Ollama Pull - Pull a model from Ollama
+Requirement: `pip install tqdm`
+- [pull.py](pull.py)
+
+
+### Ollama Create - Create a model from a Modelfile
+```sh
+python create.py <model name> <file path>
+```
+- [create.py](create.py)
+
+See [ollama/docs/modelfile.md](https://github.com/ollama/ollama/blob/main/docs/modelfile.md) for more information on the Modelfile format.
+
+
+### Ollama Embed - Generate embeddings with a model
+- [embed.py](embed.py)
+
diff --git a/examples/async-chat-stream/README.md b/examples/async-chat-stream/README.md
deleted file mode 100644
index 611295a..0000000
--- a/examples/async-chat-stream/README.md
+++ /dev/null
@@ -1,3 +0,0 @@
-# async-chat-stream
-
-This example demonstrates how to create a conversation history using an asynchronous Ollama client and the chat endpoint. The streaming response is outputted to `stdout` as well as a TTS if enabled with `--speak` and available. Supported TTS are `say` on macOS and `espeak` on Linux.
diff --git a/examples/async-chat-stream/main.py b/examples/async-chat-stream/main.py
deleted file mode 100644
index 6504776..0000000
--- a/examples/async-chat-stream/main.py
+++ /dev/null
@@ -1,59 +0,0 @@
-import shutil
-import asyncio
-import argparse
-
-import ollama
-
-
-async def speak(speaker, content):
-  if speaker:
-    p = await asyncio.create_subprocess_exec(speaker, content)
-    await p.communicate()
-
-
-async def main():
-  parser = argparse.ArgumentParser()
-  parser.add_argument('--speak', default=False, action='store_true')
-  args = parser.parse_args()
-
-  speaker = None
-  if not args.speak:
-    ...
- elif say := shutil.which('say'): - speaker = say - elif (espeak := shutil.which('espeak')) or (espeak := shutil.which('espeak-ng')): - speaker = espeak - - client = ollama.AsyncClient() - - messages = [] - - while True: - if content_in := input('>>> '): - messages.append({'role': 'user', 'content': content_in}) - - content_out = '' - message = {'role': 'assistant', 'content': ''} - async for response in await client.chat(model='mistral', messages=messages, stream=True): - if response['done']: - messages.append(message) - - content = response['message']['content'] - print(content, end='', flush=True) - - content_out += content - if content in ['.', '!', '?', '\n']: - await speak(speaker, content_out) - content_out = '' - - message['content'] += content - - if content_out: - await speak(speaker, content_out) - print() - - -try: - asyncio.run(main()) -except (KeyboardInterrupt, EOFError): - ... diff --git a/examples/async-chat.py b/examples/async-chat.py new file mode 100644 index 0000000..81a50d9 --- /dev/null +++ b/examples/async-chat.py @@ -0,0 +1,19 @@ +import asyncio +from ollama import AsyncClient + + +async def main(): + messages = [ + { + 'role': 'user', + 'content': 'Why is the sky blue?', + }, + ] + + client = AsyncClient() + response = await client.chat('llama3.2', messages=messages) + print(response['message']['content']) + + +if __name__ == '__main__': + asyncio.run(main()) diff --git a/examples/async-generate.py b/examples/async-generate.py new file mode 100644 index 0000000..0097af1 --- /dev/null +++ b/examples/async-generate.py @@ -0,0 +1,15 @@ +import asyncio +import ollama + + +async def main(): + client = ollama.AsyncClient() + response = await client.generate('llama3.2', 'Why is the sky blue?') + print(response['response']) + + +if __name__ == '__main__': + try: + asyncio.run(main()) + except KeyboardInterrupt: + print('\nGoodbye!') diff --git a/examples/async-tools.py b/examples/async-tools.py new file mode 100644 index 0000000..07b3c4a --- /dev/null +++ b/examples/async-tools.py @@ -0,0 +1,78 @@ +import asyncio +from ollama import ChatResponse +import ollama + + +def add_two_numbers(a: int, b: int) -> int: + """ + Add two numbers + + Args: + a (int): The first number + b (int): The second number + + Returns: + int: The sum of the two numbers + """ + return a + b + + +def subtract_two_numbers(a: int, b: int) -> int: + """ + Subtract two numbers + """ + return a - b + + +# Tools can still be manually defined and passed into chat +subtract_two_numbers_tool = { + 'type': 'function', + 'function': { + 'name': 'subtract_two_numbers', + 'description': 'Subtract two numbers', + 'parameters': { + 'type': 'object', + 'required': ['a', 'b'], + 'properties': { + 'a': {'type': 'integer', 'description': 'The first number'}, + 'b': {'type': 'integer', 'description': 'The second number'}, + }, + }, + }, +} + + +async def main(): + client = ollama.AsyncClient() + + prompt = 'What is three plus one?' 
+ print('Prompt:', prompt) + + available_functions = { + 'add_two_numbers': add_two_numbers, + 'subtract_two_numbers': subtract_two_numbers, + } + + response: ChatResponse = await client.chat( + 'llama3.1', + messages=[{'role': 'user', 'content': prompt}], + tools=[add_two_numbers, subtract_two_numbers_tool], + ) + + if response.message.tool_calls: + # There may be multiple tool calls in the response + for tool in response.message.tool_calls: + # Ensure the function is available, and then call it + if function_to_call := available_functions.get(tool.function.name): + print('Calling function:', tool.function.name) + print('Arguments:', tool.function.arguments) + print('Function output:', function_to_call(**tool.function.arguments)) + else: + print('Function', tool.function.name, 'not found') + + +if __name__ == '__main__': + try: + asyncio.run(main()) + except KeyboardInterrupt: + print('\nGoodbye!') diff --git a/examples/chat-stream/main.py b/examples/chat-stream.py similarity index 68% rename from examples/chat-stream/main.py rename to examples/chat-stream.py index 2a57346..cccab01 100644 --- a/examples/chat-stream/main.py +++ b/examples/chat-stream.py @@ -8,8 +8,7 @@ messages = [ }, ] -for part in chat('mistral', messages=messages, stream=True): +for part in chat('llama3.2', messages=messages, stream=True): print(part['message']['content'], end='', flush=True) -# end with a newline print() diff --git a/examples/chat-with-history.py b/examples/chat-with-history.py new file mode 100644 index 0000000..e98d15f --- /dev/null +++ b/examples/chat-with-history.py @@ -0,0 +1,38 @@ +from ollama import chat + + +messages = [ + { + 'role': 'user', + 'content': 'Why is the sky blue?', + }, + { + 'role': 'assistant', + 'content': "The sky is blue because of the way the Earth's atmosphere scatters sunlight.", + }, + { + 'role': 'user', + 'content': 'What is the weather in Tokyo?', + }, + { + 'role': 'assistant', + 'content': 'The weather in Tokyo is typically warm and humid during the summer months, with temperatures often exceeding 30°C (86°F). The city experiences a rainy season from June to September, with heavy rainfall and occasional typhoons. Winter is mild, with temperatures rarely dropping below freezing. 
The city is known for its high-tech and vibrant culture, with many popular tourist attractions such as the Tokyo Tower, Senso-ji Temple, and the bustling Shibuya district.',
+  },
+]
+
+while True:
+  user_input = input('Chat with history: ')
+  response = chat(
+    'llama3.2',
+    messages=messages
+    + [
+      {'role': 'user', 'content': user_input},
+    ],
+  )
+
+  # Add the response to the messages to maintain the history
+  messages += [
+    {'role': 'user', 'content': user_input},
+    {'role': 'assistant', 'content': response.message.content},
+  ]
+  print(response.message.content + '\n')
diff --git a/examples/chat/main.py b/examples/chat.py
similarity index 75%
rename from examples/chat/main.py
rename to examples/chat.py
index 90c5f90..2a30f8a 100644
--- a/examples/chat/main.py
+++ b/examples/chat.py
@@ -1,6 +1,5 @@
 from ollama import chat
 
-
 messages = [
   {
     'role': 'user',
@@ -8,5 +7,5 @@ messages = [
   },
 ]
 
-response = chat('mistral', messages=messages)
+response = chat('llama3.2', messages=messages)
 print(response['message']['content'])
diff --git a/examples/create.py b/examples/create.py
new file mode 100644
index 0000000..d4b5b1f
--- /dev/null
+++ b/examples/create.py
@@ -0,0 +1,30 @@
+import sys
+
+from ollama import create
+
+
+args = sys.argv[1:]
+if len(args) == 2:
+  # create from local file
+  path = args[1]
+else:
+  print('usage: python create.py <model name> <file path>')
+  sys.exit(1)
+
+# TODO: update to real Modelfile values
+modelfile = f"""
+FROM {path}
+"""
+example_modelfile = """
+FROM llama3.2
+# sets the temperature to 1 [higher is more creative, lower is more coherent]
+PARAMETER temperature 1
+# sets the context window size to 4096, this controls how many tokens the LLM can use as context to generate the next token
+PARAMETER num_ctx 4096
+
+# sets a custom system message to specify the behavior of the chat assistant
+SYSTEM You are Mario from super mario bros, acting as an assistant.
+""" + +for response in create(model=args[0], modelfile=modelfile, stream=True): + print(response['status']) diff --git a/examples/create/main.py b/examples/create/main.py deleted file mode 100644 index 0a1161d..0000000 --- a/examples/create/main.py +++ /dev/null @@ -1,20 +0,0 @@ -import sys - -from ollama import create - - -args = sys.argv[1:] -if len(args) == 2: - # create from local file - path = args[1] -else: - print('usage: python main.py ') - sys.exit(1) - -# TODO: update to real Modelfile values -modelfile = f""" -FROM {path} -""" - -for response in create(model=args[0], modelfile=modelfile, stream=True): - print(response['status']) diff --git a/examples/embed.py b/examples/embed.py new file mode 100644 index 0000000..5af145e --- /dev/null +++ b/examples/embed.py @@ -0,0 +1,4 @@ +from ollama import embed + +response = embed(model='llama3.2', input='Hello, world!') +print(response['embeddings']) diff --git a/examples/fill-in-middle/main.py b/examples/fill-in-middle.py similarity index 100% rename from examples/fill-in-middle/main.py rename to examples/fill-in-middle.py diff --git a/examples/generate-stream/main.py b/examples/generate-stream.py similarity index 51% rename from examples/generate-stream/main.py rename to examples/generate-stream.py index a24b410..10b7dc7 100644 --- a/examples/generate-stream/main.py +++ b/examples/generate-stream.py @@ -1,5 +1,5 @@ from ollama import generate -for part in generate('mistral', 'Why is the sky blue?', stream=True): +for part in generate('llama3.2', 'Why is the sky blue?', stream=True): print(part['response'], end='', flush=True) diff --git a/examples/generate/main.py b/examples/generate.py similarity index 50% rename from examples/generate/main.py rename to examples/generate.py index e39e295..1a2311d 100644 --- a/examples/generate/main.py +++ b/examples/generate.py @@ -1,5 +1,5 @@ from ollama import generate -response = generate('mistral', 'Why is the sky blue?') +response = generate('llama3.2', 'Why is the sky blue?') print(response['response']) diff --git a/examples/list.py b/examples/list.py new file mode 100644 index 0000000..32d4525 --- /dev/null +++ b/examples/list.py @@ -0,0 +1,14 @@ +from ollama import list +from ollama import ListResponse + +response: ListResponse = list() + +for model in response.models: + print('Name:', model.model) + print(' Size (MB):', f'{(model.size.real / 1024 / 1024):.2f}') + if model.details: + print(' Format:', model.details.format) + print(' Family:', model.details.family) + print(' Parameter Size:', model.details.parameter_size) + print(' Quantization Level:', model.details.quantization_level) + print('\n') diff --git a/examples/multimodal-chat.py b/examples/multimodal-chat.py new file mode 100644 index 0000000..8aff9f4 --- /dev/null +++ b/examples/multimodal-chat.py @@ -0,0 +1,23 @@ +from ollama import chat +# from pathlib import Path + +# Pass in the path to the image +path = input('Please enter the path to the image: ') + +# You can also pass in base64 encoded image data +# img = base64.b64encode(Path(path).read_bytes()).decode() +# or the raw bytes +# img = Path(path).read_bytes() + +response = chat( + model='llama3.2-vision', + messages=[ + { + 'role': 'user', + 'content': 'What is in this image? 
Be concise.', + 'images': [path], + } + ], +) + +print(response.message.content) diff --git a/examples/multimodal/main.py b/examples/multimodal-generate.py similarity index 100% rename from examples/multimodal/main.py rename to examples/multimodal-generate.py diff --git a/examples/ps.py b/examples/ps.py new file mode 100644 index 0000000..34d5230 --- /dev/null +++ b/examples/ps.py @@ -0,0 +1,27 @@ +from ollama import ps, pull, chat +from ollama import ProcessResponse + +# Ensure at least one model is loaded +response = pull('llama3.2', stream=True) +progress_states = set() +for progress in response: + if progress.get('status') in progress_states: + continue + progress_states.add(progress.get('status')) + print(progress.get('status')) + +print('\n') + +print('Waiting for model to load... \n') +chat(model='llama3.2', messages=[{'role': 'user', 'content': 'Why is the sky blue?'}]) + + +response: ProcessResponse = ps() +for model in response.models: + print('Model: ', model.model) + print(' Digest: ', model.digest) + print(' Expires at: ', model.expires_at) + print(' Size: ', model.size) + print(' Size vram: ', model.size_vram) + print(' Details: ', model.details) + print('\n') diff --git a/examples/ps/main.py b/examples/ps/main.py deleted file mode 100644 index 822d09a..0000000 --- a/examples/ps/main.py +++ /dev/null @@ -1,31 +0,0 @@ -from ollama import ps, pull, chat - -response = pull('mistral', stream=True) -progress_states = set() -for progress in response: - if progress.get('status') in progress_states: - continue - progress_states.add(progress.get('status')) - print(progress.get('status')) - -print('\n') - -response = chat('mistral', messages=[{'role': 'user', 'content': 'Hello!'}]) -print(response['message']['content']) - -print('\n') - -response = ps() - -name = response['models'][0]['name'] -size = response['models'][0]['size'] -size_vram = response['models'][0]['size_vram'] - -if size == size_vram: - print(f'{name}: 100% GPU') -elif not size_vram: - print(f'{name}: 100% CPU') -else: - size_cpu = size - size_vram - cpu_percent = round(size_cpu / size * 100) - print(f'{name}: {cpu_percent}% CPU/{100 - cpu_percent}% GPU') diff --git a/examples/pull-progress/README.md b/examples/pull-progress/README.md deleted file mode 100644 index 8a44f60..0000000 --- a/examples/pull-progress/README.md +++ /dev/null @@ -1,9 +0,0 @@ -# pull-progress - -This example emulates `ollama pull` using the Python library and [`tqdm`](https://tqdm.github.io/). 
- -## Setup - -```shell -pip install -r requirements.txt -``` diff --git a/examples/pull-progress/requirements.txt b/examples/pull-progress/requirements.txt deleted file mode 100644 index ae3df91..0000000 --- a/examples/pull-progress/requirements.txt +++ /dev/null @@ -1 +0,0 @@ -tqdm==4.66.1 diff --git a/examples/pull-progress/main.py b/examples/pull.py similarity index 92% rename from examples/pull-progress/main.py rename to examples/pull.py index 89b2f3a..e24f2e9 100644 --- a/examples/pull-progress/main.py +++ b/examples/pull.py @@ -3,7 +3,7 @@ from ollama import pull current_digest, bars = '', {} -for progress in pull('mistral', stream=True): +for progress in pull('llama3.2', stream=True): digest = progress.get('digest', '') if digest != current_digest and current_digest in bars: bars[current_digest].close() diff --git a/examples/tools.py b/examples/tools.py new file mode 100644 index 0000000..6151cd9 --- /dev/null +++ b/examples/tools.py @@ -0,0 +1,66 @@ +from ollama import chat +from ollama import ChatResponse + + +def add_two_numbers(a: int, b: int) -> int: + """ + Add two numbers + + Args: + a (int): The first number + b (int): The second number + + Returns: + int: The sum of the two numbers + """ + return a + b + + +def subtract_two_numbers(a: int, b: int) -> int: + """ + Subtract two numbers + """ + return a - b + + +# Tools can still be manually defined and passed into chat +subtract_two_numbers_tool = { + 'type': 'function', + 'function': { + 'name': 'subtract_two_numbers', + 'description': 'Subtract two numbers', + 'parameters': { + 'type': 'object', + 'required': ['a', 'b'], + 'properties': { + 'a': {'type': 'integer', 'description': 'The first number'}, + 'b': {'type': 'integer', 'description': 'The second number'}, + }, + }, + }, +} + +prompt = 'What is three plus one?' +print('Prompt:', prompt) + +available_functions = { + 'add_two_numbers': add_two_numbers, + 'subtract_two_numbers': subtract_two_numbers, +} + +response: ChatResponse = chat( + 'llama3.1', + messages=[{'role': 'user', 'content': prompt}], + tools=[add_two_numbers, subtract_two_numbers_tool], +) + +if response.message.tool_calls: + # There may be multiple tool calls in the response + for tool in response.message.tool_calls: + # Ensure the function is available, and then call it + if function_to_call := available_functions.get(tool.function.name): + print('Calling function:', tool.function.name) + print('Arguments:', tool.function.arguments) + print('Function output:', function_to_call(**tool.function.arguments)) + else: + print('Function', tool.function.name, 'not found') diff --git a/examples/tools/README.md b/examples/tools/README.md deleted file mode 100644 index 85ca5dd..0000000 --- a/examples/tools/README.md +++ /dev/null @@ -1,3 +0,0 @@ -# tools - -This example demonstrates how to utilize tool calls with an asynchronous Ollama client and the chat endpoint. 
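The removed async tools example below also sent each tool's result back to the model for a final answer, a step the new tools.py stops short of. A minimal sketch of that follow-up in the new style (assuming the same `llama3.1` model and `add_two_numbers` tool used in tools.py above) could look like this:

```python
from ollama import chat


def add_two_numbers(a: int, b: int) -> int:
  """Add two numbers"""
  return a + b


messages = [{'role': 'user', 'content': 'What is three plus one?'}]
response = chat('llama3.1', messages=messages, tools=[add_two_numbers])

if response.message.tool_calls:
  # Keep the assistant's tool call in the history, then run each requested tool
  messages.append(response.message)
  for tool in response.message.tool_calls:
    result = add_two_numbers(**tool.function.arguments)
    messages.append({'role': 'tool', 'content': str(result)})

  # Second call: the model now sees the tool output and can answer in prose
  final = chat('llama3.1', messages=messages)
  print(final.message.content)
```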
diff --git a/examples/tools/main.py b/examples/tools/main.py deleted file mode 100644 index 133b238..0000000 --- a/examples/tools/main.py +++ /dev/null @@ -1,87 +0,0 @@ -import json -import ollama -import asyncio - - -# Simulates an API call to get flight times -# In a real application, this would fetch data from a live database or API -def get_flight_times(departure: str, arrival: str) -> str: - flights = { - 'NYC-LAX': {'departure': '08:00 AM', 'arrival': '11:30 AM', 'duration': '5h 30m'}, - 'LAX-NYC': {'departure': '02:00 PM', 'arrival': '10:30 PM', 'duration': '5h 30m'}, - 'LHR-JFK': {'departure': '10:00 AM', 'arrival': '01:00 PM', 'duration': '8h 00m'}, - 'JFK-LHR': {'departure': '09:00 PM', 'arrival': '09:00 AM', 'duration': '7h 00m'}, - 'CDG-DXB': {'departure': '11:00 AM', 'arrival': '08:00 PM', 'duration': '6h 00m'}, - 'DXB-CDG': {'departure': '03:00 AM', 'arrival': '07:30 AM', 'duration': '7h 30m'}, - } - - key = f'{departure}-{arrival}'.upper() - return json.dumps(flights.get(key, {'error': 'Flight not found'})) - - -async def run(model: str): - client = ollama.AsyncClient() - # Initialize conversation with a user query - messages = [{'role': 'user', 'content': 'What is the flight time from New York (NYC) to Los Angeles (LAX)?'}] - - # First API call: Send the query and function description to the model - response = await client.chat( - model=model, - messages=messages, - tools=[ - { - 'type': 'function', - 'function': { - 'name': 'get_flight_times', - 'description': 'Get the flight times between two cities', - 'parameters': { - 'type': 'object', - 'properties': { - 'departure': { - 'type': 'string', - 'description': 'The departure city (airport code)', - }, - 'arrival': { - 'type': 'string', - 'description': 'The arrival city (airport code)', - }, - }, - 'required': ['departure', 'arrival'], - }, - }, - }, - ], - ) - - # Add the model's response to the conversation history - messages.append(response['message']) - - # Check if the model decided to use the provided function - if not response['message'].get('tool_calls'): - print("The model didn't use the function. Its response was:") - print(response['message']['content']) - return - - # Process function calls made by the model - if response['message'].get('tool_calls'): - available_functions = { - 'get_flight_times': get_flight_times, - } - for tool in response['message']['tool_calls']: - function_to_call = available_functions[tool['function']['name']] - function_response = function_to_call(tool['function']['arguments']['departure'], tool['function']['arguments']['arrival']) - # Add function response to the conversation - messages.append( - { - 'role': 'tool', - 'content': function_response, - } - ) - - # Second API call: Get final response from the model - final_response = await client.chat(model=model, messages=messages) - print(final_response['message']['content']) - - -# Run the async function -asyncio.run(run('mistral')) diff --git a/ollama/_client.py b/ollama/_client.py index a8a19d3..548f343 100644 --- a/ollama/_client.py +++ b/ollama/_client.py @@ -319,7 +319,7 @@ class Client(BaseClient): ''' return a + b - client.chat(model='llama3.1:8b', tools=[add_two_numbers], messages=[...]) + client.chat(model='llama3.2', tools=[add_two_numbers], messages=[...]) Raises `RequestError` if a model is not provided. 
@@ -821,7 +821,7 @@ class AsyncClient(BaseClient): ''' return a + b - await client.chat(model='llama3.1:8b', tools=[add_two_numbers], messages=[...]) + await client.chat(model='llama3.2', tools=[add_two_numbers], messages=[...]) Raises `RequestError` if a model is not provided.