From 64c1eb78fff4b7594398e3e8e993f27ffde137b2 Mon Sep 17 00:00:00 2001 From: Parth Sareen Date: Thu, 21 Nov 2024 15:14:59 -0800 Subject: [PATCH] Examples refactor (#329) * Examples and README updates --------- Co-authored-by: fujitatomoya Co-authored-by: Michael Yang --- README.md | 209 ++++++++++-------- examples/README.md | 57 +++++ examples/async-chat-stream/README.md | 3 - examples/async-chat-stream/main.py | 59 ----- examples/async-chat.py | 19 ++ examples/async-generate.py | 15 ++ examples/async-tools.py | 78 +++++++ .../{chat-stream/main.py => chat-stream.py} | 3 +- examples/chat-with-history.py | 38 ++++ examples/{chat/main.py => chat.py} | 3 +- examples/create.py | 30 +++ examples/create/main.py | 20 -- examples/embed.py | 4 + .../main.py => fill-in-middle.py} | 0 .../main.py => generate-stream.py} | 2 +- examples/{generate/main.py => generate.py} | 2 +- examples/list.py | 14 ++ examples/multimodal-chat.py | 23 ++ .../main.py => multimodal-generate.py} | 0 examples/ps.py | 27 +++ examples/ps/main.py | 31 --- examples/pull-progress/README.md | 9 - examples/pull-progress/requirements.txt | 1 - examples/{pull-progress/main.py => pull.py} | 2 +- examples/tools.py | 66 ++++++ examples/tools/README.md | 3 - examples/tools/main.py | 87 -------- ollama/_client.py | 4 +- 28 files changed, 492 insertions(+), 317 deletions(-) create mode 100644 examples/README.md delete mode 100644 examples/async-chat-stream/README.md delete mode 100644 examples/async-chat-stream/main.py create mode 100644 examples/async-chat.py create mode 100644 examples/async-generate.py create mode 100644 examples/async-tools.py rename examples/{chat-stream/main.py => chat-stream.py} (68%) create mode 100644 examples/chat-with-history.py rename examples/{chat/main.py => chat.py} (75%) create mode 100644 examples/create.py delete mode 100644 examples/create/main.py create mode 100644 examples/embed.py rename examples/{fill-in-middle/main.py => fill-in-middle.py} (100%) rename examples/{generate-stream/main.py => generate-stream.py} (51%) rename examples/{generate/main.py => generate.py} (50%) create mode 100644 examples/list.py create mode 100644 examples/multimodal-chat.py rename examples/{multimodal/main.py => multimodal-generate.py} (100%) create mode 100644 examples/ps.py delete mode 100644 examples/ps/main.py delete mode 100644 examples/pull-progress/README.md delete mode 100644 examples/pull-progress/requirements.txt rename examples/{pull-progress/main.py => pull.py} (92%) create mode 100644 examples/tools.py delete mode 100644 examples/tools/README.md delete mode 100644 examples/tools/main.py diff --git a/README.md b/README.md index e03ea00..454c159 100644 --- a/README.md +++ b/README.md @@ -2,6 +2,12 @@ The Ollama Python library provides the easiest way to integrate Python 3.8+ projects with [Ollama](https://github.com/ollama/ollama). +## Prerequisites + +- [Ollama](https://ollama.com/download) should be installed and running +- Pull a model to use with the library: `ollama pull ` e.g. `ollama pull llama3.2` + - See [Ollama.com](https://ollama.com/search) for more information on the models available. 
+ ## Install ```sh @@ -11,25 +17,34 @@ pip install ollama ## Usage ```python -import ollama -response = ollama.chat(model='llama3.1', messages=[ +from ollama import chat +from ollama import ChatResponse + +response: ChatResponse = chat(model='llama3.2', messages=[ { 'role': 'user', 'content': 'Why is the sky blue?', }, ]) print(response['message']['content']) +# or access fields directly from the response object +print(response.message.content) ``` +See [_types.py](ollama/_types.py) for more information on the response types. + ## Streaming responses -Response streaming can be enabled by setting `stream=True`, modifying function calls to return a Python generator where each part is an object in the stream. +Response streaming can be enabled by setting `stream=True`. + +> [!NOTE] +> Streaming Tool/Function calling is not yet supported. ```python -import ollama +from ollama import chat -stream = ollama.chat( - model='llama3.1', +stream = chat( + model='llama3.2', messages=[{'role': 'user', 'content': 'Why is the sky blue?'}], stream=True, ) @@ -38,98 +53,18 @@ for chunk in stream: print(chunk['message']['content'], end='', flush=True) ``` -## API - -The Ollama Python library's API is designed around the [Ollama REST API](https://github.com/ollama/ollama/blob/main/docs/api.md) - -### Chat - -```python -ollama.chat(model='llama3.1', messages=[{'role': 'user', 'content': 'Why is the sky blue?'}]) -``` - -### Generate - -```python -ollama.generate(model='llama3.1', prompt='Why is the sky blue?') -``` - -### List - -```python -ollama.list() -``` - -### Show - -```python -ollama.show('llama3.1') -``` - -### Create - -```python -modelfile=''' -FROM llama3.1 -SYSTEM You are mario from super mario bros. -''' - -ollama.create(model='example', modelfile=modelfile) -``` - -### Copy - -```python -ollama.copy('llama3.1', 'user/llama3.1') -``` - -### Delete - -```python -ollama.delete('llama3.1') -``` - -### Pull - -```python -ollama.pull('llama3.1') -``` - -### Push - -```python -ollama.push('user/llama3.1') -``` - -### Embed - -```python -ollama.embed(model='llama3.1', input='The sky is blue because of rayleigh scattering') -``` - -### Embed (batch) - -```python -ollama.embed(model='llama3.1', input=['The sky is blue because of rayleigh scattering', 'Grass is green because of chlorophyll']) -``` - -### Ps - -```python -ollama.ps() -``` - ## Custom client +A custom client can be created by instantiating `Client` or `AsyncClient` from `ollama`. -A custom client can be created with the following fields: - -- `host`: The Ollama host to connect to -- `timeout`: The timeout for requests +All extra keyword arguments are passed into the [`httpx.Client`](https://www.python-httpx.org/api/#client). ```python from ollama import Client -client = Client(host='http://localhost:11434') -response = client.chat(model='llama3.1', messages=[ +client = Client( + host='http://localhost:11434', + headers={'x-some-header': 'some-value'} +) +response = client.chat(model='llama3.2', messages=[ { 'role': 'user', 'content': 'Why is the sky blue?', @@ -139,13 +74,15 @@ response = client.chat(model='llama3.1', messages=[ ## Async client +The `AsyncClient` class is used to make asynchronous requests. It can be configured with the same fields as the `Client` class. 
+ ```python import asyncio from ollama import AsyncClient async def chat(): message = {'role': 'user', 'content': 'Why is the sky blue?'} - response = await AsyncClient().chat(model='llama3.1', messages=[message]) + response = await AsyncClient().chat(model='llama3.2', messages=[message]) asyncio.run(chat()) ``` @@ -158,12 +95,94 @@ from ollama import AsyncClient async def chat(): message = {'role': 'user', 'content': 'Why is the sky blue?'} - async for part in await AsyncClient().chat(model='llama3.1', messages=[message], stream=True): + async for part in await AsyncClient().chat(model='llama3.2', messages=[message], stream=True): print(part['message']['content'], end='', flush=True) asyncio.run(chat()) ``` +## API + +The Ollama Python library's API is designed around the [Ollama REST API](https://github.com/ollama/ollama/blob/main/docs/api.md) + +### Chat + +```python +ollama.chat(model='llama3.2', messages=[{'role': 'user', 'content': 'Why is the sky blue?'}]) +``` + +### Generate + +```python +ollama.generate(model='llama3.2', prompt='Why is the sky blue?') +``` + +### List + +```python +ollama.list() +``` + +### Show + +```python +ollama.show('llama3.2') +``` + +### Create + +```python +modelfile=''' +FROM llama3.2 +SYSTEM You are mario from super mario bros. +''' + +ollama.create(model='example', modelfile=modelfile) +``` + +### Copy + +```python +ollama.copy('llama3.2', 'user/llama3.2') +``` + +### Delete + +```python +ollama.delete('llama3.2') +``` + +### Pull + +```python +ollama.pull('llama3.2') +``` + +### Push + +```python +ollama.push('user/llama3.2') +``` + +### Embed + +```python +ollama.embed(model='llama3.2', input='The sky is blue because of rayleigh scattering') +``` + +### Embed (batch) + +```python +ollama.embed(model='llama3.2', input=['The sky is blue because of rayleigh scattering', 'Grass is green because of chlorophyll']) +``` + +### Ps + +```python +ollama.ps() +``` + + ## Errors Errors are raised if requests return an error status or if an error is detected while streaming. 
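For reference, handling such an error from this library might look like the following minimal sketch (it assumes the package's `ResponseError` exception and its `error`/`status_code` attributes, which are not shown in this patch):

```python
import ollama

model = 'does-not-yet-exist'

try:
  ollama.chat(model=model, messages=[{'role': 'user', 'content': 'Why is the sky blue?'}])
except ollama.ResponseError as e:
  # The server reports the failure and its HTTP status code
  print('Error:', e.error)
  if e.status_code == 404:
    # Model is not available locally, so pull it and try again
    ollama.pull(model)
```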
diff --git a/examples/README.md b/examples/README.md
new file mode 100644
index 0000000..a455c60
--- /dev/null
+++ b/examples/README.md
@@ -0,0 +1,57 @@
+# Running Examples
+
+Run the examples in this directory with:
+```sh
+# Run example
+python3 examples/<example>.py
+```
+
+### Chat - Chat with a model
+- [chat.py](chat.py)
+- [async-chat.py](async-chat.py)
+- [chat-stream.py](chat-stream.py) - Streamed outputs
+- [chat-with-history.py](chat-with-history.py) - Chat with model and maintain history of the conversation
+
+
+### Generate - Generate text with a model
+- [generate.py](generate.py)
+- [async-generate.py](async-generate.py)
+- [generate-stream.py](generate-stream.py) - Streamed outputs
+- [fill-in-middle.py](fill-in-middle.py) - Given a prefix and suffix, fill in the middle
+
+
+### Tools/Function Calling - Call a function with a model
+- [tools.py](tools.py) - Simple example of Tools/Function Calling
+- [async-tools.py](async-tools.py)
+
+
+### Multimodal with Images - Chat with a multimodal (image chat) model
+- [multimodal-chat.py](multimodal-chat.py)
+- [multimodal-generate.py](multimodal-generate.py)
+
+
+### Ollama List - List all downloaded models and their properties
+- [list.py](list.py)
+
+
+### Ollama ps - Show model status with CPU/GPU usage
+- [ps.py](ps.py)
+
+
+### Ollama Pull - Pull a model from Ollama
+Requirement: `pip install tqdm`
+- [pull.py](pull.py)
+
+
+### Ollama Create - Create a model from a Modelfile
+```sh
+python create.py <model name> <file path>
+```
+- [create.py](create.py)
+
+See [ollama/docs/modelfile.md](https://github.com/ollama/ollama/blob/main/docs/modelfile.md) for more information on the Modelfile format.
+
+
+### Ollama Embed - Generate embeddings with a model
+- [embed.py](embed.py)
+
diff --git a/examples/async-chat-stream/README.md b/examples/async-chat-stream/README.md
deleted file mode 100644
index 611295a..0000000
--- a/examples/async-chat-stream/README.md
+++ /dev/null
@@ -1,3 +0,0 @@
-# async-chat-stream
-
-This example demonstrates how to create a conversation history using an asynchronous Ollama client and the chat endpoint. The streaming response is outputted to `stdout` as well as a TTS if enabled with `--speak` and available. Supported TTS are `say` on macOS and `espeak` on Linux.
diff --git a/examples/async-chat-stream/main.py b/examples/async-chat-stream/main.py
deleted file mode 100644
index 6504776..0000000
--- a/examples/async-chat-stream/main.py
+++ /dev/null
@@ -1,59 +0,0 @@
-import shutil
-import asyncio
-import argparse
-
-import ollama
-
-
-async def speak(speaker, content):
-  if speaker:
-    p = await asyncio.create_subprocess_exec(speaker, content)
-    await p.communicate()
-
-
-async def main():
-  parser = argparse.ArgumentParser()
-  parser.add_argument('--speak', default=False, action='store_true')
-  args = parser.parse_args()
-
-  speaker = None
-  if not args.speak:
-    ...
- elif say := shutil.which('say'): - speaker = say - elif (espeak := shutil.which('espeak')) or (espeak := shutil.which('espeak-ng')): - speaker = espeak - - client = ollama.AsyncClient() - - messages = [] - - while True: - if content_in := input('>>> '): - messages.append({'role': 'user', 'content': content_in}) - - content_out = '' - message = {'role': 'assistant', 'content': ''} - async for response in await client.chat(model='mistral', messages=messages, stream=True): - if response['done']: - messages.append(message) - - content = response['message']['content'] - print(content, end='', flush=True) - - content_out += content - if content in ['.', '!', '?', '\n']: - await speak(speaker, content_out) - content_out = '' - - message['content'] += content - - if content_out: - await speak(speaker, content_out) - print() - - -try: - asyncio.run(main()) -except (KeyboardInterrupt, EOFError): - ... diff --git a/examples/async-chat.py b/examples/async-chat.py new file mode 100644 index 0000000..81a50d9 --- /dev/null +++ b/examples/async-chat.py @@ -0,0 +1,19 @@ +import asyncio +from ollama import AsyncClient + + +async def main(): + messages = [ + { + 'role': 'user', + 'content': 'Why is the sky blue?', + }, + ] + + client = AsyncClient() + response = await client.chat('llama3.2', messages=messages) + print(response['message']['content']) + + +if __name__ == '__main__': + asyncio.run(main()) diff --git a/examples/async-generate.py b/examples/async-generate.py new file mode 100644 index 0000000..0097af1 --- /dev/null +++ b/examples/async-generate.py @@ -0,0 +1,15 @@ +import asyncio +import ollama + + +async def main(): + client = ollama.AsyncClient() + response = await client.generate('llama3.2', 'Why is the sky blue?') + print(response['response']) + + +if __name__ == '__main__': + try: + asyncio.run(main()) + except KeyboardInterrupt: + print('\nGoodbye!') diff --git a/examples/async-tools.py b/examples/async-tools.py new file mode 100644 index 0000000..07b3c4a --- /dev/null +++ b/examples/async-tools.py @@ -0,0 +1,78 @@ +import asyncio +from ollama import ChatResponse +import ollama + + +def add_two_numbers(a: int, b: int) -> int: + """ + Add two numbers + + Args: + a (int): The first number + b (int): The second number + + Returns: + int: The sum of the two numbers + """ + return a + b + + +def subtract_two_numbers(a: int, b: int) -> int: + """ + Subtract two numbers + """ + return a - b + + +# Tools can still be manually defined and passed into chat +subtract_two_numbers_tool = { + 'type': 'function', + 'function': { + 'name': 'subtract_two_numbers', + 'description': 'Subtract two numbers', + 'parameters': { + 'type': 'object', + 'required': ['a', 'b'], + 'properties': { + 'a': {'type': 'integer', 'description': 'The first number'}, + 'b': {'type': 'integer', 'description': 'The second number'}, + }, + }, + }, +} + + +async def main(): + client = ollama.AsyncClient() + + prompt = 'What is three plus one?' 
+ print('Prompt:', prompt) + + available_functions = { + 'add_two_numbers': add_two_numbers, + 'subtract_two_numbers': subtract_two_numbers, + } + + response: ChatResponse = await client.chat( + 'llama3.1', + messages=[{'role': 'user', 'content': prompt}], + tools=[add_two_numbers, subtract_two_numbers_tool], + ) + + if response.message.tool_calls: + # There may be multiple tool calls in the response + for tool in response.message.tool_calls: + # Ensure the function is available, and then call it + if function_to_call := available_functions.get(tool.function.name): + print('Calling function:', tool.function.name) + print('Arguments:', tool.function.arguments) + print('Function output:', function_to_call(**tool.function.arguments)) + else: + print('Function', tool.function.name, 'not found') + + +if __name__ == '__main__': + try: + asyncio.run(main()) + except KeyboardInterrupt: + print('\nGoodbye!') diff --git a/examples/chat-stream/main.py b/examples/chat-stream.py similarity index 68% rename from examples/chat-stream/main.py rename to examples/chat-stream.py index 2a57346..cccab01 100644 --- a/examples/chat-stream/main.py +++ b/examples/chat-stream.py @@ -8,8 +8,7 @@ messages = [ }, ] -for part in chat('mistral', messages=messages, stream=True): +for part in chat('llama3.2', messages=messages, stream=True): print(part['message']['content'], end='', flush=True) -# end with a newline print() diff --git a/examples/chat-with-history.py b/examples/chat-with-history.py new file mode 100644 index 0000000..e98d15f --- /dev/null +++ b/examples/chat-with-history.py @@ -0,0 +1,38 @@ +from ollama import chat + + +messages = [ + { + 'role': 'user', + 'content': 'Why is the sky blue?', + }, + { + 'role': 'assistant', + 'content': "The sky is blue because of the way the Earth's atmosphere scatters sunlight.", + }, + { + 'role': 'user', + 'content': 'What is the weather in Tokyo?', + }, + { + 'role': 'assistant', + 'content': 'The weather in Tokyo is typically warm and humid during the summer months, with temperatures often exceeding 30°C (86°F). The city experiences a rainy season from June to September, with heavy rainfall and occasional typhoons. Winter is mild, with temperatures rarely dropping below freezing. 
The city is known for its high-tech and vibrant culture, with many popular tourist attractions such as the Tokyo Tower, Senso-ji Temple, and the bustling Shibuya district.',
+  },
+]
+
+while True:
+  user_input = input('Chat with history: ')
+  response = chat(
+    'llama3.2',
+    messages=messages
+    + [
+      {'role': 'user', 'content': user_input},
+    ],
+  )
+
+  # Add the response to the messages to maintain the history
+  messages += [
+    {'role': 'user', 'content': user_input},
+    {'role': 'assistant', 'content': response.message.content},
+  ]
+  print(response.message.content + '\n')
diff --git a/examples/chat/main.py b/examples/chat.py
similarity index 75%
rename from examples/chat/main.py
rename to examples/chat.py
index 90c5f90..2a30f8a 100644
--- a/examples/chat/main.py
+++ b/examples/chat.py
@@ -1,6 +1,5 @@
 from ollama import chat
 
-
 messages = [
   {
     'role': 'user',
@@ -8,5 +7,5 @@ messages = [
   },
 ]
 
-response = chat('mistral', messages=messages)
+response = chat('llama3.2', messages=messages)
 print(response['message']['content'])
diff --git a/examples/create.py b/examples/create.py
new file mode 100644
index 0000000..d4b5b1f
--- /dev/null
+++ b/examples/create.py
@@ -0,0 +1,30 @@
+import sys
+
+from ollama import create
+
+
+args = sys.argv[1:]
+if len(args) == 2:
+  # create from local file
+  path = args[1]
+else:
+  print('usage: python create.py <model name> <file path>')
+  sys.exit(1)
+
+# TODO: update to real Modelfile values
+modelfile = f"""
+FROM {path}
+"""
+example_modelfile = """
+FROM llama3.2
+# sets the temperature to 1 [higher is more creative, lower is more coherent]
+PARAMETER temperature 1
+# sets the context window size to 4096, this controls how many tokens the LLM can use as context to generate the next token
+PARAMETER num_ctx 4096
+
+# sets a custom system message to specify the behavior of the chat assistant
+SYSTEM You are Mario from super mario bros, acting as an assistant.
+""" + +for response in create(model=args[0], modelfile=modelfile, stream=True): + print(response['status']) diff --git a/examples/create/main.py b/examples/create/main.py deleted file mode 100644 index 0a1161d..0000000 --- a/examples/create/main.py +++ /dev/null @@ -1,20 +0,0 @@ -import sys - -from ollama import create - - -args = sys.argv[1:] -if len(args) == 2: - # create from local file - path = args[1] -else: - print('usage: python main.py ') - sys.exit(1) - -# TODO: update to real Modelfile values -modelfile = f""" -FROM {path} -""" - -for response in create(model=args[0], modelfile=modelfile, stream=True): - print(response['status']) diff --git a/examples/embed.py b/examples/embed.py new file mode 100644 index 0000000..5af145e --- /dev/null +++ b/examples/embed.py @@ -0,0 +1,4 @@ +from ollama import embed + +response = embed(model='llama3.2', input='Hello, world!') +print(response['embeddings']) diff --git a/examples/fill-in-middle/main.py b/examples/fill-in-middle.py similarity index 100% rename from examples/fill-in-middle/main.py rename to examples/fill-in-middle.py diff --git a/examples/generate-stream/main.py b/examples/generate-stream.py similarity index 51% rename from examples/generate-stream/main.py rename to examples/generate-stream.py index a24b410..10b7dc7 100644 --- a/examples/generate-stream/main.py +++ b/examples/generate-stream.py @@ -1,5 +1,5 @@ from ollama import generate -for part in generate('mistral', 'Why is the sky blue?', stream=True): +for part in generate('llama3.2', 'Why is the sky blue?', stream=True): print(part['response'], end='', flush=True) diff --git a/examples/generate/main.py b/examples/generate.py similarity index 50% rename from examples/generate/main.py rename to examples/generate.py index e39e295..1a2311d 100644 --- a/examples/generate/main.py +++ b/examples/generate.py @@ -1,5 +1,5 @@ from ollama import generate -response = generate('mistral', 'Why is the sky blue?') +response = generate('llama3.2', 'Why is the sky blue?') print(response['response']) diff --git a/examples/list.py b/examples/list.py new file mode 100644 index 0000000..32d4525 --- /dev/null +++ b/examples/list.py @@ -0,0 +1,14 @@ +from ollama import list +from ollama import ListResponse + +response: ListResponse = list() + +for model in response.models: + print('Name:', model.model) + print(' Size (MB):', f'{(model.size.real / 1024 / 1024):.2f}') + if model.details: + print(' Format:', model.details.format) + print(' Family:', model.details.family) + print(' Parameter Size:', model.details.parameter_size) + print(' Quantization Level:', model.details.quantization_level) + print('\n') diff --git a/examples/multimodal-chat.py b/examples/multimodal-chat.py new file mode 100644 index 0000000..8aff9f4 --- /dev/null +++ b/examples/multimodal-chat.py @@ -0,0 +1,23 @@ +from ollama import chat +# from pathlib import Path + +# Pass in the path to the image +path = input('Please enter the path to the image: ') + +# You can also pass in base64 encoded image data +# img = base64.b64encode(Path(path).read_bytes()).decode() +# or the raw bytes +# img = Path(path).read_bytes() + +response = chat( + model='llama3.2-vision', + messages=[ + { + 'role': 'user', + 'content': 'What is in this image? 
Be concise.', + 'images': [path], + } + ], +) + +print(response.message.content) diff --git a/examples/multimodal/main.py b/examples/multimodal-generate.py similarity index 100% rename from examples/multimodal/main.py rename to examples/multimodal-generate.py diff --git a/examples/ps.py b/examples/ps.py new file mode 100644 index 0000000..34d5230 --- /dev/null +++ b/examples/ps.py @@ -0,0 +1,27 @@ +from ollama import ps, pull, chat +from ollama import ProcessResponse + +# Ensure at least one model is loaded +response = pull('llama3.2', stream=True) +progress_states = set() +for progress in response: + if progress.get('status') in progress_states: + continue + progress_states.add(progress.get('status')) + print(progress.get('status')) + +print('\n') + +print('Waiting for model to load... \n') +chat(model='llama3.2', messages=[{'role': 'user', 'content': 'Why is the sky blue?'}]) + + +response: ProcessResponse = ps() +for model in response.models: + print('Model: ', model.model) + print(' Digest: ', model.digest) + print(' Expires at: ', model.expires_at) + print(' Size: ', model.size) + print(' Size vram: ', model.size_vram) + print(' Details: ', model.details) + print('\n') diff --git a/examples/ps/main.py b/examples/ps/main.py deleted file mode 100644 index 822d09a..0000000 --- a/examples/ps/main.py +++ /dev/null @@ -1,31 +0,0 @@ -from ollama import ps, pull, chat - -response = pull('mistral', stream=True) -progress_states = set() -for progress in response: - if progress.get('status') in progress_states: - continue - progress_states.add(progress.get('status')) - print(progress.get('status')) - -print('\n') - -response = chat('mistral', messages=[{'role': 'user', 'content': 'Hello!'}]) -print(response['message']['content']) - -print('\n') - -response = ps() - -name = response['models'][0]['name'] -size = response['models'][0]['size'] -size_vram = response['models'][0]['size_vram'] - -if size == size_vram: - print(f'{name}: 100% GPU') -elif not size_vram: - print(f'{name}: 100% CPU') -else: - size_cpu = size - size_vram - cpu_percent = round(size_cpu / size * 100) - print(f'{name}: {cpu_percent}% CPU/{100 - cpu_percent}% GPU') diff --git a/examples/pull-progress/README.md b/examples/pull-progress/README.md deleted file mode 100644 index 8a44f60..0000000 --- a/examples/pull-progress/README.md +++ /dev/null @@ -1,9 +0,0 @@ -# pull-progress - -This example emulates `ollama pull` using the Python library and [`tqdm`](https://tqdm.github.io/). 
- -## Setup - -```shell -pip install -r requirements.txt -``` diff --git a/examples/pull-progress/requirements.txt b/examples/pull-progress/requirements.txt deleted file mode 100644 index ae3df91..0000000 --- a/examples/pull-progress/requirements.txt +++ /dev/null @@ -1 +0,0 @@ -tqdm==4.66.1 diff --git a/examples/pull-progress/main.py b/examples/pull.py similarity index 92% rename from examples/pull-progress/main.py rename to examples/pull.py index 89b2f3a..e24f2e9 100644 --- a/examples/pull-progress/main.py +++ b/examples/pull.py @@ -3,7 +3,7 @@ from ollama import pull current_digest, bars = '', {} -for progress in pull('mistral', stream=True): +for progress in pull('llama3.2', stream=True): digest = progress.get('digest', '') if digest != current_digest and current_digest in bars: bars[current_digest].close() diff --git a/examples/tools.py b/examples/tools.py new file mode 100644 index 0000000..6151cd9 --- /dev/null +++ b/examples/tools.py @@ -0,0 +1,66 @@ +from ollama import chat +from ollama import ChatResponse + + +def add_two_numbers(a: int, b: int) -> int: + """ + Add two numbers + + Args: + a (int): The first number + b (int): The second number + + Returns: + int: The sum of the two numbers + """ + return a + b + + +def subtract_two_numbers(a: int, b: int) -> int: + """ + Subtract two numbers + """ + return a - b + + +# Tools can still be manually defined and passed into chat +subtract_two_numbers_tool = { + 'type': 'function', + 'function': { + 'name': 'subtract_two_numbers', + 'description': 'Subtract two numbers', + 'parameters': { + 'type': 'object', + 'required': ['a', 'b'], + 'properties': { + 'a': {'type': 'integer', 'description': 'The first number'}, + 'b': {'type': 'integer', 'description': 'The second number'}, + }, + }, + }, +} + +prompt = 'What is three plus one?' +print('Prompt:', prompt) + +available_functions = { + 'add_two_numbers': add_two_numbers, + 'subtract_two_numbers': subtract_two_numbers, +} + +response: ChatResponse = chat( + 'llama3.1', + messages=[{'role': 'user', 'content': prompt}], + tools=[add_two_numbers, subtract_two_numbers_tool], +) + +if response.message.tool_calls: + # There may be multiple tool calls in the response + for tool in response.message.tool_calls: + # Ensure the function is available, and then call it + if function_to_call := available_functions.get(tool.function.name): + print('Calling function:', tool.function.name) + print('Arguments:', tool.function.arguments) + print('Function output:', function_to_call(**tool.function.arguments)) + else: + print('Function', tool.function.name, 'not found') diff --git a/examples/tools/README.md b/examples/tools/README.md deleted file mode 100644 index 85ca5dd..0000000 --- a/examples/tools/README.md +++ /dev/null @@ -1,3 +0,0 @@ -# tools - -This example demonstrates how to utilize tool calls with an asynchronous Ollama client and the chat endpoint. 
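The removed async tools example below also sent each tool's result back to the model for a final answer, a step the new tools.py stops short of. A minimal sketch of that follow-up in the new style (assuming the same `llama3.1` model and `add_two_numbers` tool used in tools.py above) could look like this:

```python
from ollama import chat


def add_two_numbers(a: int, b: int) -> int:
  """Add two numbers"""
  return a + b


messages = [{'role': 'user', 'content': 'What is three plus one?'}]
response = chat('llama3.1', messages=messages, tools=[add_two_numbers])

if response.message.tool_calls:
  # Keep the assistant's tool call in the history, then run each requested tool
  messages.append(response.message)
  for tool in response.message.tool_calls:
    result = add_two_numbers(**tool.function.arguments)
    messages.append({'role': 'tool', 'content': str(result)})

  # Second call: the model now sees the tool output and can answer in prose
  final = chat('llama3.1', messages=messages)
  print(final.message.content)
```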
diff --git a/examples/tools/main.py b/examples/tools/main.py deleted file mode 100644 index 133b238..0000000 --- a/examples/tools/main.py +++ /dev/null @@ -1,87 +0,0 @@ -import json -import ollama -import asyncio - - -# Simulates an API call to get flight times -# In a real application, this would fetch data from a live database or API -def get_flight_times(departure: str, arrival: str) -> str: - flights = { - 'NYC-LAX': {'departure': '08:00 AM', 'arrival': '11:30 AM', 'duration': '5h 30m'}, - 'LAX-NYC': {'departure': '02:00 PM', 'arrival': '10:30 PM', 'duration': '5h 30m'}, - 'LHR-JFK': {'departure': '10:00 AM', 'arrival': '01:00 PM', 'duration': '8h 00m'}, - 'JFK-LHR': {'departure': '09:00 PM', 'arrival': '09:00 AM', 'duration': '7h 00m'}, - 'CDG-DXB': {'departure': '11:00 AM', 'arrival': '08:00 PM', 'duration': '6h 00m'}, - 'DXB-CDG': {'departure': '03:00 AM', 'arrival': '07:30 AM', 'duration': '7h 30m'}, - } - - key = f'{departure}-{arrival}'.upper() - return json.dumps(flights.get(key, {'error': 'Flight not found'})) - - -async def run(model: str): - client = ollama.AsyncClient() - # Initialize conversation with a user query - messages = [{'role': 'user', 'content': 'What is the flight time from New York (NYC) to Los Angeles (LAX)?'}] - - # First API call: Send the query and function description to the model - response = await client.chat( - model=model, - messages=messages, - tools=[ - { - 'type': 'function', - 'function': { - 'name': 'get_flight_times', - 'description': 'Get the flight times between two cities', - 'parameters': { - 'type': 'object', - 'properties': { - 'departure': { - 'type': 'string', - 'description': 'The departure city (airport code)', - }, - 'arrival': { - 'type': 'string', - 'description': 'The arrival city (airport code)', - }, - }, - 'required': ['departure', 'arrival'], - }, - }, - }, - ], - ) - - # Add the model's response to the conversation history - messages.append(response['message']) - - # Check if the model decided to use the provided function - if not response['message'].get('tool_calls'): - print("The model didn't use the function. Its response was:") - print(response['message']['content']) - return - - # Process function calls made by the model - if response['message'].get('tool_calls'): - available_functions = { - 'get_flight_times': get_flight_times, - } - for tool in response['message']['tool_calls']: - function_to_call = available_functions[tool['function']['name']] - function_response = function_to_call(tool['function']['arguments']['departure'], tool['function']['arguments']['arrival']) - # Add function response to the conversation - messages.append( - { - 'role': 'tool', - 'content': function_response, - } - ) - - # Second API call: Get final response from the model - final_response = await client.chat(model=model, messages=messages) - print(final_response['message']['content']) - - -# Run the async function -asyncio.run(run('mistral')) diff --git a/ollama/_client.py b/ollama/_client.py index a8a19d3..548f343 100644 --- a/ollama/_client.py +++ b/ollama/_client.py @@ -319,7 +319,7 @@ class Client(BaseClient): ''' return a + b - client.chat(model='llama3.1:8b', tools=[add_two_numbers], messages=[...]) + client.chat(model='llama3.2', tools=[add_two_numbers], messages=[...]) Raises `RequestError` if a model is not provided. 
@@ -821,7 +821,7 @@ class AsyncClient(BaseClient): ''' return a + b - await client.chat(model='llama3.1:8b', tools=[add_two_numbers], messages=[...]) + await client.chat(model='llama3.2', tools=[add_two_numbers], messages=[...]) Raises `RequestError` if a model is not provided.