Mirror of https://github.com/ollama/ollama-python.git (synced 2026-01-13 21:57:16 +08:00)

Commit 64c1eb78ff (parent 139c89e833)

Examples refactor (#329)

* Examples and README updates

Co-authored-by: fujitatomoya <tomoya.fujita825@gmail.com>
Co-authored-by: Michael Yang <mxyng@pm.me>

README.md (209 lines changed)
@@ -2,6 +2,12 @@

 The Ollama Python library provides the easiest way to integrate Python 3.8+ projects with [Ollama](https://github.com/ollama/ollama).

+## Prerequisites
+
+- [Ollama](https://ollama.com/download) should be installed and running
+- Pull a model to use with the library: `ollama pull <model>` e.g. `ollama pull llama3.2`
+- See [Ollama.com](https://ollama.com/search) for more information on the models available.
+
 ## Install

 ```sh
@@ -11,25 +17,34 @@ pip install ollama

 ## Usage

 ```python
-import ollama
-
-response = ollama.chat(model='llama3.1', messages=[
+from ollama import chat
+from ollama import ChatResponse
+
+response: ChatResponse = chat(model='llama3.2', messages=[
   {
     'role': 'user',
     'content': 'Why is the sky blue?',
   },
 ])
 print(response['message']['content'])
+# or access fields directly from the response object
+print(response.message.content)
 ```

+See [_types.py](ollama/_types.py) for more information on the response types.
+
 ## Streaming responses

-Response streaming can be enabled by setting `stream=True`, modifying function calls to return a Python generator where each part is an object in the stream.
+Response streaming can be enabled by setting `stream=True`.
+
+> [!NOTE]
+> Streaming Tool/Function calling is not yet supported.

 ```python
-import ollama
+from ollama import chat

-stream = ollama.chat(
-  model='llama3.1',
+stream = chat(
+  model='llama3.2',
   messages=[{'role': 'user', 'content': 'Why is the sky blue?'}],
   stream=True,
 )
@@ -38,98 +53,18 @@ for chunk in stream:
   print(chunk['message']['content'], end='', flush=True)
 ```

-## API
-
-The Ollama Python library's API is designed around the [Ollama REST API](https://github.com/ollama/ollama/blob/main/docs/api.md)
-
-### Chat
-
-```python
-ollama.chat(model='llama3.1', messages=[{'role': 'user', 'content': 'Why is the sky blue?'}])
-```
-
-### Generate
-
-```python
-ollama.generate(model='llama3.1', prompt='Why is the sky blue?')
-```
-
-### List
-
-```python
-ollama.list()
-```
-
-### Show
-
-```python
-ollama.show('llama3.1')
-```
-
-### Create
-
-```python
-modelfile='''
-FROM llama3.1
-SYSTEM You are mario from super mario bros.
-'''
-
-ollama.create(model='example', modelfile=modelfile)
-```
-
-### Copy
-
-```python
-ollama.copy('llama3.1', 'user/llama3.1')
-```
-
-### Delete
-
-```python
-ollama.delete('llama3.1')
-```
-
-### Pull
-
-```python
-ollama.pull('llama3.1')
-```
-
-### Push
-
-```python
-ollama.push('user/llama3.1')
-```
-
-### Embed
-
-```python
-ollama.embed(model='llama3.1', input='The sky is blue because of rayleigh scattering')
-```
-
-### Embed (batch)
-
-```python
-ollama.embed(model='llama3.1', input=['The sky is blue because of rayleigh scattering', 'Grass is green because of chlorophyll'])
-```
-
-### Ps
-
-```python
-ollama.ps()
-```
-
 ## Custom client

-A custom client can be created with the following fields:
-
-- `host`: The Ollama host to connect to
-- `timeout`: The timeout for requests
+A custom client can be created by instantiating `Client` or `AsyncClient` from `ollama`.
+
+All extra keyword arguments are passed into the [`httpx.Client`](https://www.python-httpx.org/api/#client).

 ```python
 from ollama import Client
-client = Client(host='http://localhost:11434')
-response = client.chat(model='llama3.1', messages=[
+client = Client(
+  host='http://localhost:11434',
+  headers={'x-some-header': 'some-value'}
+)
+response = client.chat(model='llama3.2', messages=[
   {
     'role': 'user',
     'content': 'Why is the sky blue?',
@@ -139,13 +74,15 @@ response = client.chat(model='llama3.1', messages=[

 ## Async client

 The `AsyncClient` class is used to make asynchronous requests. It can be configured with the same fields as the `Client` class.

 ```python
 import asyncio
 from ollama import AsyncClient

 async def chat():
   message = {'role': 'user', 'content': 'Why is the sky blue?'}
-  response = await AsyncClient().chat(model='llama3.1', messages=[message])
+  response = await AsyncClient().chat(model='llama3.2', messages=[message])

 asyncio.run(chat())
 ```
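Aside (not part of the diff): since `AsyncClient` accepts the same configuration as `Client`, a minimal sketch of a configured async client could look like the following. The header name and timeout value are illustrative assumptions; extra keyword arguments are simply forwarded to the underlying httpx async client.

```python
import asyncio

from ollama import AsyncClient

# Hypothetical configuration values, for illustration only.
client = AsyncClient(
  host='http://localhost:11434',
  headers={'x-some-header': 'some-value'},  # assumed custom header
  timeout=30,  # assumed request timeout in seconds, forwarded to httpx
)


async def main():
  response = await client.chat(model='llama3.2', messages=[{'role': 'user', 'content': 'Why is the sky blue?'}])
  print(response.message.content)


asyncio.run(main())
```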
@@ -158,12 +95,94 @@ from ollama import AsyncClient


 async def chat():
   message = {'role': 'user', 'content': 'Why is the sky blue?'}
-  async for part in await AsyncClient().chat(model='llama3.1', messages=[message], stream=True):
+  async for part in await AsyncClient().chat(model='llama3.2', messages=[message], stream=True):
     print(part['message']['content'], end='', flush=True)


 asyncio.run(chat())
 ```

+## API
+
+The Ollama Python library's API is designed around the [Ollama REST API](https://github.com/ollama/ollama/blob/main/docs/api.md)
+
+### Chat
+
+```python
+ollama.chat(model='llama3.2', messages=[{'role': 'user', 'content': 'Why is the sky blue?'}])
+```
+
+### Generate
+
+```python
+ollama.generate(model='llama3.2', prompt='Why is the sky blue?')
+```
+
+### List
+
+```python
+ollama.list()
+```
+
+### Show
+
+```python
+ollama.show('llama3.2')
+```
+
+### Create
+
+```python
+modelfile='''
+FROM llama3.2
+SYSTEM You are mario from super mario bros.
+'''
+
+ollama.create(model='example', modelfile=modelfile)
+```
+
+### Copy
+
+```python
+ollama.copy('llama3.2', 'user/llama3.2')
+```
+
+### Delete
+
+```python
+ollama.delete('llama3.2')
+```
+
+### Pull
+
+```python
+ollama.pull('llama3.2')
+```
+
+### Push
+
+```python
+ollama.push('user/llama3.2')
+```
+
+### Embed
+
+```python
+ollama.embed(model='llama3.2', input='The sky is blue because of rayleigh scattering')
+```
+
+### Embed (batch)
+
+```python
+ollama.embed(model='llama3.2', input=['The sky is blue because of rayleigh scattering', 'Grass is green because of chlorophyll'])
+```
+
+### Ps
+
+```python
+ollama.ps()
+```
+

 ## Errors

 Errors are raised if requests return an error status or if an error is detected while streaming.
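Aside (not part of the diff): a minimal sketch of handling these errors with `ollama.ResponseError`; the model name below is a deliberately nonexistent placeholder.

```python
import ollama

model = 'does-not-yet-exist'  # placeholder name, intentionally not a real model

try:
  ollama.chat(model)
except ollama.ResponseError as e:
  # ResponseError exposes the server's error message and the HTTP status code
  print('Error:', e.error)
  if e.status_code == 404:
    # the model is not available locally; pull it before retrying
    ollama.pull(model)
```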
examples/README.md (new file, 57 lines)
@@ -0,0 +1,57 @@
# Running Examples

Run the examples in this directory with:

```sh
# Run example
python3 examples/<example>.py
```

### Chat - Chat with a model
- [chat.py](chat.py)
- [async-chat.py](async-chat.py)
- [chat-stream.py](chat-stream.py) - Streamed outputs
- [chat-with-history.py](chat-with-history.py) - Chat with model and maintain history of the conversation

### Generate - Generate text with a model
- [generate.py](generate.py)
- [async-generate.py](async-generate.py)
- [generate-stream.py](generate-stream.py) - Streamed outputs
- [fill-in-middle.py](fill-in-middle.py) - Given a prefix and suffix, fill in the middle

### Tools/Function Calling - Call a function with a model
- [tools.py](tools.py) - Simple example of Tools/Function Calling
- [async-tools.py](async-tools.py)

### Multimodal with Images - Chat with a multimodal (image chat) model
- [multimodal_chat.py](multimodal_chat.py)
- [multimodal_generate.py](multimodal_generate.py)

### Ollama List - List all downloaded models and their properties
- [list.py](list.py)

### Ollama ps - Show model status with CPU/GPU usage
- [ps.py](ps.py)

### Ollama Pull - Pull a model from Ollama
Requirement: `pip install tqdm`
- [pull.py](pull.py)

### Ollama Create - Create a model from a Modelfile
```sh
python create.py <model> <modelfile>
```
- [create.py](create.py)

See [ollama/docs/modelfile.md](https://github.com/ollama/ollama/blob/main/docs/modelfile.md) for more information on the Modelfile format.

### Ollama Embed - Generate embeddings with a model
- [embed.py](embed.py)
Deleted file (3 lines)
@@ -1,3 +0,0 @@
# async-chat-stream

This example demonstrates how to create a conversation history using an asynchronous Ollama client and the chat endpoint. The streaming response is outputted to `stdout` as well as a TTS if enabled with `--speak` and available. Supported TTS are `say` on macOS and `espeak` on Linux.
Deleted file (59 lines)
@@ -1,59 +0,0 @@
import shutil
import asyncio
import argparse

import ollama


async def speak(speaker, content):
  if speaker:
    p = await asyncio.create_subprocess_exec(speaker, content)
    await p.communicate()


async def main():
  parser = argparse.ArgumentParser()
  parser.add_argument('--speak', default=False, action='store_true')
  args = parser.parse_args()

  speaker = None
  if not args.speak:
    ...
  elif say := shutil.which('say'):
    speaker = say
  elif (espeak := shutil.which('espeak')) or (espeak := shutil.which('espeak-ng')):
    speaker = espeak

  client = ollama.AsyncClient()

  messages = []

  while True:
    if content_in := input('>>> '):
      messages.append({'role': 'user', 'content': content_in})

      content_out = ''
      message = {'role': 'assistant', 'content': ''}
      async for response in await client.chat(model='mistral', messages=messages, stream=True):
        if response['done']:
          messages.append(message)

        content = response['message']['content']
        print(content, end='', flush=True)

        content_out += content
        if content in ['.', '!', '?', '\n']:
          await speak(speaker, content_out)
          content_out = ''

        message['content'] += content

      if content_out:
        await speak(speaker, content_out)
      print()


try:
  asyncio.run(main())
except (KeyboardInterrupt, EOFError):
  ...
examples/async-chat.py (new file, 19 lines)
@@ -0,0 +1,19 @@
import asyncio
from ollama import AsyncClient


async def main():
  messages = [
    {
      'role': 'user',
      'content': 'Why is the sky blue?',
    },
  ]

  client = AsyncClient()
  response = await client.chat('llama3.2', messages=messages)
  print(response['message']['content'])


if __name__ == '__main__':
  asyncio.run(main())
examples/async-generate.py (new file, 15 lines)
@@ -0,0 +1,15 @@
import asyncio
import ollama


async def main():
  client = ollama.AsyncClient()
  response = await client.generate('llama3.2', 'Why is the sky blue?')
  print(response['response'])


if __name__ == '__main__':
  try:
    asyncio.run(main())
  except KeyboardInterrupt:
    print('\nGoodbye!')
examples/async-tools.py (new file, 78 lines)
@@ -0,0 +1,78 @@
import asyncio
from ollama import ChatResponse
import ollama


def add_two_numbers(a: int, b: int) -> int:
  """
  Add two numbers

  Args:
    a (int): The first number
    b (int): The second number

  Returns:
    int: The sum of the two numbers
  """
  return a + b


def subtract_two_numbers(a: int, b: int) -> int:
  """
  Subtract two numbers
  """
  return a - b


# Tools can still be manually defined and passed into chat
subtract_two_numbers_tool = {
  'type': 'function',
  'function': {
    'name': 'subtract_two_numbers',
    'description': 'Subtract two numbers',
    'parameters': {
      'type': 'object',
      'required': ['a', 'b'],
      'properties': {
        'a': {'type': 'integer', 'description': 'The first number'},
        'b': {'type': 'integer', 'description': 'The second number'},
      },
    },
  },
}


async def main():
  client = ollama.AsyncClient()

  prompt = 'What is three plus one?'
  print('Prompt:', prompt)

  available_functions = {
    'add_two_numbers': add_two_numbers,
    'subtract_two_numbers': subtract_two_numbers,
  }

  response: ChatResponse = await client.chat(
    'llama3.1',
    messages=[{'role': 'user', 'content': prompt}],
    tools=[add_two_numbers, subtract_two_numbers_tool],
  )

  if response.message.tool_calls:
    # There may be multiple tool calls in the response
    for tool in response.message.tool_calls:
      # Ensure the function is available, and then call it
      if function_to_call := available_functions.get(tool.function.name):
        print('Calling function:', tool.function.name)
        print('Arguments:', tool.function.arguments)
        print('Function output:', function_to_call(**tool.function.arguments))
      else:
        print('Function', tool.function.name, 'not found')


if __name__ == '__main__':
  try:
    asyncio.run(main())
  except KeyboardInterrupt:
    print('\nGoodbye!')
@@ -8,8 +8,7 @@ messages = [
   },
 ]

-for part in chat('mistral', messages=messages, stream=True):
+for part in chat('llama3.2', messages=messages, stream=True):
   print(part['message']['content'], end='', flush=True)

-# end with a newline
 print()
examples/chat-with-history.py (new file, 38 lines)
@@ -0,0 +1,38 @@
from ollama import chat


messages = [
  {
    'role': 'user',
    'content': 'Why is the sky blue?',
  },
  {
    'role': 'assistant',
    'content': "The sky is blue because of the way the Earth's atmosphere scatters sunlight.",
  },
  {
    'role': 'user',
    'content': 'What is the weather in Tokyo?',
  },
  {
    'role': 'assistant',
    'content': 'The weather in Tokyo is typically warm and humid during the summer months, with temperatures often exceeding 30°C (86°F). The city experiences a rainy season from June to September, with heavy rainfall and occasional typhoons. Winter is mild, with temperatures rarely dropping below freezing. The city is known for its high-tech and vibrant culture, with many popular tourist attractions such as the Tokyo Tower, Senso-ji Temple, and the bustling Shibuya district.',
  },
]

while True:
  user_input = input('Chat with history: ')
  response = chat(
    'llama3.2',
    messages=messages
    + [
      {'role': 'user', 'content': user_input},
    ],
  )

  # Add the exchange to the messages to maintain the history
  messages += [
    {'role': 'user', 'content': user_input},
    {'role': 'assistant', 'content': response.message.content},
  ]
  print(response.message.content + '\n')
@@ -1,6 +1,5 @@
 from ollama import chat

-
 messages = [
   {
     'role': 'user',

@@ -8,5 +7,5 @@ messages = [
   },
 ]

-response = chat('mistral', messages=messages)
+response = chat('llama3.2', messages=messages)
 print(response['message']['content'])
examples/create.py (new file, 30 lines)
@@ -0,0 +1,30 @@
import sys

from ollama import create


args = sys.argv[1:]
if len(args) == 2:
  # create from local file
  path = args[1]
else:
  print('usage: python create.py <name> <filepath>')
  sys.exit(1)

# TODO: update to real Modelfile values
modelfile = f"""
FROM {path}
"""

example_modelfile = """
FROM llama3.2
# sets the temperature to 1 [higher is more creative, lower is more coherent]
PARAMETER temperature 1
# sets the context window size to 4096, this controls how many tokens the LLM can use as context to generate the next token
PARAMETER num_ctx 4096

# sets a custom system message to specify the behavior of the chat assistant
SYSTEM You are Mario from super mario bros, acting as an assistant.
"""

for response in create(model=args[0], modelfile=modelfile, stream=True):
  print(response['status'])
Deleted file (20 lines)
@@ -1,20 +0,0 @@
import sys

from ollama import create


args = sys.argv[1:]
if len(args) == 2:
  # create from local file
  path = args[1]
else:
  print('usage: python main.py <name> <filepath>')
  sys.exit(1)

# TODO: update to real Modelfile values
modelfile = f"""
FROM {path}
"""

for response in create(model=args[0], modelfile=modelfile, stream=True):
  print(response['status'])
examples/embed.py (new file, 4 lines)
@@ -0,0 +1,4 @@
from ollama import embed

response = embed(model='llama3.2', input='Hello, world!')
print(response['embeddings'])
@@ -1,5 +1,5 @@
 from ollama import generate


-for part in generate('mistral', 'Why is the sky blue?', stream=True):
+for part in generate('llama3.2', 'Why is the sky blue?', stream=True):
   print(part['response'], end='', flush=True)
@@ -1,5 +1,5 @@
 from ollama import generate


-response = generate('mistral', 'Why is the sky blue?')
+response = generate('llama3.2', 'Why is the sky blue?')
 print(response['response'])
examples/list.py (new file, 14 lines)
@@ -0,0 +1,14 @@
from ollama import list
from ollama import ListResponse

response: ListResponse = list()

for model in response.models:
  print('Name:', model.model)
  print('  Size (MB):', f'{(model.size.real / 1024 / 1024):.2f}')
  if model.details:
    print('  Format:', model.details.format)
    print('  Family:', model.details.family)
    print('  Parameter Size:', model.details.parameter_size)
    print('  Quantization Level:', model.details.quantization_level)
  print('\n')
examples/multimodal-chat.py (new file, 23 lines)
@@ -0,0 +1,23 @@
from ollama import chat
# from pathlib import Path

# Pass in the path to the image
path = input('Please enter the path to the image: ')

# You can also pass in base64 encoded image data
# img = base64.b64encode(Path(path).read_bytes()).decode()
# or the raw bytes
# img = Path(path).read_bytes()

response = chat(
  model='llama3.2-vision',
  messages=[
    {
      'role': 'user',
      'content': 'What is in this image? Be concise.',
      'images': [path],
    }
  ],
)

print(response.message.content)
examples/ps.py (new file, 27 lines)
@@ -0,0 +1,27 @@
from ollama import ps, pull, chat
from ollama import ProcessResponse

# Ensure at least one model is loaded
response = pull('llama3.2', stream=True)
progress_states = set()
for progress in response:
  if progress.get('status') in progress_states:
    continue
  progress_states.add(progress.get('status'))
  print(progress.get('status'))

print('\n')

print('Waiting for model to load... \n')
chat(model='llama3.2', messages=[{'role': 'user', 'content': 'Why is the sky blue?'}])


response: ProcessResponse = ps()
for model in response.models:
  print('Model: ', model.model)
  print('  Digest: ', model.digest)
  print('  Expires at: ', model.expires_at)
  print('  Size: ', model.size)
  print('  Size vram: ', model.size_vram)
  print('  Details: ', model.details)
  print('\n')
Deleted file (31 lines)
@@ -1,31 +0,0 @@
from ollama import ps, pull, chat

response = pull('mistral', stream=True)
progress_states = set()
for progress in response:
  if progress.get('status') in progress_states:
    continue
  progress_states.add(progress.get('status'))
  print(progress.get('status'))

print('\n')

response = chat('mistral', messages=[{'role': 'user', 'content': 'Hello!'}])
print(response['message']['content'])

print('\n')

response = ps()

name = response['models'][0]['name']
size = response['models'][0]['size']
size_vram = response['models'][0]['size_vram']

if size == size_vram:
  print(f'{name}: 100% GPU')
elif not size_vram:
  print(f'{name}: 100% CPU')
else:
  size_cpu = size - size_vram
  cpu_percent = round(size_cpu / size * 100)
  print(f'{name}: {cpu_percent}% CPU/{100 - cpu_percent}% GPU')
Deleted file (9 lines)
@@ -1,9 +0,0 @@
# pull-progress

This example emulates `ollama pull` using the Python library and [`tqdm`](https://tqdm.github.io/).

## Setup

```shell
pip install -r requirements.txt
```
Deleted file (1 line)
@@ -1 +0,0 @@
tqdm==4.66.1
@@ -3,7 +3,7 @@ from ollama import pull


 current_digest, bars = '', {}
-for progress in pull('mistral', stream=True):
+for progress in pull('llama3.2', stream=True):
   digest = progress.get('digest', '')
   if digest != current_digest and current_digest in bars:
     bars[current_digest].close()
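Aside (not part of the diff): the pull hunk above is cut off by the diff context. A hedged sketch of how such a `tqdm`-based progress loop typically continues is shown below, assuming the pull stream exposes `status`, `digest`, `total`, and `completed` fields.

```python
from tqdm import tqdm

from ollama import pull

current_digest, bars = '', {}
for progress in pull('llama3.2', stream=True):
  digest = progress.get('digest', '')
  if digest != current_digest and current_digest in bars:
    # a new layer started downloading, so close the previous layer's bar
    bars[current_digest].close()

  if not digest:
    # status-only updates (e.g. 'pulling manifest') carry no digest
    print(progress.get('status'))
    continue

  if digest not in bars and (total := progress.get('total')):
    # one progress bar per layer, sized by the layer's total byte count
    bars[digest] = tqdm(total=total, desc=f'pulling {digest[7:19]}', unit='B', unit_scale=True)

  if completed := progress.get('completed'):
    # advance the bar by the delta since the last update
    bars[digest].update(completed - bars[digest].n)

  current_digest = digest
```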
examples/tools.py (new file, 66 lines)
@@ -0,0 +1,66 @@
from ollama import chat
from ollama import ChatResponse


def add_two_numbers(a: int, b: int) -> int:
  """
  Add two numbers

  Args:
    a (int): The first number
    b (int): The second number

  Returns:
    int: The sum of the two numbers
  """
  return a + b


def subtract_two_numbers(a: int, b: int) -> int:
  """
  Subtract two numbers
  """
  return a - b


# Tools can still be manually defined and passed into chat
subtract_two_numbers_tool = {
  'type': 'function',
  'function': {
    'name': 'subtract_two_numbers',
    'description': 'Subtract two numbers',
    'parameters': {
      'type': 'object',
      'required': ['a', 'b'],
      'properties': {
        'a': {'type': 'integer', 'description': 'The first number'},
        'b': {'type': 'integer', 'description': 'The second number'},
      },
    },
  },
}

prompt = 'What is three plus one?'
print('Prompt:', prompt)

available_functions = {
  'add_two_numbers': add_two_numbers,
  'subtract_two_numbers': subtract_two_numbers,
}

response: ChatResponse = chat(
  'llama3.1',
  messages=[{'role': 'user', 'content': prompt}],
  tools=[add_two_numbers, subtract_two_numbers_tool],
)

if response.message.tool_calls:
  # There may be multiple tool calls in the response
  for tool in response.message.tool_calls:
    # Ensure the function is available, and then call it
    if function_to_call := available_functions.get(tool.function.name):
      print('Calling function:', tool.function.name)
      print('Arguments:', tool.function.arguments)
      print('Function output:', function_to_call(**tool.function.arguments))
    else:
      print('Function', tool.function.name, 'not found')
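Aside (not part of the diff): a natural follow-up to tools.py, sketched here reusing the names defined in the example above (`prompt`, `response`, `available_functions`), is to feed the tool outputs back to the model as `role: 'tool'` messages and ask for a final answer, as the deleted example further below also does.

```python
# Continuation sketch of examples/tools.py above: send tool outputs back to the model.
messages = [{'role': 'user', 'content': prompt}]
messages.append(response.message)  # assistant message that contains the tool_calls

for tool in response.message.tool_calls or []:
  if function_to_call := available_functions.get(tool.function.name):
    output = function_to_call(**tool.function.arguments)
    # report each tool result under the 'tool' role so the model can use it
    messages.append({'role': 'tool', 'content': str(output)})

final_response = chat('llama3.1', messages=messages)
print('Final response:', final_response.message.content)
```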
Deleted file (3 lines)
@@ -1,3 +0,0 @@
# tools

This example demonstrates how to utilize tool calls with an asynchronous Ollama client and the chat endpoint.
Deleted file (87 lines)
@@ -1,87 +0,0 @@
import json
import ollama
import asyncio


# Simulates an API call to get flight times
# In a real application, this would fetch data from a live database or API
def get_flight_times(departure: str, arrival: str) -> str:
  flights = {
    'NYC-LAX': {'departure': '08:00 AM', 'arrival': '11:30 AM', 'duration': '5h 30m'},
    'LAX-NYC': {'departure': '02:00 PM', 'arrival': '10:30 PM', 'duration': '5h 30m'},
    'LHR-JFK': {'departure': '10:00 AM', 'arrival': '01:00 PM', 'duration': '8h 00m'},
    'JFK-LHR': {'departure': '09:00 PM', 'arrival': '09:00 AM', 'duration': '7h 00m'},
    'CDG-DXB': {'departure': '11:00 AM', 'arrival': '08:00 PM', 'duration': '6h 00m'},
    'DXB-CDG': {'departure': '03:00 AM', 'arrival': '07:30 AM', 'duration': '7h 30m'},
  }

  key = f'{departure}-{arrival}'.upper()
  return json.dumps(flights.get(key, {'error': 'Flight not found'}))


async def run(model: str):
  client = ollama.AsyncClient()
  # Initialize conversation with a user query
  messages = [{'role': 'user', 'content': 'What is the flight time from New York (NYC) to Los Angeles (LAX)?'}]

  # First API call: Send the query and function description to the model
  response = await client.chat(
    model=model,
    messages=messages,
    tools=[
      {
        'type': 'function',
        'function': {
          'name': 'get_flight_times',
          'description': 'Get the flight times between two cities',
          'parameters': {
            'type': 'object',
            'properties': {
              'departure': {
                'type': 'string',
                'description': 'The departure city (airport code)',
              },
              'arrival': {
                'type': 'string',
                'description': 'The arrival city (airport code)',
              },
            },
            'required': ['departure', 'arrival'],
          },
        },
      },
    ],
  )

  # Add the model's response to the conversation history
  messages.append(response['message'])

  # Check if the model decided to use the provided function
  if not response['message'].get('tool_calls'):
    print("The model didn't use the function. Its response was:")
    print(response['message']['content'])
    return

  # Process function calls made by the model
  if response['message'].get('tool_calls'):
    available_functions = {
      'get_flight_times': get_flight_times,
    }
    for tool in response['message']['tool_calls']:
      function_to_call = available_functions[tool['function']['name']]
      function_response = function_to_call(tool['function']['arguments']['departure'], tool['function']['arguments']['arrival'])
      # Add function response to the conversation
      messages.append(
        {
          'role': 'tool',
          'content': function_response,
        }
      )

  # Second API call: Get final response from the model
  final_response = await client.chat(model=model, messages=messages)
  print(final_response['message']['content'])


# Run the async function
asyncio.run(run('mistral'))
@@ -319,7 +319,7 @@ class Client(BaseClient):
      '''
      return a + b

-    client.chat(model='llama3.1:8b', tools=[add_two_numbers], messages=[...])
+    client.chat(model='llama3.2', tools=[add_two_numbers], messages=[...])

    Raises `RequestError` if a model is not provided.

@@ -821,7 +821,7 @@ class AsyncClient(BaseClient):
      '''
      return a + b

-    await client.chat(model='llama3.1:8b', tools=[add_two_numbers], messages=[...])
+    await client.chat(model='llama3.2', tools=[add_two_numbers], messages=[...])

    Raises `RequestError` if a model is not provided.