mirror of
https://github.com/ollama/ollama-python.git
synced 2026-03-14 12:24:36 +08:00
Update examples and readmes
This commit is contained in:
parent
238f142a5c
commit
ffe7588093
123
README.md
123
README.md
@ -4,8 +4,9 @@ The Ollama Python library provides the easiest way to integrate Python 3.8+ proj
|
|||||||
|
|
||||||
## Prerequisites
|
## Prerequisites
|
||||||
|
|
||||||
- Install [Ollama](https://ollama.com/download)
|
- [Ollama](https://ollama.com/download) should be installed and running
|
||||||
- Pull a model: `ollama pull <model>` See [Ollama models](https://ollama.com/models)
|
- Pull a model to use with the library: `ollama pull <model>`, e.g. `ollama pull llama3.1`
|
||||||
|
- See [Ollama models](https://ollama.com/models) for more information on the models available.
|
||||||
|
|
||||||
## Install
|
## Install
|
||||||
|
|
||||||
@ -16,24 +17,32 @@ pip install ollama
|
|||||||
## Usage
|
## Usage
|
||||||
|
|
||||||
```python
|
```python
|
||||||
import ollama
|
from ollama import chat
|
||||||
response = ollama.chat(model='llama3.1', messages=[
|
from ollama._types import ChatResponse
|
||||||
|
|
||||||
|
response: ChatResponse = chat(model='llama3.1', messages=[
|
||||||
{
|
{
|
||||||
'role': 'user',
|
'role': 'user',
|
||||||
'content': 'Why is the sky blue?',
|
'content': 'Why is the sky blue?',
|
||||||
},
|
},
|
||||||
])
|
])
|
||||||
print(response['message']['content'])
|
print(response['message']['content'])
|
||||||
|
# or access fields directly from the response object
|
||||||
|
print(response.message.content)
|
||||||
```
|
```
|
||||||
|
|
||||||
|
See [_types.py](ollama/_types.py) for more information on the response types.
|
||||||
|
|
||||||
## Streaming responses
|
## Streaming responses
|
||||||
|
|
||||||
Response streaming can be enabled by setting `stream=True`, modifying function calls to return a Python generator where each part is an object in the stream.
|
Response streaming can be enabled by setting `stream=True`.
|
||||||
|
|
||||||
|
Streaming Tool/Function calling is not yet supported.
|
||||||
|
|
||||||
```python
|
```python
|
||||||
import ollama
|
from ollama import chat
|
||||||
|
|
||||||
stream = ollama.chat(
|
stream = chat(
|
||||||
model='llama3.1',
|
model='llama3.1',
|
||||||
messages=[{'role': 'user', 'content': 'Why is the sky blue?'}],
|
messages=[{'role': 'user', 'content': 'Why is the sky blue?'}],
|
||||||
stream=True,
|
stream=True,
|
||||||
@ -43,6 +52,62 @@ for chunk in stream:
|
|||||||
print(chunk['message']['content'], end='', flush=True)
|
print(chunk['message']['content'], end='', flush=True)
|
||||||
```
|
```
|
||||||
|
|
||||||
|
## Custom client
|
||||||
|
|
||||||
|
A custom client can be created with the following fields:
|
||||||
|
|
||||||
|
- `host`: The Ollama host (default: `http://localhost:11434`)
|
||||||
|
- `timeout`: The timeout for requests (default: `None`)
|
||||||
|
- `follow_redirects`: Whether to follow redirects (default: `True`)
|
||||||
|
- `headers`: Additional headers to send with requests (default: `{}`)
|
||||||
|
|
||||||
|
```python
|
||||||
|
from ollama import Client
|
||||||
|
client = Client()
|
||||||
|
# or
|
||||||
|
client = Client(
|
||||||
|
host='http://localhost:11434',
|
||||||
|
timeout=None,
|
||||||
|
follow_redirects=True,
|
||||||
|
headers={'x-some-header': 'some-value'}
|
||||||
|
)
|
||||||
|
response = client.chat(model='llama3.1', messages=[
|
||||||
|
{
|
||||||
|
'role': 'user',
|
||||||
|
'content': 'Why is the sky blue?',
|
||||||
|
},
|
||||||
|
])
|
||||||
|
```
|
||||||
|
|
||||||
|
## Async client
|
||||||
|
|
||||||
|
The `AsyncClient` class is used to make asynchronous requests. It can be configured with the same fields as the `Client` class.
|
||||||
|
|
||||||
|
```python
|
||||||
|
import asyncio
|
||||||
|
from ollama import AsyncClient
|
||||||
|
|
||||||
|
async def chat():
|
||||||
|
message = {'role': 'user', 'content': 'Why is the sky blue?'}
|
||||||
|
response = await AsyncClient().chat(model='llama3.1', messages=[message])
|
||||||
|
|
||||||
|
asyncio.run(chat())
|
||||||
|
```
|
||||||
|
|
||||||
|
Setting `stream=True` modifies functions to return a Python asynchronous generator:
|
||||||
|
|
||||||
|
```python
|
||||||
|
import asyncio
|
||||||
|
from ollama import AsyncClient
|
||||||
|
|
||||||
|
async def chat():
|
||||||
|
message = {'role': 'user', 'content': 'Why is the sky blue?'}
|
||||||
|
async for part in await AsyncClient().chat(model='llama3.1', messages=[message], stream=True):
|
||||||
|
print(part['message']['content'], end='', flush=True)
|
||||||
|
|
||||||
|
asyncio.run(chat())
|
||||||
|
```
|
||||||
|
|
||||||
## API
|
## API
|
||||||
|
|
||||||
The Ollama Python library's API is designed around the [Ollama REST API](https://github.com/ollama/ollama/blob/main/docs/api.md)
|
The Ollama Python library's API is designed around the [Ollama REST API](https://github.com/ollama/ollama/blob/main/docs/api.md)
|
||||||
@ -124,50 +189,6 @@ ollama.embed(model='llama3.1', input=['The sky is blue because of rayleigh scatt
|
|||||||
ollama.ps()
|
ollama.ps()
|
||||||
```
|
```
|
||||||
|
|
||||||
## Custom client
|
|
||||||
|
|
||||||
A custom client can be created with the following fields:
|
|
||||||
|
|
||||||
- `host`: The Ollama host to connect to
|
|
||||||
- `timeout`: The timeout for requests
|
|
||||||
|
|
||||||
```python
|
|
||||||
from ollama import Client
|
|
||||||
client = Client(host='http://localhost:11434')
|
|
||||||
response = client.chat(model='llama3.1', messages=[
|
|
||||||
{
|
|
||||||
'role': 'user',
|
|
||||||
'content': 'Why is the sky blue?',
|
|
||||||
},
|
|
||||||
])
|
|
||||||
```
|
|
||||||
|
|
||||||
## Async client
|
|
||||||
|
|
||||||
```python
|
|
||||||
import asyncio
|
|
||||||
from ollama import AsyncClient
|
|
||||||
|
|
||||||
async def chat():
|
|
||||||
message = {'role': 'user', 'content': 'Why is the sky blue?'}
|
|
||||||
response = await AsyncClient().chat(model='llama3.1', messages=[message])
|
|
||||||
|
|
||||||
asyncio.run(chat())
|
|
||||||
```
|
|
||||||
|
|
||||||
Setting `stream=True` modifies functions to return a Python asynchronous generator:
|
|
||||||
|
|
||||||
```python
|
|
||||||
import asyncio
|
|
||||||
from ollama import AsyncClient
|
|
||||||
|
|
||||||
async def chat():
|
|
||||||
message = {'role': 'user', 'content': 'Why is the sky blue?'}
|
|
||||||
async for part in await AsyncClient().chat(model='llama3.1', messages=[message], stream=True):
|
|
||||||
print(part['message']['content'], end='', flush=True)
|
|
||||||
|
|
||||||
asyncio.run(chat())
|
|
||||||
```
|
|
||||||
|
|
||||||
## Errors
|
## Errors
|
||||||
|
|
||||||
|
|||||||
@ -9,53 +9,47 @@ cd examples/
|
|||||||
python3 <example>.py
|
python3 <example>.py
|
||||||
```
|
```
|
||||||
|
|
||||||
### Chat
|
### Chat - Chat with a model
|
||||||
- [chat.py](chat.py) - Basic chat with model
|
- [chat.py](chat.py)
|
||||||
- [chat-stream.py](chat-stream.py) - Stream chat with model
|
- [async-chat.py](async-chat.py)
|
||||||
- [async-chat.py](async-chat.py) - Async chat with model
|
- [chat-stream.py](chat-stream.py) - Streamed outputs
|
||||||
|
|
||||||
### Generate
|
### Generate - Generate text with a model
|
||||||
- [generate.py](generate.py) - Generate text with model
|
- [generate.py](generate.py)
|
||||||
- [generate-stream.py](generate-stream.py) - Stream generate text with model
|
- [async-generate.py](async-generate.py)
|
||||||
- [async-generate.py](async-generate.py) - Async generate text with model
|
- [generate-stream.py](generate-stream.py) - Streamed outputs
|
||||||
|
|
||||||
### List
|
### Tools/Function Calling - Call a function with a model
|
||||||
- [list.py](list.py) - List all downloaded models and their properties
|
- [tools.py](tools.py) - Simple example of Tools/Function Calling
|
||||||
- [async-list.py](async-list.py) - Async list all downloaded models and their properties
|
- [async-tools.py](async-tools.py)
|
||||||
|
|
||||||
### Fill in the middle
|
### Multimodal - Chat with a multimodal model
|
||||||
- [fill-in-middle.py](fill-in-middle.py) - Fill in the middle with model
|
- [multimodal_chat.py](multimodal_chat.py)
|
||||||
|
- [multimodal_generate.py](multimodal_generate.py)
|
||||||
|
|
||||||
|
|
||||||
### Multimodal
|
|
||||||
- [multimodal.py](multimodal.py) - Multimodal chat with model
|
|
||||||
|
|
||||||
### Pull Progress
|
### Ollama List - List all downloaded models and their properties
|
||||||
|
- [list.py](list.py)
|
||||||
|
- [async-list.py](async-list.py)
|
||||||
|
|
||||||
|
### Ollama Pull - Pull a model from Ollama
|
||||||
Requirement: `pip install tqdm`
|
Requirement: `pip install tqdm`
|
||||||
|
|
||||||
- [pull-progress.py](pull-progress.py) - Pull progress with model
|
- [pull.py](pull.py)
|
||||||
|
|
||||||
### Ollama create (create a model)
|
### Ollama Create - Create a model from a Modelfile
|
||||||
- [create.py](create.py) - Create a model
|
|
||||||
|
|
||||||
### Ollama ps (show model status - cpu/gpu usage)
|
|
||||||
- [ollama-ps.py](ollama-ps.py) - Ollama ps
|
|
||||||
|
|
||||||
### Tools/Function Calling
|
|
||||||
- [tools.py](tools.py) - Simple example of Tools/Function Calling
|
|
||||||
- [async-tools.py](async-tools.py) - Async example of Tools/Function Calling
|
|
||||||
|
|
||||||
## Configuring Clients
|
|
||||||
Custom parameters can be passed to the client when initializing:
|
|
||||||
```python
|
```python
|
||||||
import ollama
|
python create.py <model> <modelfile>
|
||||||
client = ollama.Client(
|
|
||||||
host='http://localhost:11434',
|
|
||||||
timeout=10.0, # Default: None
|
|
||||||
follow_redirects=True, # Default: True
|
|
||||||
headers={'x-some-header': 'some-value'}
|
|
||||||
)
|
|
||||||
```
|
```
|
||||||
|
- [create.py](create.py)
|
||||||
|
|
||||||
|
See [ollama/docs/modelfile.md](https://github.com/ollama/ollama/blob/main/docs/modelfile.md) for more information on the Modelfile format.
|
||||||
|
|
||||||
|
### Fill in the middle
|
||||||
|
- [fill-in-middle.py](fill-in-middle.py) - Given a prefix and suffix, fill in the middle
|
||||||
|
|
||||||
|
### Ollama ps - Show model status with CPU/GPU usage
|
||||||
|
- [ps.py](ps.py)
|
||||||
|
|
||||||
Similarly, the `AsyncClient` class can be configured with the same parameters.
|
|
||||||
|
|
||||||
|
|||||||
@ -8,13 +8,23 @@ if len(args) == 2:
|
|||||||
# create from local file
|
# create from local file
|
||||||
path = args[1]
|
path = args[1]
|
||||||
else:
|
else:
|
||||||
print('usage: python main.py <name> <filepath>')
|
print('usage: python create.py <name> <filepath>')
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
|
|
||||||
# TODO: update to real Modelfile values
|
# TODO: update to real Modelfile values
|
||||||
modelfile = f"""
|
modelfile = f"""
|
||||||
FROM {path}
|
FROM {path}
|
||||||
"""
|
"""
|
||||||
|
example_modelfile = """
|
||||||
|
FROM llama3.2
|
||||||
|
# sets the temperature to 1 [higher is more creative, lower is more coherent]
|
||||||
|
PARAMETER temperature 1
|
||||||
|
# sets the context window size to 4096, this controls how many tokens the LLM can use as context to generate the next token
|
||||||
|
PARAMETER num_ctx 4096
|
||||||
|
|
||||||
|
# sets a custom system message to specify the behavior of the chat assistant
|
||||||
|
SYSTEM You are Mario from super mario bros, acting as an assistant.
|
||||||
|
"""
|
||||||
|
|
||||||
for response in create(model=args[0], modelfile=modelfile, stream=True):
|
for response in create(model=args[0], modelfile=modelfile, stream=True):
|
||||||
print(response['status'])
|
print(response['status'])
|
||||||
|
|||||||
17
examples/multimodal_chat.py
Normal file
17
examples/multimodal_chat.py
Normal file
@ -0,0 +1,17 @@
|
|||||||
|
from ollama import Client
|
||||||
|
|
||||||
|
client = Client()
|
||||||
|
path = ''
|
||||||
|
# NOTE: passing in a wrong image path currently produces an unhelpful error
|
||||||
|
response = client.chat(
|
||||||
|
model='llama3.2-vision',
|
||||||
|
messages=[
|
||||||
|
{
|
||||||
|
'role': 'user',
|
||||||
|
'content': 'What is in this image? Be concise. Respond with the structure {"focal": "...", "subject": "...", "background": "..."}',
|
||||||
|
'images': [path],
|
||||||
|
}
|
||||||
|
],
|
||||||
|
)
|
||||||
|
|
||||||
|
print(response.message.content)
|
||||||
@ -1,5 +1,7 @@
|
|||||||
from ollama import ps, pull, chat
|
from ollama import ps, pull
|
||||||
|
from ollama._types import ProcessResponse
|
||||||
|
|
||||||
|
# Ensure at least one model is loaded
|
||||||
response = pull('llama3.1', stream=True)
|
response = pull('llama3.1', stream=True)
|
||||||
progress_states = set()
|
progress_states = set()
|
||||||
for progress in response:
|
for progress in response:
|
||||||
@ -10,22 +12,14 @@ for progress in response:
|
|||||||
|
|
||||||
print('\n')
|
print('\n')
|
||||||
|
|
||||||
response = chat('llama3.1', messages=[{'role': 'user', 'content': 'Hello!'}])
|
|
||||||
print(response['message']['content'])
|
|
||||||
|
|
||||||
print('\n')
|
response: ProcessResponse = ps()
|
||||||
|
for model in response.models:
|
||||||
|
print(f'Model: {model.model}')
|
||||||
|
print(f'Digest: {model.digest}')
|
||||||
|
print(f'Expires at: {model.expires_at}')
|
||||||
|
print(f'Size: {model.size}')
|
||||||
|
print(f'Size vram: {model.size_vram}')
|
||||||
|
print(f'Details: {model.details}')
|
||||||
|
|
||||||
response = ps()
|
print('---' * 10)
|
||||||
|
|
||||||
name = response['models'][0]['name']
|
|
||||||
size = response['models'][0]['size']
|
|
||||||
size_vram = response['models'][0]['size_vram']
|
|
||||||
|
|
||||||
if size == size_vram:
|
|
||||||
print(f'{name}: 100% GPU')
|
|
||||||
elif not size_vram:
|
|
||||||
print(f'{name}: 100% CPU')
|
|
||||||
else:
|
|
||||||
size_cpu = size - size_vram
|
|
||||||
cpu_percent = round(size_cpu / size * 100)
|
|
||||||
print(f'{name}: {cpu_percent}% CPU/{100 - cpu_percent}% GPU')
|
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user