Update examples and readmes

parent 238f142a5c
commit ffe7588093

README.md (123 lines changed)
@@ -4,8 +4,9 @@ The Ollama Python library provides the easiest way to integrate Python 3.8+ proj
 
 ## Prerequisites
 
-- Install [Ollama](https://ollama.com/download)
-- Pull a model: `ollama pull <model>` See [Ollama models](https://ollama.com/models)
+- [Ollama](https://ollama.com/download) should be installed and running
+- Pull a model to use with the library: `ollama pull <model>` e.g. `ollama pull llama3.1`
+- See [Ollama models](https://ollama.com/models) for more information on the models available.
 
 ## Install
 
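As a side note to the prerequisites above, a model can also be pulled from Python rather than the CLI; a minimal sketch, using `llama3.1` to match the examples in this README:

```python
from ollama import pull

# downloads the model if it is not already present locally
pull('llama3.1')
```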
@@ -16,24 +17,32 @@ pip install ollama
 ## Usage
 
 ```python
-import ollama
-response = ollama.chat(model='llama3.1', messages=[
+from ollama import chat
+from ollama._types import ChatResponse
+
+response: ChatResponse = chat(model='llama3.1', messages=[
   {
     'role': 'user',
     'content': 'Why is the sky blue?',
   },
 ])
 print(response['message']['content'])
+# or access fields directly from the response object
+print(response.message.content)
 ```
 
+See [_types.py](ollama/_types.py) for more information on the response types.
+
 ## Streaming responses
 
-Response streaming can be enabled by setting `stream=True`, modifying function calls to return a Python generator where each part is an object in the stream.
+Response streaming can be enabled by setting `stream=True`
 
+Streaming Tool/Function calling is not yet supported.
+
 ```python
-import ollama
+from ollama import chat
 
-stream = ollama.chat(
+stream = chat(
   model='llama3.1',
   messages=[{'role': 'user', 'content': 'Why is the sky blue?'}],
   stream=True,
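Since this commit introduces typed responses, the streamed chunks above can presumably be read through attributes as well as dictionary keys; a minimal sketch under that assumption:

```python
from ollama import chat

stream = chat(
  model='llama3.1',
  messages=[{'role': 'user', 'content': 'Why is the sky blue?'}],
  stream=True,
)

for chunk in stream:
  # attribute access mirrors the dict-style access shown in the README
  print(chunk.message.content, end='', flush=True)
```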
@@ -43,6 +52,62 @@ for chunk in stream:
   print(chunk['message']['content'], end='', flush=True)
 ```
 
+## Custom client
+
+A custom client can be created with the following fields:
+
+- `host`: The Ollama host (default: `http://localhost:11434`)
+- `timeout`: The timeout for requests (default: `None`)
+- `follow_redirects`: Whether to follow redirects (default: `True`)
+- `headers`: Additional headers to send with requests (default: `{}`)
+
+```python
+from ollama import Client
+client = Client()
+# or
+client = Client(
+  host='http://localhost:11434',
+  timeout=None,
+  follow_redirects=True,
+  headers={'x-some-header': 'some-value'}
+)
+response = client.chat(model='llama3.1', messages=[
+  {
+    'role': 'user',
+    'content': 'Why is the sky blue?',
+  },
+])
+```
+
+## Async client
+
+The `AsyncClient` class is used to make asynchronous requests. It can be configured with the same fields as the `Client` class.
+
+```python
+import asyncio
+from ollama import AsyncClient
+
+async def chat():
+  message = {'role': 'user', 'content': 'Why is the sky blue?'}
+  response = await AsyncClient().chat(model='llama3.1', messages=[message])
+
+asyncio.run(chat())
+```
+
+Setting `stream=True` modifies functions to return a Python asynchronous generator:
+
+```python
+import asyncio
+from ollama import AsyncClient
+
+async def chat():
+  message = {'role': 'user', 'content': 'Why is the sky blue?'}
+  async for part in await AsyncClient().chat(model='llama3.1', messages=[message], stream=True):
+    print(part['message']['content'], end='', flush=True)
+
+asyncio.run(chat())
+```
+
 ## API
 
 The Ollama Python library's API is designed around the [Ollama REST API](https://github.com/ollama/ollama/blob/main/docs/api.md)
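The added text notes that `AsyncClient` takes the same fields as `Client`, but only shows the default constructor; a minimal sketch of a configured async client, reusing the field values from the `Client` example above:

```python
import asyncio

from ollama import AsyncClient

# configured with the same fields as the synchronous Client
client = AsyncClient(
  host='http://localhost:11434',
  timeout=None,
  follow_redirects=True,
  headers={'x-some-header': 'some-value'},
)


async def main():
  response = await client.chat(
    model='llama3.1',
    messages=[{'role': 'user', 'content': 'Why is the sky blue?'}],
  )
  print(response.message.content)


asyncio.run(main())
```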
@@ -124,50 +189,6 @@ ollama.embed(model='llama3.1', input=['The sky is blue because of rayleigh scatt
 ollama.ps()
 ```
 
-## Custom client
-
-A custom client can be created with the following fields:
-
-- `host`: The Ollama host to connect to
-- `timeout`: The timeout for requests
-
-```python
-from ollama import Client
-client = Client(host='http://localhost:11434')
-response = client.chat(model='llama3.1', messages=[
-  {
-    'role': 'user',
-    'content': 'Why is the sky blue?',
-  },
-])
-```
-
-## Async client
-
-```python
-import asyncio
-from ollama import AsyncClient
-
-async def chat():
-  message = {'role': 'user', 'content': 'Why is the sky blue?'}
-  response = await AsyncClient().chat(model='llama3.1', messages=[message])
-
-asyncio.run(chat())
-```
-
-Setting `stream=True` modifies functions to return a Python asynchronous generator:
-
-```python
-import asyncio
-from ollama import AsyncClient
-
-async def chat():
-  message = {'role': 'user', 'content': 'Why is the sky blue?'}
-  async for part in await AsyncClient().chat(model='llama3.1', messages=[message], stream=True):
-    print(part['message']['content'], end='', flush=True)
-
-asyncio.run(chat())
-```
 
 ## Errors
 
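To make the sentence about the library being designed around the Ollama REST API concrete, here is a minimal sketch comparing a library call with the corresponding HTTP endpoint, assuming a local server (`httpx` is used only for illustration):

```python
import httpx
import ollama

# library call
running = ollama.ps()

# roughly equivalent REST call against the default host
raw = httpx.get('http://localhost:11434/api/ps').json()

print(len(running.models), len(raw['models']))
```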
examples/README.md

@@ -9,53 +9,47 @@ cd examples/
 python3 <example>.py
 ```
 
-### Chat
-- [chat.py](chat.py) - Basic chat with model
-- [chat-stream.py](chat-stream.py) - Stream chat with model
-- [async-chat.py](async-chat.py) - Async chat with model
+### Chat - Chat with a model
+- [chat.py](chat.py)
+- [async-chat.py](async-chat.py)
+- [chat-stream.py](chat-stream.py) - Streamed outputs
 
-### Generate
-- [generate.py](generate.py) - Generate text with model
-- [generate-stream.py](generate-stream.py) - Stream generate text with model
-- [async-generate.py](async-generate.py) - Async generate text with model
+### Generate - Generate text with a model
+- [generate.py](generate.py)
+- [async-generate.py](async-generate.py)
+- [generate-stream.py](generate-stream.py) - Streamed outputs
 
-### List
-- [list.py](list.py) - List all downloaded models and their properties
-- [async-list.py](async-list.py) - Async list all downloaded models and their properties
+### Tools/Function Calling - Call a function with a model
+- [tools.py](tools.py) - Simple example of Tools/Function Calling
+- [async-tools.py](async-tools.py)
 
-### Fill in the middle
-- [fill-in-middle.py](fill-in-middle.py) - Fill in the middle with model
+### Multimodal - Chat with a multimodal model
+- [multimodal_chat.py](multimodal_chat.py)
+- [multimodal_generate.py](multimodal_generate.py)
+
 
-### Multimodal
-- [multimodal.py](multimodal.py) - Multimodal chat with model
-
-### Pull Progress
+### Ollama List - List all downloaded models and their properties
+- [list.py](list.py)
+- [async-list.py](async-list.py)
+
+### Ollama Pull - Pull a model from Ollama
 Requirement: `pip install tqdm`
 
-- [pull-progress.py](pull-progress.py) - Pull progress with model
+- [pull.py](pull.py)
 
-### Ollama create (create a model)
-- [create.py](create.py) - Create a model
-
-### Ollama ps (show model status - cpu/gpu usage)
-- [ollama-ps.py](ollama-ps.py) - Ollama ps
-
-### Tools/Function Calling
-- [tools.py](tools.py) - Simple example of Tools/Function Calling
-- [async-tools.py](async-tools.py) - Async example of Tools/Function Calling
-
-## Configuring Clients
-Custom parameters can be passed to the client when initializing:
+### Ollama Create - Create a model from a Modelfile
 ```python
-import ollama
-client = ollama.Client(
-  host='http://localhost:11434',
-  timeout=10.0, # Default: None
-  follow_redirects=True, # Default: True
-  headers={'x-some-header': 'some-value'}
-)
+python create.py <model> <modelfile>
 ```
+- [create.py](create.py)
 
+See [ollama/docs/modelfile.md](https://github.com/ollama/ollama/blob/main/docs/modelfile.md) for more information on the Modelfile format.
+
+### Fill in the middle
+- [fill-in-middle.py](fill-in-middle.py) - Given a prefix and suffix, fill in the middle
+
+### Ollama ps - Show model status with CPU/GPU usage
+- [ps.py](ps.py)
 
-Similarly, the `AsyncClient` class can be configured with the same parameters.
 
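The Tools/Function Calling entries in the list above only reference tools.py; a minimal sketch of what such an example typically looks like with this library, assuming a tool-capable model such as `llama3.1` (the `add_two_numbers` helper is illustrative, not part of the repository):

```python
from ollama import chat


def add_two_numbers(a: int, b: int) -> int:
  """Add two integers together."""
  return a + b


response = chat(
  model='llama3.1',
  messages=[{'role': 'user', 'content': 'What is 10 + 10?'}],
  tools=[add_two_numbers],  # plain Python functions can be passed as tools
)

for call in response.message.tool_calls or []:
  # the model responds with the tool name and arguments it wants to invoke
  if call.function.name == 'add_two_numbers':
    print(add_two_numbers(**call.function.arguments))
```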
examples/create.py

@@ -8,13 +8,23 @@ if len(args) == 2:
   # create from local file
   path = args[1]
 else:
-  print('usage: python main.py <name> <filepath>')
+  print('usage: python create.py <name> <filepath>')
   sys.exit(1)
 
 # TODO: update to real Modelfile values
 modelfile = f"""
 FROM {path}
 """
+example_modelfile = """
+FROM llama3.2
+# sets the temperature to 1 [higher is more creative, lower is more coherent]
+PARAMETER temperature 1
+# sets the context window size to 4096, this controls how many tokens the LLM can use as context to generate the next token
+PARAMETER num_ctx 4096
+
+# sets a custom system message to specify the behavior of the chat assistant
+SYSTEM You are Mario from super mario bros, acting as an assistant.
+"""
 
 for response in create(model=args[0], modelfile=modelfile, stream=True):
   print(response['status'])
examples/multimodal_chat.py (new file, 17 lines)

@@ -0,0 +1,17 @@
+from ollama import Client
+
+client = Client()
+path = ''
+# Passing in wrong path for image error sucks
+response = client.chat(
+  model='llama3.2-vision',
+  messages=[
+    {
+      'role': 'user',
+      'content': 'What is in this image? Be concise. Respond with the structure {"focal": "...", "subject": "...", "background": "..."}',
+      'images': [path],
+    }
+  ],
+)
+
+print(response.message.content)
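One usage note on the new multimodal example: `path` is left as an empty string in the diff and in practice should point at a real image; the `images` field presumably also accepts a `pathlib.Path` or raw bytes. A minimal sketch under that assumption (the filename is hypothetical):

```python
from pathlib import Path

from ollama import chat

image = Path('example.png')  # hypothetical local image file

response = chat(
  model='llama3.2-vision',
  messages=[{'role': 'user', 'content': 'Describe this image briefly.', 'images': [image]}],
)
print(response.message.content)
```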
examples/ps.py

@@ -1,5 +1,7 @@
-from ollama import ps, pull, chat
+from ollama import ps, pull
+from ollama._types import ProcessResponse
 
 # Ensure at least one model is loaded
 response = pull('llama3.1', stream=True)
 progress_states = set()
 for progress in response:
@@ -10,22 +12,14 @@ for progress in response:
 
 print('\n')
 
-response = chat('llama3.1', messages=[{'role': 'user', 'content': 'Hello!'}])
-print(response['message']['content'])
-
-print('\n')
+response: ProcessResponse = ps()
+for model in response.models:
+  print(f'Model: {model.model}')
+  print(f'Digest: {model.digest}')
+  print(f'Expires at: {model.expires_at}')
+  print(f'Size: {model.size}')
+  print(f'Size vram: {model.size_vram}')
+  print(f'Details: {model.details}')
 
-response = ps()
-
-name = response['models'][0]['name']
-size = response['models'][0]['size']
-size_vram = response['models'][0]['size_vram']
-
-if size == size_vram:
-  print(f'{name}: 100% GPU')
-elif not size_vram:
-  print(f'{name}: 100% CPU')
-else:
-  size_cpu = size - size_vram
-  cpu_percent = round(size_cpu / size * 100)
-  print(f'{name}: {cpu_percent}% CPU/{100 - cpu_percent}% GPU')
+  print('---' * 10)