Update examples and readmes

ParthSareen 2024-11-19 14:31:18 -08:00
parent 238f142a5c
commit ffe7588093
7 changed files with 143 additions and 107 deletions

README.md
View File

@@ -4,8 +4,9 @@ The Ollama Python library provides the easiest way to integrate Python 3.8+ proj
## Prerequisites
- Install [Ollama](https://ollama.com/download)
- Pull a model: `ollama pull <model>`. See [Ollama models](https://ollama.com/models)
- [Ollama](https://ollama.com/download) should be installed and running
- Pull a model to use with the library: `ollama pull <model>`, e.g. `ollama pull llama3.1`
- See [Ollama models](https://ollama.com/models) for more information on the models available.
## Install
@@ -16,24 +17,32 @@ pip install ollama
## Usage
```python
import ollama
response = ollama.chat(model='llama3.1', messages=[
from ollama import chat
from ollama._types import ChatResponse
response: ChatResponse = chat(model='llama3.1', messages=[
{
'role': 'user',
'content': 'Why is the sky blue?',
},
])
print(response['message']['content'])
# or access fields directly from the response object
print(response.message.content)
```
See [_types.py](ollama/_types.py) for more information on the response types.
## Streaming responses
Response streaming can be enabled by setting `stream=True`, modifying function calls to return a Python generator where each part is an object in the stream.
Response streaming can be enabled by setting `stream=True`.
Streaming Tool/Function calling is not yet supported.
```python
import ollama
from ollama import chat
stream = ollama.chat(
stream = chat(
model='llama3.1',
messages=[{'role': 'user', 'content': 'Why is the sky blue?'}],
stream=True,
@@ -43,6 +52,62 @@ for chunk in stream:
print(chunk['message']['content'], end='', flush=True)
```
## Custom client
A custom client can be created with the following fields:
- `host`: The Ollama host (default: `http://localhost:11434`)
- `timeout`: The timeout for requests (default: `None`)
- `follow_redirects`: Whether to follow redirects (default: `True`)
- `headers`: Additional headers to send with requests (default: `{}`)
```python
from ollama import Client
client = Client()
# or
client = Client(
host='http://localhost:11434',
timeout=None,
follow_redirects=True,
headers={'x-some-header': 'some-value'}
)
response = client.chat(model='llama3.1', messages=[
{
'role': 'user',
'content': 'Why is the sky blue?',
},
])
```
## Async client
The `AsyncClient` class is used to make asynchronous requests. It can be configured with the same fields as the `Client` class.
```python
import asyncio
from ollama import AsyncClient
async def chat():
message = {'role': 'user', 'content': 'Why is the sky blue?'}
response = await AsyncClient().chat(model='llama3.1', messages=[message])
asyncio.run(chat())
```
Setting `stream=True` modifies functions to return a Python asynchronous generator:
```python
import asyncio
from ollama import AsyncClient
async def chat():
message = {'role': 'user', 'content': 'Why is the sky blue?'}
async for part in await AsyncClient().chat(model='llama3.1', messages=[message], stream=True):
print(part['message']['content'], end='', flush=True)
asyncio.run(chat())
```
## API
The Ollama Python library's API is designed around the [Ollama REST API](https://github.com/ollama/ollama/blob/main/docs/api.md)
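For example, a minimal sketch of two top-level calls, assuming a running Ollama server with `llama3.1` already pulled:
```python
import ollama

# one-shot completion; mirrors the REST API's generate endpoint
result = ollama.generate(model='llama3.1', prompt='Why is the sky blue?')
print(result['response'])

# embeddings; mirrors the REST API's embed endpoint
embeddings = ollama.embed(model='llama3.1', input='The sky is blue because of rayleigh scattering')
print(len(embeddings['embeddings'][0]))
```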
@@ -124,50 +189,6 @@ ollama.embed(model='llama3.1', input=['The sky is blue because of rayleigh scatt
ollama.ps()
```
## Custom client
A custom client can be created with the following fields:
- `host`: The Ollama host to connect to
- `timeout`: The timeout for requests
```python
from ollama import Client
client = Client(host='http://localhost:11434')
response = client.chat(model='llama3.1', messages=[
{
'role': 'user',
'content': 'Why is the sky blue?',
},
])
```
## Async client
```python
import asyncio
from ollama import AsyncClient
async def chat():
message = {'role': 'user', 'content': 'Why is the sky blue?'}
response = await AsyncClient().chat(model='llama3.1', messages=[message])
asyncio.run(chat())
```
Setting `stream=True` modifies functions to return a Python asynchronous generator:
```python
import asyncio
from ollama import AsyncClient
async def chat():
message = {'role': 'user', 'content': 'Why is the sky blue?'}
async for part in await AsyncClient().chat(model='llama3.1', messages=[message], stream=True):
print(part['message']['content'], end='', flush=True)
asyncio.run(chat())
```
## Errors
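Errors from failed requests surface as `ollama.ResponseError`, which carries an `error` message and a `status_code`. A minimal sketch of the handling pattern (the model name is just a placeholder):
```python
import ollama

model = 'does-not-yet-exist'

try:
  ollama.chat(model)
except ollama.ResponseError as e:
  print('Error:', e.error)
  if e.status_code == 404:
    ollama.pull(model)
```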

View File

@@ -9,53 +9,47 @@ cd examples/
python3 <example>.py
```
### Chat
- [chat.py](chat.py) - Basic chat with model
- [chat-stream.py](chat-stream.py) - Stream chat with model
- [async-chat.py](async-chat.py) - Async chat with model
### Chat - Chat with a model
- [chat.py](chat.py)
- [async-chat.py](async-chat.py)
- [chat-stream.py](chat-stream.py) - Streamed outputs
### Generate
- [generate.py](generate.py) - Generate text with model
- [generate-stream.py](generate-stream.py) - Stream generate text with model
- [async-generate.py](async-generate.py) - Async generate text with model
### Generate - Generate text with a model
- [generate.py](generate.py)
- [async-generate.py](async-generate.py)
- [generate-stream.py](generate-stream.py) - Streamed outputs
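A minimal sketch of a streamed generate call (the model name is just an example):
```python
from ollama import generate

# print tokens as they are produced
for part in generate(model='llama3.1', prompt='Why is the sky blue?', stream=True):
  print(part['response'], end='', flush=True)
```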
### List
- [list.py](list.py) - List all downloaded models and their properties
- [async-list.py](async-list.py) - Async list all downloaded models and their properties
### Tools/Function Calling - Call a function with a model
- [tools.py](tools.py) - Simple example of Tools/Function Calling
- [async-tools.py](async-tools.py)
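A minimal sketch of tool calling, assuming plain Python functions can be passed as tools (0.4+); the function and prompt are only examples:
```python
from ollama import chat


def add_two_numbers(a: int, b: int) -> int:
  """Add two numbers.

  Args:
    a: The first number
    b: The second number
  """
  return a + b


response = chat(
  model='llama3.1',
  messages=[{'role': 'user', 'content': 'What is 10 + 10?'}],
  tools=[add_two_numbers],  # functions can be passed directly as tools
)

# run any tool calls the model requested
for tool_call in response.message.tool_calls or []:
  if tool_call.function.name == 'add_two_numbers':
    print(add_two_numbers(**tool_call.function.arguments))
```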
### Fill in the middle
- [fill-in-middle.py](fill-in-middle.py) - Fill in the middle with model
### Multimodal - Chat with a multimodal model
- [multimodal_chat.py](multimodal_chat.py)
- [multimodal_generate.py](multimodal_generate.py)
### Multimodal
- [multimodal.py](multimodal.py) - Multimodal chat with model
### Pull Progress
### Ollama List - List all downloaded models and their properties
- [list.py](list.py)
- [async-list.py](async-list.py)
### Ollama Pull - Pull a model from Ollama
Requirement: `pip install tqdm`
- [pull-progress.py](pull-progress.py) - Pull progress with model
- [pull.py](pull.py)
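A minimal sketch of streaming a pull with `tqdm` progress bars (which is why `tqdm` is required); the model name is just an example, and the field names assume the library's progress responses expose `status`, `digest`, `total`, and `completed`:
```python
from tqdm import tqdm

from ollama import pull

current_digest, bars = '', {}
for progress in pull('llama3.1', stream=True):
  digest = progress.digest or ''
  if digest != current_digest and current_digest in bars:
    bars[current_digest].close()

  if not digest:
    print(progress.status)
    continue

  if digest not in bars and progress.total:
    # one progress bar per layer, keyed by its digest
    bars[digest] = tqdm(total=progress.total, desc=f'pulling {digest[7:19]}', unit='B', unit_scale=True)

  if progress.completed and digest in bars:
    bars[digest].update(progress.completed - bars[digest].n)

  current_digest = digest
```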
### Ollama create (create a model)
- [create.py](create.py) - Create a model
### Ollama ps (show model status - cpu/gpu usage)
- [ollama-ps.py](ollama-ps.py) - Ollama ps
### Tools/Function Calling
- [tools.py](tools.py) - Simple example of Tools/Function Calling
- [async-tools.py](async-tools.py) - Async example of Tools/Function Calling
## Configuring Clients
Custom parameters can be passed to the client when initializing:
### Ollama Create - Create a model from a Modelfile
```python
import ollama
client = ollama.Client(
host='http://localhost:11434',
timeout=10.0, # Default: None
follow_redirects=True, # Default: True
headers={'x-some-header': 'some-value'}
)
python create.py <model> <modelfile>
```
- [create.py](create.py)
See [ollama/docs/modelfile.md](https://github.com/ollama/ollama/blob/main/docs/modelfile.md) for more information on the Modelfile format.
### Fill in the middle
- [fill-in-middle.py](fill-in-middle.py) - Given a prefix and suffix, fill in the middle
### Ollama ps - Show model status with CPU/GPU usage
- [ps.py](ps.py)
Similarly, the `AsyncClient` class can be configured with the same parameters.

View File

@@ -8,13 +8,23 @@ if len(args) == 2:
# create from local file
path = args[1]
else:
print('usage: python main.py <name> <filepath>')
print('usage: python create.py <name> <filepath>')
sys.exit(1)
# TODO: update to real Modelfile values
modelfile = f"""
FROM {path}
"""
example_modelfile = """
FROM llama3.2
# sets the temperature to 1 [higher is more creative, lower is more coherent]
PARAMETER temperature 1
# sets the context window size to 4096; this controls how many tokens the LLM can use as context to generate the next token
PARAMETER num_ctx 4096
# sets a custom system message to specify the behavior of the chat assistant
SYSTEM You are Mario from Super Mario Bros., acting as an assistant.
"""
for response in create(model=args[0], modelfile=modelfile, stream=True):
print(response['status'])

View File

@@ -0,0 +1,17 @@
from ollama import Client
client = Client()
path = ''  # set to the path of a local image file
# note: an invalid image path currently raises an unhelpful error
response = client.chat(
model='llama3.2-vision',
messages=[
{
'role': 'user',
'content': 'What is in this image? Be concise. Respond with the structure {"focal": "...", "subject": "...", "background": "..."}',
'images': [path],
}
],
)
print(response.message.content)

View File

@@ -1,5 +1,7 @@ from ollama import ps, pull, chat
from ollama import ps, pull, chat
from ollama import ps, pull
from ollama._types import ProcessResponse
# Ensure at least one model is loaded
response = pull('llama3.1', stream=True)
progress_states = set()
for progress in response:
@@ -10,22 +12,14 @@ for progress in response:
print('\n')
response = chat('llama3.1', messages=[{'role': 'user', 'content': 'Hello!'}])
print(response['message']['content'])
print('\n')
response: ProcessResponse = ps()
for model in response.models:
print(f'Model: {model.model}')
print(f'Digest: {model.digest}')
print(f'Expires at: {model.expires_at}')
print(f'Size: {model.size}')
print(f'Size vram: {model.size_vram}')
print(f'Details: {model.details}')
response = ps()
name = response['models'][0]['name']
size = response['models'][0]['size']
size_vram = response['models'][0]['size_vram']
if size == size_vram:
print(f'{name}: 100% GPU')
elif not size_vram:
print(f'{name}: 100% CPU')
else:
size_cpu = size - size_vram
cpu_percent = round(size_cpu / size * 100)
print(f'{name}: {cpu_percent}% CPU/{100 - cpu_percent}% GPU')
print('---' * 10)