mirror of
https://github.com/ollama/ollama-python.git
synced 2026-04-25 08:48:18 +08:00
175 lines
3.0 KiB
Markdown
175 lines
3.0 KiB
Markdown
# Ollama Python Library
|
|
|
|
The Ollama Python library provides the easiest way to integrate Python 3.8+ projects with [Ollama](https://github.com/ollama/ollama).
|
|
|
|
## Install
|
|
|
|
```sh
|
|
pip install ollama
|
|
```
|
|
|
|
## Usage
|
|
|
|
```python
|
|
import ollama
|
|
response = ollama.chat(model='llama3.1', messages=[
|
|
{
|
|
'role': 'user',
|
|
'content': 'Why is the sky blue?',
|
|
},
|
|
])
|
|
print(response['message']['content'])
|
|
```
|
|
|
|
## Streaming responses
|
|
|
|
Response streaming can be enabled by setting `stream=True`. The function call then returns a Python generator that yields each part of the response as an object in the stream.
|
|
|
|
```python
|
|
import ollama
|
|
|
|
stream = ollama.chat(
|
|
model='llama3.1',
|
|
messages=[{'role': 'user', 'content': 'Why is the sky blue?'}],
|
|
stream=True,
|
|
)
|
|
|
|
for chunk in stream:
|
|
print(chunk['message']['content'], end='', flush=True)
|
|
```
|
|
|
|
## API
|
|
|
|
The Ollama Python library's API is designed around the [Ollama REST API](https://github.com/ollama/ollama/blob/main/docs/api.md).
|
|
|
|
### Chat
|
|
|
|
```python
|
|
ollama.chat(model='llama3.1', messages=[{'role': 'user', 'content': 'Why is the sky blue?'}])
|
|
```
|
|
|
|
### Generate
|
|
|
|
```python
|
|
ollama.generate(model='llama3.1', prompt='Why is the sky blue?')
|
|
```
|
|
|
|
### List
|
|
|
|
```python
|
|
ollama.list()
|
|
```
|
|
|
|
### Show
|
|
|
|
```python
|
|
ollama.show('llama3.1')
|
|
```
|
|
|
|
### Create
|
|
|
|
```python
|
|
modelfile='''
|
|
FROM llama3.1
|
|
SYSTEM You are mario from super mario bros.
|
|
'''
|
|
|
|
ollama.create(model='example', modelfile=modelfile)
|
|
```
|
|
|
|
### Copy
|
|
|
|
```python
|
|
ollama.copy('llama3.1', 'user/llama3.1')
|
|
```
|
|
|
|
### Delete
|
|
|
|
```python
|
|
ollama.delete('llama3.1')
|
|
```
|
|
|
|
### Pull
|
|
|
|
```python
|
|
ollama.pull('llama3.1')
|
|
```
|
|
|
|
### Push
|
|
|
|
```python
|
|
ollama.push('user/llama3.1')
|
|
```
|
|
|
|
### Embeddings
|
|
|
|
```python
|
|
ollama.embeddings(model='llama3.1', prompt='The sky is blue because of rayleigh scattering')
|
|
```
|
|
|
|
### Ps
|
|
|
|
```python
|
|
ollama.ps()
|
|
```
|
|
|
|
## Custom client
|
|
|
|
A custom client can be created with the following fields:
|
|
|
|
- `host`: The Ollama host to connect to
|
|
- `timeout`: The timeout for requests
|
|
|
|
```python
|
|
from ollama import Client
|
|
client = Client(host='http://localhost:11434')
|
|
response = client.chat(model='llama3.1', messages=[
|
|
{
|
|
'role': 'user',
|
|
'content': 'Why is the sky blue?',
|
|
},
|
|
])
|
|
```
|
|
|
|
## Async client
|
|
|
|
```python
|
|
import asyncio
|
|
from ollama import AsyncClient
|
|
|
|
async def chat():
|
|
message = {'role': 'user', 'content': 'Why is the sky blue?'}
|
|
response = await AsyncClient().chat(model='llama3.1', messages=[message])
|
|
|
|
asyncio.run(chat())
|
|
```
|
|
|
|
Setting `stream=True` makes the call return a Python asynchronous generator instead:
|
|
|
|
```python
|
|
import asyncio
|
|
from ollama import AsyncClient
|
|
|
|
async def chat():
|
|
message = {'role': 'user', 'content': 'Why is the sky blue?'}
|
|
async for part in await AsyncClient().chat(model='llama3.1', messages=[message], stream=True):
|
|
print(part['message']['content'], end='', flush=True)
|
|
|
|
asyncio.run(chat())
|
|
```
|
|
|
|
## Errors
|
|
|
|
Errors are raised if requests return an error status or if an error is detected while streaming.
|
|
|
|
```python
|
|
model = 'does-not-yet-exist'
|
|
|
|
try:
|
|
ollama.chat(model)
|
|
except ollama.ResponseError as e:
|
|
print('Error:', e.error)
|
|
if e.status_code == 404:
|
|
ollama.pull(model)
|
|
```
|