mirror of
https://github.com/ollama/ollama-python.git
synced 2026-06-11 10:44:46 +00:00
Compare commits
14 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| db7e2a980e | |||
| a1d04f04f2 | |||
| 8af6cac86b | |||
| 9f41447f20 | |||
| da79e987f0 | |||
| c8392d6524 | |||
| 07ab287cdf | |||
| b0f6b99ca6 | |||
| c87604c66f | |||
| 53ff3cd025 | |||
| aa4b476f26 | |||
| 34e98bd237 | |||
| dad9e1ca3a | |||
| fe91357d4b |
@@ -13,8 +13,8 @@ jobs:
|
||||
id-token: write
|
||||
contents: write
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- uses: actions/setup-python@v5
|
||||
- uses: actions/checkout@v5
|
||||
- uses: actions/setup-python@v6
|
||||
- uses: astral-sh/setup-uv@v5
|
||||
with:
|
||||
enable-cache: true
|
||||
|
||||
@@ -10,7 +10,7 @@ jobs:
|
||||
test:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- uses: actions/checkout@v5
|
||||
- uses: astral-sh/setup-uv@v5
|
||||
with:
|
||||
enable-cache: true
|
||||
@@ -19,8 +19,8 @@ jobs:
|
||||
lint:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- uses: actions/setup-python@v5
|
||||
- uses: actions/checkout@v5
|
||||
- uses: actions/setup-python@v6
|
||||
- uses: astral-sh/setup-uv@v5
|
||||
with:
|
||||
enable-cache: true
|
||||
|
||||
@@ -5,7 +5,7 @@ The Ollama Python library provides the easiest way to integrate Python 3.8+ proj
|
||||
## Prerequisites
|
||||
|
||||
- [Ollama](https://ollama.com/download) should be installed and running
|
||||
- Pull a model to use with the library: `ollama pull <model>` e.g. `ollama pull llama3.2`
|
||||
- Pull a model to use with the library: `ollama pull <model>` e.g. `ollama pull gemma3`
|
||||
- See [Ollama.com](https://ollama.com/search) for more information on the models available.
|
||||
|
||||
## Install
|
||||
@@ -20,7 +20,7 @@ pip install ollama
|
||||
from ollama import chat
|
||||
from ollama import ChatResponse
|
||||
|
||||
response: ChatResponse = chat(model='llama3.2', messages=[
|
||||
response: ChatResponse = chat(model='gemma3', messages=[
|
||||
{
|
||||
'role': 'user',
|
||||
'content': 'Why is the sky blue?',
|
||||
@@ -41,7 +41,7 @@ Response streaming can be enabled by setting `stream=True`.
|
||||
from ollama import chat
|
||||
|
||||
stream = chat(
|
||||
model='llama3.2',
|
||||
model='gemma3',
|
||||
messages=[{'role': 'user', 'content': 'Why is the sky blue?'}],
|
||||
stream=True,
|
||||
)
|
||||
@@ -61,7 +61,7 @@ client = Client(
|
||||
host='http://localhost:11434',
|
||||
headers={'x-some-header': 'some-value'}
|
||||
)
|
||||
response = client.chat(model='llama3.2', messages=[
|
||||
response = client.chat(model='gemma3', messages=[
|
||||
{
|
||||
'role': 'user',
|
||||
'content': 'Why is the sky blue?',
|
||||
@@ -79,7 +79,7 @@ from ollama import AsyncClient
|
||||
|
||||
async def chat():
|
||||
message = {'role': 'user', 'content': 'Why is the sky blue?'}
|
||||
response = await AsyncClient().chat(model='llama3.2', messages=[message])
|
||||
response = await AsyncClient().chat(model='gemma3', messages=[message])
|
||||
|
||||
asyncio.run(chat())
|
||||
```
|
||||
@@ -92,7 +92,7 @@ from ollama import AsyncClient
|
||||
|
||||
async def chat():
|
||||
message = {'role': 'user', 'content': 'Why is the sky blue?'}
|
||||
async for part in await AsyncClient().chat(model='llama3.2', messages=[message], stream=True):
|
||||
async for part in await AsyncClient().chat(model='gemma3', messages=[message], stream=True):
|
||||
print(part['message']['content'], end='', flush=True)
|
||||
|
||||
asyncio.run(chat())
|
||||
@@ -105,13 +105,13 @@ The Ollama Python library's API is designed around the [Ollama REST API](https:/
|
||||
### Chat
|
||||
|
||||
```python
|
||||
ollama.chat(model='llama3.2', messages=[{'role': 'user', 'content': 'Why is the sky blue?'}])
|
||||
ollama.chat(model='gemma3', messages=[{'role': 'user', 'content': 'Why is the sky blue?'}])
|
||||
```
|
||||
|
||||
### Generate
|
||||
|
||||
```python
|
||||
ollama.generate(model='llama3.2', prompt='Why is the sky blue?')
|
||||
ollama.generate(model='gemma3', prompt='Why is the sky blue?')
|
||||
```
|
||||
|
||||
### List
|
||||
@@ -123,49 +123,49 @@ ollama.list()
|
||||
### Show
|
||||
|
||||
```python
|
||||
ollama.show('llama3.2')
|
||||
ollama.show('gemma3')
|
||||
```
|
||||
|
||||
### Create
|
||||
|
||||
```python
|
||||
ollama.create(model='example', from_='llama3.2', system="You are Mario from Super Mario Bros.")
|
||||
ollama.create(model='example', from_='gemma3', system="You are Mario from Super Mario Bros.")
|
||||
```
|
||||
|
||||
### Copy
|
||||
|
||||
```python
|
||||
ollama.copy('llama3.2', 'user/llama3.2')
|
||||
ollama.copy('gemma3', 'user/gemma3')
|
||||
```
|
||||
|
||||
### Delete
|
||||
|
||||
```python
|
||||
ollama.delete('llama3.2')
|
||||
ollama.delete('gemma3')
|
||||
```
|
||||
|
||||
### Pull
|
||||
|
||||
```python
|
||||
ollama.pull('llama3.2')
|
||||
ollama.pull('gemma3')
|
||||
```
|
||||
|
||||
### Push
|
||||
|
||||
```python
|
||||
ollama.push('user/llama3.2')
|
||||
ollama.push('user/gemma3')
|
||||
```
|
||||
|
||||
### Embed
|
||||
|
||||
```python
|
||||
ollama.embed(model='llama3.2', input='The sky is blue because of rayleigh scattering')
|
||||
ollama.embed(model='gemma3', input='The sky is blue because of rayleigh scattering')
|
||||
```
|
||||
|
||||
### Embed (batch)
|
||||
|
||||
```python
|
||||
ollama.embed(model='llama3.2', input=['The sky is blue because of rayleigh scattering', 'Grass is green because of chlorophyll'])
|
||||
ollama.embed(model='gemma3', input=['The sky is blue because of rayleigh scattering', 'Grass is green because of chlorophyll'])
|
||||
```
|
||||
|
||||
### Ps
|
||||
|
||||
@@ -27,6 +27,12 @@ See [ollama/docs/api.md](https://github.com/ollama/ollama/blob/main/docs/api.md)
|
||||
- [async-tools.py](async-tools.py)
|
||||
- [multi-tool.py](multi-tool.py) - Using multiple tools, with thinking enabled
|
||||
|
||||
#### gpt-oss
|
||||
- [gpt-oss-tools.py](gpt-oss-tools.py)
|
||||
- [gpt-oss-tools-stream.py](gpt-oss-tools-stream.py)
|
||||
- [gpt-oss-tools-browser.py](gpt-oss-tools-browser.py) - Using browser research tools with gpt-oss
|
||||
- [gpt-oss-tools-browser-stream.py](gpt-oss-tools-browser-stream.py) - Using browser research tools with gpt-oss, with streaming enabled
|
||||
|
||||
|
||||
### Multimodal with Images - Chat with a multimodal (image chat) model
|
||||
- [multimodal-chat.py](multimodal-chat.py)
|
||||
@@ -69,3 +75,6 @@ Requirement: `pip install tqdm`
|
||||
|
||||
### Thinking (generate) - Enable thinking mode for a model
|
||||
- [thinking-generate.py](thinking-generate.py)
|
||||
|
||||
### Thinking (levels) - Choose the thinking level
|
||||
- [thinking-levels.py](thinking-levels.py)
|
||||
|
||||
@@ -12,7 +12,7 @@ async def main():
|
||||
]
|
||||
|
||||
client = AsyncClient()
|
||||
response = await client.chat('llama3.2', messages=messages)
|
||||
response = await client.chat('gemma3', messages=messages)
|
||||
print(response['message']['content'])
|
||||
|
||||
|
||||
|
||||
@@ -5,7 +5,7 @@ import ollama
|
||||
|
||||
async def main():
|
||||
client = ollama.AsyncClient()
|
||||
response = await client.generate('llama3.2', 'Why is the sky blue?')
|
||||
response = await client.generate('gemma3', 'Why is the sky blue?')
|
||||
print(response['response'])
|
||||
|
||||
|
||||
|
||||
@@ -7,7 +7,5 @@ messages = [
|
||||
},
|
||||
]
|
||||
|
||||
for part in chat('llama3.2', messages=messages, stream=True):
|
||||
for part in chat('gemma3', messages=messages, stream=True):
|
||||
print(part['message']['content'], end='', flush=True)
|
||||
|
||||
print()
|
||||
|
||||
@@ -22,7 +22,7 @@ messages = [
|
||||
while True:
|
||||
user_input = input('Chat with history: ')
|
||||
response = chat(
|
||||
'llama3.2',
|
||||
'gemma3',
|
||||
messages=[*messages, {'role': 'user', 'content': user_input}],
|
||||
)
|
||||
|
||||
|
||||
+1
-1
@@ -7,5 +7,5 @@ messages = [
|
||||
},
|
||||
]
|
||||
|
||||
response = chat('llama3.2', messages=messages)
|
||||
response = chat('gemma3', messages=messages)
|
||||
print(response['message']['content'])
|
||||
|
||||
+1
-1
@@ -3,7 +3,7 @@ from ollama import Client
|
||||
client = Client()
|
||||
response = client.create(
|
||||
model='my-assistant',
|
||||
from_='llama3.2',
|
||||
from_='gemma3',
|
||||
system='You are mario from Super Mario Bros.',
|
||||
stream=False,
|
||||
)
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
from ollama import generate
|
||||
|
||||
for part in generate('llama3.2', 'Why is the sky blue?', stream=True):
|
||||
for part in generate('gemma3', 'Why is the sky blue?', stream=True):
|
||||
print(part['response'], end='', flush=True)
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
from ollama import generate
|
||||
|
||||
response = generate('llama3.2', 'Why is the sky blue?')
|
||||
response = generate('gemma3', 'Why is the sky blue?')
|
||||
print(response['response'])
|
||||
|
||||
@@ -0,0 +1,198 @@
|
||||
# /// script
|
||||
# requires-python = ">=3.11"
|
||||
# dependencies = [
|
||||
# "gpt-oss",
|
||||
# "ollama",
|
||||
# "rich",
|
||||
# ]
|
||||
# ///
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
from typing import Iterator, Optional
|
||||
|
||||
from gpt_oss.tools.simple_browser import ExaBackend, SimpleBrowserTool
|
||||
from openai_harmony import Author, Role, TextContent
|
||||
from openai_harmony import Message as HarmonyMessage
|
||||
from rich import print
|
||||
|
||||
from ollama import Client
|
||||
from ollama._types import ChatResponse
|
||||
|
||||
_backend = ExaBackend(source='web')
|
||||
_browser_tool = SimpleBrowserTool(backend=_backend)
|
||||
|
||||
|
||||
def heading(text):
    """Print ``text`` and, on the next line, a rule of ``=`` that is three characters longer."""
    underline = '=' * (len(text) + 3)
    for line in (text, underline):
        print(line)
|
||||
|
||||
|
||||
async def _browser_search_async(query: str, topn: int = 10, source: str | None = None) -> str:
    """Run a ``browser.search`` request through the shared browser tool.

    Args:
        query: Search query, sent in the JSON payload.
        topn: Sent in the JSON payload as ``topn``.
        source: Accepted for signature compatibility.
            NOTE(review): this value is not included in the payload - confirm
            whether that is intentional.

    Returns:
        The concatenated text of every ``TextContent`` item the tool yields,
        or a "No results" message when no text came back.
    """
    # map Ollama message to Harmony format
    request = HarmonyMessage(
        author=Author(role=Role.USER),
        content=[TextContent(text=json.dumps({'query': query, 'topn': topn}))],
        recipient='browser.search',
    )

    pieces = []
    async for reply in _browser_tool._process(request):
        pieces.extend(part.text for part in (reply.content or ()) if isinstance(part, TextContent))
    return ''.join(pieces) or f'No results for query: {query}'
|
||||
|
||||
|
||||
async def _browser_open_async(id: int | str = -1, cursor: int = -1, loc: int = -1, num_lines: int = -1, *, view_source: bool = False, source: str | None = None) -> str:
    """Run a ``browser.open`` request through the shared browser tool.

    All arguments are forwarded unchanged as a JSON payload.

    Returns:
        The concatenated text of every ``TextContent`` item the tool yields,
        or a "Could not open" message when no text came back.
    """
    request = HarmonyMessage(
        author=Author(role=Role.USER),
        content=[
            TextContent(
                text=json.dumps(
                    {
                        'id': id,
                        'cursor': cursor,
                        'loc': loc,
                        'num_lines': num_lines,
                        'view_source': view_source,
                        'source': source,
                    }
                )
            )
        ],
        recipient='browser.open',
    )

    pieces = []
    async for reply in _browser_tool._process(request):
        pieces.extend(part.text for part in (reply.content or ()) if isinstance(part, TextContent))
    return ''.join(pieces) or f'Could not open: {id}'
|
||||
|
||||
|
||||
async def _browser_find_async(pattern: str, cursor: int = -1) -> str:
    """Run a ``browser.find`` request through the shared browser tool.

    Args:
        pattern: Sent in the JSON payload as ``pattern``.
        cursor: Sent in the JSON payload as ``cursor``.

    Returns:
        The concatenated text of every ``TextContent`` item the tool yields,
        or a "Pattern not found" message when no text came back.
    """
    request = HarmonyMessage(
        author=Author(role=Role.USER),
        content=[TextContent(text=json.dumps({'pattern': pattern, 'cursor': cursor}))],
        recipient='browser.find',
    )

    pieces = []
    async for reply in _browser_tool._process(request):
        pieces.extend(part.text for part in (reply.content or ()) if isinstance(part, TextContent))
    return ''.join(pieces) or f'Pattern not found: {pattern}'
|
||||
|
||||
|
||||
def browser_search(query: str, topn: int = 10, source: Optional[str] = None) -> str:
    """Blocking wrapper: run ``_browser_search_async`` to completion with ``asyncio.run``."""
    pending = _browser_search_async(query=query, topn=topn, source=source)
    return asyncio.run(pending)
|
||||
|
||||
|
||||
def browser_open(id: int | str = -1, cursor: int = -1, loc: int = -1, num_lines: int = -1, *, view_source: bool = False, source: Optional[str] = None) -> str:
    """Blocking wrapper: run ``_browser_open_async`` to completion with ``asyncio.run``."""
    pending = _browser_open_async(
        id=id,
        cursor=cursor,
        loc=loc,
        num_lines=num_lines,
        view_source=view_source,
        source=source,
    )
    return asyncio.run(pending)
|
||||
|
||||
|
||||
def browser_find(pattern: str, cursor: int = -1) -> str:
    """Blocking wrapper: run ``_browser_find_async`` to completion with ``asyncio.run``."""
    pending = _browser_find_async(pattern=pattern, cursor=cursor)
    return asyncio.run(pending)
|
||||
|
||||
|
||||
# Schema definitions for each browser tool.
# Only the tool name is declared here; no description or parameters are sent.
browser_search_schema = {
    'type': 'function',
    'function': {
        'name': 'browser.search',
    },
}

browser_open_schema = {
    'type': 'function',
    'function': {
        'name': 'browser.open',
    },
}

browser_find_schema = {
    'type': 'function',
    'function': {
        'name': 'browser.find',
    },
}

# Dispatch table: tool name requested by the model -> local callable.
available_tools = {
    'browser.search': browser_search,
    'browser.open': browser_open,
    'browser.find': browser_find,
}


model = 'gpt-oss:20b'
print('Model: ', model, '\n')

prompt = 'What is Ollama?'
print('You: ', prompt, '\n')
messages = [{'role': 'user', 'content': prompt}]

client = Client()

# gpt-oss can call tools while "thinking"
# a loop is needed to call the tools and get the results
final = True
while True:
    response_stream: Iterator[ChatResponse] = client.chat(
        model=model,
        messages=messages,
        tools=[browser_search_schema, browser_open_schema, browser_find_schema],
        options={'num_ctx': 8192},  # 8192 is the recommended lower limit for the context window
        stream=True,
    )

    # Per-turn accumulators for the streamed assistant message.
    tool_calls = []
    thinking = ''
    content = ''

    for chunk in response_stream:
        if chunk.message.tool_calls:
            tool_calls.extend(chunk.message.tool_calls)

        if chunk.message.content:
            # Print the "Final result" heading once, on the first content chunk
            # whose thinking field is neither non-empty nor an empty string.
            if not (chunk.message.thinking or chunk.message.thinking == '') and final:
                heading('\n\nFinal result: ')
                final = False
            # Keep the streamed answer so it can be stored in the history below.
            content += chunk.message.content
            print(chunk.message.content, end='', flush=True)

        if chunk.message.thinking:
            thinking += chunk.message.thinking
            print(chunk.message.thinking, end='', flush=True)

    # Record the assistant turn whenever it produced anything. Thinking and
    # content go in their own fields, and tool calls are kept even when the
    # model emitted no thinking, so each tool result follows the call it answers.
    if thinking or content or tool_calls:
        messages.append({'role': 'assistant', 'thinking': thinking, 'content': content, 'tool_calls': tool_calls})

    print()

    if not tool_calls:
        # no more tool calls, we can stop the loop
        break

    for tool_call in tool_calls:
        tool_name = tool_call.function.name
        args = tool_call.function.arguments or {}
        function_to_call = available_tools.get(tool_name)

        if not function_to_call:
            # Report the unknown tool to the model as well as to the user.
            not_found = f'Tool {tool_name} not found'
            print(not_found)
            messages.append({'role': 'tool', 'content': not_found, 'tool_name': tool_name})
            continue

        heading(f'\nCalling tool: {tool_name}')
        if args:
            print(f'Arguments: {args}')

        try:
            result = function_to_call(**args)
            print(f'Tool result: {result[:200]}')
            if len(result) > 200:
                heading('... [truncated]')
            print()

            result_message = {'role': 'tool', 'content': result, 'tool_name': tool_name}
            messages.append(result_message)

        except Exception as e:
            # Feed the failure back to the model instead of aborting the example.
            err = f'Error from {tool_name}: {e}'
            print(err)
            messages.append({'role': 'tool', 'content': err, 'tool_name': tool_name})
|
||||
@@ -0,0 +1,175 @@
|
||||
# /// script
|
||||
# requires-python = ">=3.11"
|
||||
# dependencies = [
|
||||
# "gpt-oss",
|
||||
# "ollama",
|
||||
# "rich",
|
||||
# ]
|
||||
# ///
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
from typing import Optional
|
||||
|
||||
from gpt_oss.tools.simple_browser import ExaBackend, SimpleBrowserTool
|
||||
from openai_harmony import Author, Role, TextContent
|
||||
from openai_harmony import Message as HarmonyMessage
|
||||
|
||||
from ollama import Client
|
||||
|
||||
_backend = ExaBackend(source='web')
|
||||
_browser_tool = SimpleBrowserTool(backend=_backend)
|
||||
|
||||
|
||||
def heading(text):
    """Print ``text`` and, on the next line, a rule of ``=`` that is three characters longer."""
    underline = '=' * (len(text) + 3)
    for line in (text, underline):
        print(line)
|
||||
|
||||
|
||||
async def _browser_search_async(query: str, topn: int = 10, source: str | None = None) -> str:
    """Run a ``browser.search`` request through the shared browser tool.

    Args:
        query: Search query, sent in the JSON payload.
        topn: Sent in the JSON payload as ``topn``.
        source: Accepted for signature compatibility.
            NOTE(review): this value is not included in the payload - confirm
            whether that is intentional.

    Returns:
        The concatenated text of every ``TextContent`` item the tool yields,
        or a "No results" message when no text came back.
    """
    # map Ollama message to Harmony format
    request = HarmonyMessage(
        author=Author(role=Role.USER),
        content=[TextContent(text=json.dumps({'query': query, 'topn': topn}))],
        recipient='browser.search',
    )

    pieces = []
    async for reply in _browser_tool._process(request):
        pieces.extend(part.text for part in (reply.content or ()) if isinstance(part, TextContent))
    return ''.join(pieces) or f'No results for query: {query}'
|
||||
|
||||
|
||||
async def _browser_open_async(id: int | str = -1, cursor: int = -1, loc: int = -1, num_lines: int = -1, *, view_source: bool = False, source: str | None = None) -> str:
    """Run a ``browser.open`` request through the shared browser tool.

    All arguments are forwarded unchanged as a JSON payload.

    Returns:
        The concatenated text of every ``TextContent`` item the tool yields,
        or a "Could not open" message when no text came back.
    """
    request = HarmonyMessage(
        author=Author(role=Role.USER),
        content=[
            TextContent(
                text=json.dumps(
                    {
                        'id': id,
                        'cursor': cursor,
                        'loc': loc,
                        'num_lines': num_lines,
                        'view_source': view_source,
                        'source': source,
                    }
                )
            )
        ],
        recipient='browser.open',
    )

    pieces = []
    async for reply in _browser_tool._process(request):
        pieces.extend(part.text for part in (reply.content or ()) if isinstance(part, TextContent))
    return ''.join(pieces) or f'Could not open: {id}'
|
||||
|
||||
|
||||
async def _browser_find_async(pattern: str, cursor: int = -1) -> str:
    """Run a ``browser.find`` request through the shared browser tool.

    Args:
        pattern: Sent in the JSON payload as ``pattern``.
        cursor: Sent in the JSON payload as ``cursor``.

    Returns:
        The concatenated text of every ``TextContent`` item the tool yields,
        or a "Pattern not found" message when no text came back.
    """
    request = HarmonyMessage(
        author=Author(role=Role.USER),
        content=[TextContent(text=json.dumps({'pattern': pattern, 'cursor': cursor}))],
        recipient='browser.find',
    )

    pieces = []
    async for reply in _browser_tool._process(request):
        pieces.extend(part.text for part in (reply.content or ()) if isinstance(part, TextContent))
    return ''.join(pieces) or f'Pattern not found: {pattern}'
|
||||
|
||||
|
||||
def browser_search(query: str, topn: int = 10, source: Optional[str] = None) -> str:
    """Blocking wrapper: run ``_browser_search_async`` to completion with ``asyncio.run``."""
    pending = _browser_search_async(query=query, topn=topn, source=source)
    return asyncio.run(pending)
|
||||
|
||||
|
||||
def browser_open(id: int | str = -1, cursor: int = -1, loc: int = -1, num_lines: int = -1, *, view_source: bool = False, source: Optional[str] = None) -> str:
    """Blocking wrapper: run ``_browser_open_async`` to completion with ``asyncio.run``."""
    pending = _browser_open_async(
        id=id,
        cursor=cursor,
        loc=loc,
        num_lines=num_lines,
        view_source=view_source,
        source=source,
    )
    return asyncio.run(pending)
|
||||
|
||||
|
||||
def browser_find(pattern: str, cursor: int = -1) -> str:
    """Blocking wrapper: run ``_browser_find_async`` to completion with ``asyncio.run``."""
    pending = _browser_find_async(pattern=pattern, cursor=cursor)
    return asyncio.run(pending)
|
||||
|
||||
|
||||
# Schema definitions for each browser tool (name only).
browser_search_schema = {'type': 'function', 'function': {'name': 'browser.search'}}

browser_open_schema = {'type': 'function', 'function': {'name': 'browser.open'}}

browser_find_schema = {'type': 'function', 'function': {'name': 'browser.find'}}

# Dispatch table: tool name requested by the model -> local callable.
available_tools = {
    'browser.search': browser_search,
    'browser.open': browser_open,
    'browser.find': browser_find,
}


model = 'gpt-oss:20b'
print('Model: ', model, '\n')

prompt = 'What is Ollama?'
print('You: ', prompt, '\n')
messages = [{'role': 'user', 'content': prompt}]

client = Client()
# Keep asking the model until it answers without requesting a tool.
while True:
    response = client.chat(
        model=model,
        messages=messages,
        tools=[browser_search_schema, browser_open_schema, browser_find_schema],
        options={'num_ctx': 8192},  # 8192 is the recommended lower limit for the context window
    )

    thinking_text = getattr(response.message, 'thinking', None)
    if thinking_text:
        heading('Thinking')
        print(thinking_text.strip() + '\n')

    answer_text = getattr(response.message, 'content', None)
    if answer_text:
        heading('Assistant')
        print(answer_text.strip() + '\n')

    # add message to chat history
    messages.append(response.message)

    if not response.message.tool_calls:
        # break on no more tool calls
        break

    for tool_call in response.message.tool_calls:
        tool_name = tool_call.function.name
        args = tool_call.function.arguments or {}
        function_to_call = available_tools.get(tool_name)
        if not function_to_call:
            print(f'Unknown tool: {tool_name}')
            continue

        # Printing stays inside the try so any failure is reported to the model.
        try:
            result = function_to_call(**args)
            heading(f'Tool: {tool_name}')
            if args:
                print(f'Arguments: {args}')
            print(result[:200])
            if len(result) > 200:
                print('... [truncated]')
            print()
            messages.append({'role': 'tool', 'content': result, 'tool_name': tool_name})
        except Exception as e:
            err = f'Error from {tool_name}: {e}'
            print(err)
            messages.append({'role': 'tool', 'content': err, 'tool_name': tool_name})
|
||||
@@ -0,0 +1,105 @@
|
||||
# /// script
|
||||
# requires-python = ">=3.11"
|
||||
# dependencies = [
|
||||
# "gpt-oss",
|
||||
# "ollama",
|
||||
# "rich",
|
||||
# ]
|
||||
# ///
|
||||
import random
|
||||
from typing import Iterator
|
||||
|
||||
from rich import print
|
||||
|
||||
from ollama import Client
|
||||
from ollama._types import ChatResponse
|
||||
|
||||
|
||||
def get_weather(city: str) -> str:
    """
    Get the current temperature for a city

    The value is a random whole number of degrees from -10 to 34; ``city`` is
    only echoed back in the message.

    Args:
      city (str): The name of the city

    Returns:
      str: The current temperature
    """
    celsius = random.choice(range(-10, 35))
    return f'The temperature in {city} is {celsius}°C'
|
||||
|
||||
|
||||
def get_weather_conditions(city: str) -> str:
    """
    Get the weather conditions for a city

    The result is picked at random; ``city`` does not influence it.

    Args:
      city (str): The name of the city

    Returns:
      str: The current weather conditions
    """
    return random.choice(('sunny', 'cloudy', 'rainy', 'snowy', 'foggy'))
|
||||
|
||||
|
||||
# Dispatch table: tool name requested by the model -> local callable.
available_tools = {'get_weather': get_weather, 'get_weather_conditions': get_weather_conditions}

messages = [{'role': 'user', 'content': 'What is the weather like in London? What are the conditions in Toronto?'}]

client = Client(
    # Ollama Turbo
    # host="https://ollama.com", headers={'Authorization': (os.getenv('OLLAMA_API_KEY'))}
)

model = 'gpt-oss:20b'
# gpt-oss can call tools while "thinking"
# a loop is needed to call the tools and get the results
final = True
while True:
    response_stream: Iterator[ChatResponse] = client.chat(model=model, messages=messages, tools=[get_weather, get_weather_conditions], stream=True)

    # Per-turn accumulators for the streamed assistant message.
    tool_calls = []
    thinking = ''
    content = ''

    for chunk in response_stream:
        if chunk.message.tool_calls:
            tool_calls.extend(chunk.message.tool_calls)

        if chunk.message.content:
            # Print the "Final result" banner once, on the first content chunk
            # whose thinking field is neither non-empty nor an empty string.
            if not (chunk.message.thinking or chunk.message.thinking == '') and final:
                print('\n\n' + '=' * 10)
                print('Final result: ')
                final = False
            # accumulate content so the history entry below carries the answer
            content += chunk.message.content
            print(chunk.message.content, end='', flush=True)

        if chunk.message.thinking:
            # accumulate thinking
            thinking += chunk.message.thinking
            print(chunk.message.thinking, end='', flush=True)

    # Record the assistant turn whenever it produced anything.
    if thinking or content or tool_calls:
        messages.append({'role': 'assistant', 'thinking': thinking, 'content': content, 'tool_calls': tool_calls})

    print()

    if not tool_calls:
        # no more tool calls, we can stop the loop
        break

    for tool_call in tool_calls:
        function_to_call = available_tools.get(tool_call.function.name)
        if function_to_call:
            print('\nCalling tool:', tool_call.function.name, 'with arguments: ', tool_call.function.arguments)
            result = function_to_call(**tool_call.function.arguments)
            print('Tool result: ', result + '\n')

            result_message = {'role': 'tool', 'content': result, 'tool_name': tool_call.function.name}
            messages.append(result_message)
        else:
            # Tell the model the tool does not exist so it can recover.
            print(f'Tool {tool_call.function.name} not found')
            messages.append({'role': 'tool', 'content': f'Tool {tool_call.function.name} not found', 'tool_name': tool_call.function.name})
|
||||
@@ -0,0 +1,84 @@
|
||||
# /// script
|
||||
# requires-python = ">=3.11"
|
||||
# dependencies = [
|
||||
# "gpt-oss",
|
||||
# "ollama",
|
||||
# "rich",
|
||||
# ]
|
||||
# ///
|
||||
import random
|
||||
|
||||
from rich import print
|
||||
|
||||
from ollama import Client
|
||||
from ollama._types import ChatResponse
|
||||
|
||||
|
||||
def get_weather(city: str) -> str:
    """
    Get the current temperature for a city

    The value is a random whole number of degrees from -10 to 34; ``city`` is
    only echoed back in the message.

    Args:
      city (str): The name of the city

    Returns:
      str: The current temperature
    """
    celsius = random.choice(range(-10, 35))
    return f'The temperature in {city} is {celsius}°C'
|
||||
|
||||
|
||||
def get_weather_conditions(city: str) -> str:
    """
    Get the weather conditions for a city

    The result is picked at random; ``city`` does not influence it.

    Args:
      city (str): The name of the city

    Returns:
      str: The current weather conditions
    """
    return random.choice(('sunny', 'cloudy', 'rainy', 'snowy', 'foggy'))
|
||||
|
||||
|
||||
# Dispatch table: tool name requested by the model -> local callable.
available_tools = {'get_weather': get_weather, 'get_weather_conditions': get_weather_conditions}

messages = [{'role': 'user', 'content': 'What is the weather like in London? What are the conditions in Toronto?'}]


client = Client(
    # Ollama Turbo
    # host="https://ollama.com", headers={'Authorization': (os.getenv('OLLAMA_API_KEY'))}
)
model = 'gpt-oss:20b'
# gpt-oss can call tools while "thinking"
# a loop is needed to call the tools and get the results
while True:
    response: ChatResponse = client.chat(model=model, messages=messages, tools=[get_weather, get_weather_conditions])

    # Show whatever the model produced this turn, content first.
    for label, text in (('Content: ', response.message.content), ('Thinking: ', response.message.thinking)):
        if text:
            print(label)
            print(text + '\n')

    messages.append(response.message)

    if not response.message.tool_calls:
        # no more tool calls, we can stop the loop
        break

    for tool_call in response.message.tool_calls:
        function_to_call = available_tools.get(tool_call.function.name)
        if not function_to_call:
            print(f'Tool {tool_call.function.name} not found')
            messages.append({'role': 'tool', 'content': f'Tool {tool_call.function.name} not found', 'tool_name': tool_call.function.name})
            continue

        result = function_to_call(**tool_call.function.arguments)
        print('Result from tool call name: ', tool_call.function.name, 'with arguments: ', tool_call.function.arguments, 'result: ', result + '\n')
        messages.append({'role': 'tool', 'content': result, 'tool_name': tool_call.function.name})
|
||||
@@ -11,7 +11,7 @@ path = input('Please enter the path to the image: ')
|
||||
# img = Path(path).read_bytes()
|
||||
|
||||
response = chat(
|
||||
model='llama3.2-vision',
|
||||
model='gemma3',
|
||||
messages=[
|
||||
{
|
||||
'role': 'user',
|
||||
|
||||
+2
-2
@@ -1,7 +1,7 @@
|
||||
from ollama import ProcessResponse, chat, ps, pull
|
||||
|
||||
# Ensure at least one model is loaded
|
||||
response = pull('llama3.2', stream=True)
|
||||
response = pull('gemma3', stream=True)
|
||||
progress_states = set()
|
||||
for progress in response:
|
||||
if progress.get('status') in progress_states:
|
||||
@@ -12,7 +12,7 @@ for progress in response:
|
||||
print('\n')
|
||||
|
||||
print('Waiting for model to load... \n')
|
||||
chat(model='llama3.2', messages=[{'role': 'user', 'content': 'Why is the sky blue?'}])
|
||||
chat(model='gemma3', messages=[{'role': 'user', 'content': 'Why is the sky blue?'}])
|
||||
|
||||
|
||||
response: ProcessResponse = ps()
|
||||
|
||||
+1
-1
@@ -3,7 +3,7 @@ from tqdm import tqdm
|
||||
from ollama import pull
|
||||
|
||||
current_digest, bars = '', {}
|
||||
for progress in pull('llama3.2', stream=True):
|
||||
for progress in pull('gemma3', stream=True):
|
||||
digest = progress.get('digest', '')
|
||||
if digest != current_digest and current_digest in bars:
|
||||
bars[current_digest].close()
|
||||
|
||||
@@ -33,7 +33,7 @@ if not path.exists():
|
||||
|
||||
# Set up chat as usual
|
||||
response = chat(
|
||||
model='llama3.2-vision',
|
||||
model='gemma3',
|
||||
format=ImageDescription.model_json_schema(), # Pass in the schema for the response
|
||||
messages=[
|
||||
{
|
||||
|
||||
@@ -0,0 +1,26 @@
|
||||
from ollama import chat
|
||||
|
||||
|
||||
def heading(text):
    """Print ``text`` and, on the next line, a rule of ``=`` of the same length."""
    underline = '=' * len(text)
    for line in (text, underline):
        print(line)
|
||||
|
||||
|
||||
messages = [
    {'role': 'user', 'content': 'What is 10 + 23?'},
]

# gpt-oss supports 'low', 'medium', 'high'
levels = ['low', 'medium', 'high']
# Ask the same question once per thinking level and print both outputs.
for i, level in enumerate(levels):
    response = chat('gpt-oss:20b', messages=messages, think=level)

    for title, body in ((f'Thinking ({level})', response.message.thinking), ('Response', response.message.content)):
        heading(title)
        print(body)
        print('\n')

    # Separator between levels, skipped after the last one.
    is_last = i == len(levels) - 1
    if not is_last:
        print('-' * 20)
        print('\n')
|
||||
+100
@@ -0,0 +1,100 @@
|
||||
import base64
|
||||
import os
|
||||
import time
|
||||
from pathlib import Path
|
||||
from typing import Optional
|
||||
|
||||
from cryptography.hazmat.primitives import serialization
|
||||
|
||||
|
||||
class OllamaAuth:
    """Sign HTTP requests with the Ed25519 SSH private key stored on disk.

    The key is read from ``key_path`` every time a request is signed.
    """

    def __init__(self, key_path: Optional[str] = None):
        """Initialize the OllamaAuth class.

        Args:
          key_path: Optional path to the private key file. If not provided,
                   defaults to ~/.ollama/id_ed25519
        """
        if key_path is None:
            self.key_path = os.path.join(str(Path.home()), '.ollama', 'id_ed25519')
        else:
            # Expand ~ and environment variables in the path
            self.key_path = os.path.expanduser(os.path.expandvars(key_path))

    def load_private_key(self):
        """Read and load the private key.

        Returns:
          The loaded Ed25519 private key.

        Raises:
          FileNotFoundError: If the key file doesn't exist
          ValueError: If the key file cannot be read, cannot be parsed, or
                      does not hold an Ed25519 key
        """
        # Read and parse are separate steps so each failure keeps its own cause.
        try:
            with open(self.key_path, 'rb') as f:
                private_key_data = f.read()
        except FileNotFoundError as err:
            raise FileNotFoundError(f"Could not find Ollama private key at {self.key_path}. Please generate one using: ssh-keygen -t ed25519 -f ~/.ollama/id_ed25519 -N ''") from err
        except OSError as err:
            # Other read failures keep the ValueError type callers already handle.
            raise ValueError(f'Invalid private key at {self.key_path}: {err!s}') from err

        # Imported here because only this check needs it.
        from cryptography.hazmat.primitives.asymmetric.ed25519 import Ed25519PrivateKey

        try:
            private_key = serialization.load_ssh_private_key(
                private_key_data,
                password=None,
            )
        except Exception as err:
            raise ValueError(f'Invalid private key at {self.key_path}: {err!s}') from err

        # sign_request() calls sign() with the message only, which is the
        # Ed25519 signature; reject other SSH key types here with a clear error.
        if not isinstance(private_key, Ed25519PrivateKey):
            raise ValueError(f'Invalid private key at {self.key_path}: expected an Ed25519 key, got {type(private_key).__name__}')
        return private_key

    def get_public_key_b64(self, private_key):
        """Get the base64 encoded public key.

        Args:
          private_key: The Ed25519 private key

        Returns:
          Base64 encoded public key string

        Raises:
          ValueError: If the OpenSSH public key line has fewer than two fields
        """
        # Get the public key in OpenSSH format and extract the second field (base64-encoded key)
        openssh_pub = (
            private_key.public_key()
            .public_bytes(
                encoding=serialization.Encoding.OpenSSH,
                format=serialization.PublicFormat.OpenSSH,
            )
            .decode('utf-8')
            .strip()
        )
        parts = openssh_pub.split(' ')
        if len(parts) < 2:
            raise ValueError('Malformed OpenSSH public key')
        return parts[1]

    def sign_request(self, method: str, path: str):
        """Sign an HTTP request.

        The signed challenge is ``"<method>,<path with ts query parameter>"``.

        Args:
          method: The HTTP method (e.g. 'GET', 'POST')
          path: The request path (e.g. '/api/chat')

        Returns:
          A tuple of (auth_token, timestamp) where auth_token is the
          authorization header value and timestamp is the request timestamp.

        Raises:
          FileNotFoundError: If the key file doesn't exist
          ValueError: If the key file is invalid
        """
        timestamp = str(int(time.time()))
        # Append ts as an extra query parameter, or start the query string with it.
        separator = '&' if '?' in path else '?'
        challenge = f'{method},{path}{separator}ts={timestamp}'

        private_key = self.load_private_key()
        signature = private_key.sign(challenge.encode())

        public_key_b64 = self.get_public_key_b64(private_key)

        auth_token = f'{public_key_b64}:{base64.b64encode(signature).decode("utf-8")}'

        return auth_token, timestamp
|
||||
+53
-16
@@ -25,6 +25,7 @@ from typing import (
|
||||
import anyio
|
||||
from pydantic.json_schema import JsonSchemaValue
|
||||
|
||||
from ollama._auth import OllamaAuth
|
||||
from ollama._utils import convert_function_to_tool
|
||||
|
||||
if sys.version_info < (3, 9):
|
||||
@@ -80,6 +81,7 @@ class BaseClient:
|
||||
follow_redirects: bool = True,
|
||||
timeout: Any = None,
|
||||
headers: Optional[Mapping[str, str]] = None,
|
||||
auth_key_path: Optional[str] = None,
|
||||
**kwargs,
|
||||
) -> None:
|
||||
"""
|
||||
@@ -87,9 +89,10 @@ class BaseClient:
|
||||
except for the following:
|
||||
- `follow_redirects`: True
|
||||
- `timeout`: None
|
||||
- `auth_key_path`: Optional path to the ed25519 private key for authentication
|
||||
`kwargs` are passed to the httpx client.
|
||||
"""
|
||||
|
||||
self._auth = OllamaAuth(auth_key_path)
|
||||
self._client = client(
|
||||
base_url=_parse_host(host or os.getenv('OLLAMA_HOST')),
|
||||
follow_redirects=follow_redirects,
|
||||
@@ -107,6 +110,27 @@ class BaseClient:
|
||||
**kwargs,
|
||||
)
|
||||
|
||||
def _prepare_request(self, method: str, path: str, **kwargs) -> Dict[str, Any]:
|
||||
if self._auth:
|
||||
url = str(self._client.build_request(method, path).url)
|
||||
parsed = urllib.parse.urlparse(url)
|
||||
full_path = parsed.path
|
||||
if parsed.query:
|
||||
full_path = f'{full_path}?{parsed.query}'
|
||||
|
||||
auth_token, timestamp = self._auth.sign_request(method, full_path)
|
||||
|
||||
if 'headers' not in kwargs:
|
||||
kwargs['headers'] = {}
|
||||
kwargs['headers']['Authorization'] = auth_token
|
||||
|
||||
if '?' in path:
|
||||
path = f'{path}&ts={timestamp}'
|
||||
else:
|
||||
path = f'{path}?ts={timestamp}'
|
||||
|
||||
return {'method': method, 'url': path, **kwargs}
|
||||
|
||||
|
||||
CONNECTION_ERROR_MESSAGE = 'Failed to connect to Ollama. Please check that Ollama is downloaded, running and accessible. https://ollama.com/download'
|
||||
|
||||
@@ -155,14 +179,18 @@ class Client(BaseClient):
|
||||
def _request(
|
||||
self,
|
||||
cls: Type[T],
|
||||
*args,
|
||||
method: str,
|
||||
path: str,
|
||||
*,
|
||||
stream: bool = False,
|
||||
**kwargs,
|
||||
) -> Union[T, Iterator[T]]:
|
||||
request_params = self._prepare_request(method, path, **kwargs)
|
||||
|
||||
if stream:
|
||||
|
||||
def inner():
|
||||
with self._client.stream(*args, **kwargs) as r:
|
||||
with self._client.stream(**request_params) as r:
|
||||
try:
|
||||
r.raise_for_status()
|
||||
except httpx.HTTPStatusError as e:
|
||||
@@ -177,7 +205,7 @@ class Client(BaseClient):
|
||||
|
||||
return inner()
|
||||
|
||||
return cls(**self._request_raw(*args, **kwargs).json())
|
||||
return cls(**self._request_raw(**request_params).json())
|
||||
|
||||
@overload
|
||||
def generate(
|
||||
@@ -274,7 +302,7 @@ class Client(BaseClient):
|
||||
*,
|
||||
tools: Optional[Sequence[Union[Mapping[str, Any], Tool, Callable]]] = None,
|
||||
stream: Literal[False] = False,
|
||||
think: Optional[bool] = None,
|
||||
think: Optional[Union[bool, Literal['low', 'medium', 'high']]] = None,
|
||||
format: Optional[Union[Literal['', 'json'], JsonSchemaValue]] = None,
|
||||
options: Optional[Union[Mapping[str, Any], Options]] = None,
|
||||
keep_alive: Optional[Union[float, str]] = None,
|
||||
@@ -288,7 +316,7 @@ class Client(BaseClient):
|
||||
*,
|
||||
tools: Optional[Sequence[Union[Mapping[str, Any], Tool, Callable]]] = None,
|
||||
stream: Literal[True] = True,
|
||||
think: Optional[bool] = None,
|
||||
think: Optional[Union[bool, Literal['low', 'medium', 'high']]] = None,
|
||||
format: Optional[Union[Literal['', 'json'], JsonSchemaValue]] = None,
|
||||
options: Optional[Union[Mapping[str, Any], Options]] = None,
|
||||
keep_alive: Optional[Union[float, str]] = None,
|
||||
@@ -301,7 +329,7 @@ class Client(BaseClient):
|
||||
*,
|
||||
tools: Optional[Sequence[Union[Mapping[str, Any], Tool, Callable]]] = None,
|
||||
stream: bool = False,
|
||||
think: Optional[bool] = None,
|
||||
think: Optional[Union[bool, Literal['low', 'medium', 'high']]] = None,
|
||||
format: Optional[Union[Literal['', 'json'], JsonSchemaValue]] = None,
|
||||
options: Optional[Union[Mapping[str, Any], Options]] = None,
|
||||
keep_alive: Optional[Union[float, str]] = None,
|
||||
@@ -363,6 +391,7 @@ class Client(BaseClient):
|
||||
truncate: Optional[bool] = None,
|
||||
options: Optional[Union[Mapping[str, Any], Options]] = None,
|
||||
keep_alive: Optional[Union[float, str]] = None,
|
||||
dimensions: Optional[int] = None,
|
||||
) -> EmbedResponse:
|
||||
return self._request(
|
||||
EmbedResponse,
|
||||
@@ -374,6 +403,7 @@ class Client(BaseClient):
|
||||
truncate=truncate,
|
||||
options=options,
|
||||
keep_alive=keep_alive,
|
||||
dimensions=dimensions,
|
||||
).model_dump(exclude_none=True),
|
||||
)
|
||||
|
||||
@@ -667,14 +697,19 @@ class AsyncClient(BaseClient):
|
||||
async def _request(
|
||||
self,
|
||||
cls: Type[T],
|
||||
*args,
|
||||
method: str,
|
||||
path: str,
|
||||
*,
|
||||
stream: bool = False,
|
||||
**kwargs,
|
||||
) -> Union[T, AsyncIterator[T]]:
|
||||
"""Make a request with optional authentication."""
|
||||
request_params = self._prepare_request(method, path, **kwargs)
|
||||
|
||||
if stream:
|
||||
|
||||
async def inner():
|
||||
async with self._client.stream(*args, **kwargs) as r:
|
||||
async with self._client.stream(**request_params) as r:
|
||||
try:
|
||||
r.raise_for_status()
|
||||
except httpx.HTTPStatusError as e:
|
||||
@@ -689,7 +724,7 @@ class AsyncClient(BaseClient):
|
||||
|
||||
return inner()
|
||||
|
||||
return cls(**(await self._request_raw(*args, **kwargs)).json())
|
||||
return cls(**(await self._request_raw(**request_params)).json())
|
||||
|
||||
@overload
|
||||
async def generate(
|
||||
@@ -702,7 +737,7 @@ class AsyncClient(BaseClient):
|
||||
template: str = '',
|
||||
context: Optional[Sequence[int]] = None,
|
||||
stream: Literal[False] = False,
|
||||
think: Optional[bool] = None,
|
||||
think: Optional[Union[bool, Literal['low', 'medium', 'high']]] = None,
|
||||
raw: bool = False,
|
||||
format: Optional[Union[Literal['', 'json'], JsonSchemaValue]] = None,
|
||||
images: Optional[Sequence[Union[str, bytes, Image]]] = None,
|
||||
@@ -721,7 +756,7 @@ class AsyncClient(BaseClient):
|
||||
template: str = '',
|
||||
context: Optional[Sequence[int]] = None,
|
||||
stream: Literal[True] = True,
|
||||
think: Optional[bool] = None,
|
||||
think: Optional[Union[bool, Literal['low', 'medium', 'high']]] = None,
|
||||
raw: bool = False,
|
||||
format: Optional[Union[Literal['', 'json'], JsonSchemaValue]] = None,
|
||||
images: Optional[Sequence[Union[str, bytes, Image]]] = None,
|
||||
@@ -739,7 +774,7 @@ class AsyncClient(BaseClient):
|
||||
template: Optional[str] = None,
|
||||
context: Optional[Sequence[int]] = None,
|
||||
stream: bool = False,
|
||||
think: Optional[bool] = None,
|
||||
think: Optional[Union[bool, Literal['low', 'medium', 'high']]] = None,
|
||||
raw: Optional[bool] = None,
|
||||
format: Optional[Union[Literal['', 'json'], JsonSchemaValue]] = None,
|
||||
images: Optional[Sequence[Union[str, bytes, Image]]] = None,
|
||||
@@ -785,7 +820,7 @@ class AsyncClient(BaseClient):
|
||||
*,
|
||||
tools: Optional[Sequence[Union[Mapping[str, Any], Tool, Callable]]] = None,
|
||||
stream: Literal[False] = False,
|
||||
think: Optional[bool] = None,
|
||||
think: Optional[Union[bool, Literal['low', 'medium', 'high']]] = None,
|
||||
format: Optional[Union[Literal['', 'json'], JsonSchemaValue]] = None,
|
||||
options: Optional[Union[Mapping[str, Any], Options]] = None,
|
||||
keep_alive: Optional[Union[float, str]] = None,
|
||||
@@ -799,7 +834,7 @@ class AsyncClient(BaseClient):
|
||||
*,
|
||||
tools: Optional[Sequence[Union[Mapping[str, Any], Tool, Callable]]] = None,
|
||||
stream: Literal[True] = True,
|
||||
think: Optional[bool] = None,
|
||||
think: Optional[Union[bool, Literal['low', 'medium', 'high']]] = None,
|
||||
format: Optional[Union[Literal['', 'json'], JsonSchemaValue]] = None,
|
||||
options: Optional[Union[Mapping[str, Any], Options]] = None,
|
||||
keep_alive: Optional[Union[float, str]] = None,
|
||||
@@ -812,7 +847,7 @@ class AsyncClient(BaseClient):
|
||||
*,
|
||||
tools: Optional[Sequence[Union[Mapping[str, Any], Tool, Callable]]] = None,
|
||||
stream: bool = False,
|
||||
think: Optional[bool] = None,
|
||||
think: Optional[Union[bool, Literal['low', 'medium', 'high']]] = None,
|
||||
format: Optional[Union[Literal['', 'json'], JsonSchemaValue]] = None,
|
||||
options: Optional[Union[Mapping[str, Any], Options]] = None,
|
||||
keep_alive: Optional[Union[float, str]] = None,
|
||||
@@ -875,6 +910,7 @@ class AsyncClient(BaseClient):
|
||||
truncate: Optional[bool] = None,
|
||||
options: Optional[Union[Mapping[str, Any], Options]] = None,
|
||||
keep_alive: Optional[Union[float, str]] = None,
|
||||
dimensions: Optional[int] = None,
|
||||
) -> EmbedResponse:
|
||||
return await self._request(
|
||||
EmbedResponse,
|
||||
@@ -886,6 +922,7 @@ class AsyncClient(BaseClient):
|
||||
truncate=truncate,
|
||||
options=options,
|
||||
keep_alive=keep_alive,
|
||||
dimensions=dimensions,
|
||||
).model_dump(exclude_none=True),
|
||||
)
|
||||
|
||||
|
||||
+7
-4
@@ -79,7 +79,7 @@ class SubscriptableBaseModel(BaseModel):
|
||||
if key in self.model_fields_set:
|
||||
return True
|
||||
|
||||
if value := self.model_fields.get(key):
|
||||
if value := self.__class__.model_fields.get(key):
|
||||
return value.default is not None
|
||||
|
||||
return False
|
||||
@@ -207,7 +207,7 @@ class GenerateRequest(BaseGenerateRequest):
|
||||
images: Optional[Sequence[Image]] = None
|
||||
'Image data for multimodal models.'
|
||||
|
||||
think: Optional[bool] = None
|
||||
think: Optional[Union[bool, Literal['low', 'medium', 'high']]] = None
|
||||
'Enable thinking mode (for thinking models).'
|
||||
|
||||
|
||||
@@ -313,7 +313,7 @@ class Message(SubscriptableBaseModel):
|
||||
|
||||
|
||||
class Tool(SubscriptableBaseModel):
|
||||
type: Optional[Literal['function']] = 'function'
|
||||
type: Optional[str] = 'function'
|
||||
|
||||
class Function(SubscriptableBaseModel):
|
||||
name: Optional[str] = None
|
||||
@@ -357,7 +357,7 @@ class ChatRequest(BaseGenerateRequest):
|
||||
tools: Optional[Sequence[Tool]] = None
|
||||
'Tools to use for the chat.'
|
||||
|
||||
think: Optional[bool] = None
|
||||
think: Optional[Union[bool, Literal['low', 'medium', 'high']]] = None
|
||||
'Enable thinking mode (for thinking models).'
|
||||
|
||||
|
||||
@@ -382,6 +382,9 @@ class EmbedRequest(BaseRequest):
|
||||
|
||||
keep_alive: Optional[Union[float, str]] = None
|
||||
|
||||
dimensions: Optional[int] = None
|
||||
'Dimensions truncates the output embedding to the specified dimension.'
|
||||
|
||||
|
||||
class EmbedResponse(BaseGenerateResponse):
|
||||
"""
|
||||
|
||||
+2
-1
@@ -79,11 +79,12 @@ def convert_function_to_tool(func: Callable) -> Tool:
|
||||
}
|
||||
|
||||
tool = Tool(
|
||||
type='function',
|
||||
function=Tool.Function(
|
||||
name=func.__name__,
|
||||
description=schema.get('description', ''),
|
||||
parameters=Tool.Function.Parameters(**schema),
|
||||
)
|
||||
),
|
||||
)
|
||||
|
||||
return Tool.model_validate(tool)
|
||||
|
||||
@@ -8,7 +8,7 @@ from typing import Any
|
||||
|
||||
import pytest
|
||||
from httpx import Response as httpxResponse
|
||||
from pydantic import BaseModel, ValidationError
|
||||
from pydantic import BaseModel
|
||||
from pytest_httpserver import HTTPServer, URIPattern
|
||||
from werkzeug.wrappers import Request, Response
|
||||
|
||||
@@ -1136,10 +1136,11 @@ def test_copy_tools():
|
||||
|
||||
|
||||
def test_tool_validation():
|
||||
# Raises ValidationError when used as it is a generator
|
||||
with pytest.raises(ValidationError):
|
||||
invalid_tool = {'type': 'invalid_type', 'function': {'name': 'test'}}
|
||||
list(_copy_tools([invalid_tool]))
|
||||
arbitrary_tool = {'type': 'custom_type', 'function': {'name': 'test'}}
|
||||
tools = list(_copy_tools([arbitrary_tool]))
|
||||
assert len(tools) == 1
|
||||
assert tools[0].type == 'custom_type'
|
||||
assert tools[0].function.name == 'test'
|
||||
|
||||
|
||||
def test_client_connection_error():
|
||||
|
||||
Reference in New Issue
Block a user