Compare commits

...

2 Commits

Author SHA1 Message Date
ParthSareen 588e338b2b Add tests + cleanup 2024-12-16 11:34:03 -08:00
ParthSareen 422724ee8c Add tokenize detokenize compatibility 2024-12-16 09:41:07 -08:00
6 changed files with 110 additions and 0 deletions
+4
View File
@@ -36,6 +36,10 @@ python3 examples/<example>.py
- [structured-outputs-image.py](structured-outputs-image.py)
### Tokenization - Tokenize and detokenize text with a model
- [tokenization.py](tokenization.py)
### Ollama List - List all downloaded models and their properties
- [list.py](list.py)
+10
View File
@@ -0,0 +1,10 @@
import ollama
# Ask the model to split the prompt into tokens
tokenized = ollama.tokenize(model='llama3.2', text='Why the sky is blue?')
tokens = tokenized.tokens
print('Tokens from model', tokens)

# Feed the same tokens back to the model to recover text
detokenized = ollama.detokenize(model='llama3.2', tokens=tokens)
# The printed line is expected to end with the original prompt: Why the sky is blue?
print('Text from tokens', detokenized.text)
+6
View File
@@ -12,6 +12,8 @@ from ollama._types import (
ListResponse,
ShowResponse,
ProcessResponse,
TokenizeResponse,
DetokenizeResponse,
RequestError,
ResponseError,
)
@@ -31,6 +33,8 @@ __all__ = [
'ListResponse',
'ShowResponse',
'ProcessResponse',
'TokenizeResponse',
'DetokenizeResponse',
'RequestError',
'ResponseError',
]
@@ -49,3 +53,5 @@ list = _client.list
# Package-level aliases: each name below is bound to the same-named attribute
# of `_client` (defined outside this view), so callers can write e.g.
# `ollama.tokenize(...)` instead of going through a client object explicitly.
copy = _client.copy
show = _client.show
ps = _client.ps
tokenize = _client.tokenize
detokenize = _client.detokenize
+48
View File
@@ -48,6 +48,8 @@ from ollama._types import (
CreateRequest,
CopyRequest,
DeleteRequest,
DetokenizeRequest,
DetokenizeResponse,
EmbedRequest,
EmbedResponse,
EmbeddingsRequest,
@@ -67,6 +69,8 @@ from ollama._types import (
ShowRequest,
ShowResponse,
StatusResponse,
TokenizeRequest,
TokenizeResponse,
Tool,
)
@@ -611,6 +615,28 @@ class Client(BaseClient):
'/api/ps',
)
def tokenize(self, model: str, text: str) -> TokenizeResponse:
    """
    Request tokenization of `text` by `model` via `/api/tokenize`.

    The arguments are wrapped in a `TokenizeRequest` and serialized with
    `model_dump(exclude_none=True)`; the resulting dict is sent as the JSON
    body of a POST issued through `self._request`, which is asked to produce
    a `TokenizeResponse`.
    """
    payload = TokenizeRequest(model=model, text=text).model_dump(exclude_none=True)
    return self._request(TokenizeResponse, 'POST', '/api/tokenize', json=payload)
def detokenize(self, model: str, tokens: Sequence[int]) -> DetokenizeResponse:
    """
    Request conversion of `tokens` back to text by `model` via `/api/detokenize`.

    The arguments are wrapped in a `DetokenizeRequest` and serialized with
    `model_dump(exclude_none=True)`; the resulting dict is sent as the JSON
    body of a POST issued through `self._request`, which is asked to produce
    a `DetokenizeResponse`.
    """
    payload = DetokenizeRequest(model=model, tokens=tokens).model_dump(exclude_none=True)
    return self._request(DetokenizeResponse, 'POST', '/api/detokenize', json=payload)
class AsyncClient(BaseClient):
def __init__(self, host: Optional[str] = None, **kwargs) -> None:
@@ -1120,6 +1146,28 @@ class AsyncClient(BaseClient):
'/api/ps',
)
async def tokenize(self, model: str, text: str) -> TokenizeResponse:
    """
    Asynchronously request tokenization of `text` by `model` via `/api/tokenize`.

    The arguments are wrapped in a `TokenizeRequest` and serialized with
    `model_dump(exclude_none=True)`; the resulting dict is sent as the JSON
    body of a POST issued through the awaited `self._request`, which is asked
    to produce a `TokenizeResponse`.
    """
    payload = TokenizeRequest(model=model, text=text).model_dump(exclude_none=True)
    result = await self._request(TokenizeResponse, 'POST', '/api/tokenize', json=payload)
    return result
async def detokenize(self, model: str, tokens: Sequence[int]) -> DetokenizeResponse:
    """
    Asynchronously request conversion of `tokens` back to text via `/api/detokenize`.

    The arguments are wrapped in a `DetokenizeRequest` and serialized with
    `model_dump(exclude_none=True)`; the resulting dict is sent as the JSON
    body of a POST issued through the awaited `self._request`, which is asked
    to produce a `DetokenizeResponse`.
    """
    payload = DetokenizeRequest(model=model, tokens=tokens).model_dump(exclude_none=True)
    result = await self._request(DetokenizeResponse, 'POST', '/api/detokenize', json=payload)
    return result
def _copy_messages(messages: Optional[Sequence[Union[Mapping[str, Any], Message]]]) -> Iterator[Message]:
for message in messages or []:
+18
View File
@@ -494,6 +494,24 @@ class ProcessResponse(SubscriptableBaseModel):
models: Sequence[Model]
class TokenizeRequest(BaseRequest):
    """Request payload the client serializes and posts to `/api/tokenize`."""

    # Identifier of the model to tokenize with.
    model: str

    # The text to be split into tokens.
    text: str
class TokenizeResponse(BaseGenerateResponse):
    """Parsed reply to a tokenize request."""

    # NOTE(review): BaseGenerateResponse is defined outside this view; confirm
    # that the fields it contributes are appropriate for a tokenize reply.

    # Integer token ids returned for the submitted text.
    tokens: Sequence[int]
class DetokenizeRequest(BaseRequest):
    """Request payload the client serializes and posts to `/api/detokenize`."""

    # Identifier of the model to detokenize with.
    model: str

    # Integer token ids to be converted back into text.
    tokens: Sequence[int]
class DetokenizeResponse(BaseGenerateResponse):
    """Parsed reply to a detokenize request."""

    # NOTE(review): BaseGenerateResponse is defined outside this view; confirm
    # that the fields it contributes are appropriate for a detokenize reply.

    # Text reconstructed from the submitted token ids.
    text: str
class RequestError(Exception):
"""
Common class for request errors.
+24
View File
@@ -1260,3 +1260,27 @@ def test_tool_validation():
with pytest.raises(ValidationError):
invalid_tool = {'type': 'invalid_type', 'function': {'name': 'test'}}
list(_copy_tools([invalid_tool]))
def test_client_tokenize(httpserver: HTTPServer):
    """`Client.tokenize` POSTs model and text to `/api/tokenize` and exposes the returned tokens."""
    expected_body = {'model': 'dummy', 'text': 'Hello world!'}
    handler = httpserver.expect_ordered_request(
        '/api/tokenize',
        method='POST',
        json=expected_body,
    )
    handler.respond_with_json({'tokens': [1, 2, 3]})

    base_url = httpserver.url_for('/')
    result = Client(base_url).tokenize('dummy', 'Hello world!')

    assert result.tokens == [1, 2, 3]
def test_client_detokenize(httpserver: HTTPServer):
    """`Client.detokenize` POSTs model and tokens to `/api/detokenize` and exposes the returned text."""
    expected_body = {'model': 'dummy', 'tokens': [1, 2, 3]}
    handler = httpserver.expect_ordered_request(
        '/api/detokenize',
        method='POST',
        json=expected_body,
    )
    handler.respond_with_json({'text': 'Hello world!'})

    base_url = httpserver.url_for('/')
    result = Client(base_url).detokenize('dummy', [1, 2, 3])

    assert result.text == 'Hello world!'