import json
from base64 import b64encode
from datetime import datetime
from pathlib import Path
from typing import (
    Any,
    Literal,
    Mapping,
    Optional,
    Sequence,
    Union,
)

from pydantic import (
    Base64Str,
    BaseModel,
    ByteSize,
    Field,
    FilePath,
    model_serializer,
)
from pydantic.json_schema import JsonSchemaValue
from typing_extensions import Annotated


class SubscriptableBaseModel(BaseModel):
    """
    Base model that additionally supports dict-style access
    (``obj['field']``, ``'field' in obj``, ``obj.get('field')``).
    """

    def __getitem__(self, key: str) -> Any:
        return getattr(self, key)

    def __setitem__(self, key: str, value: Any) -> None:
        setattr(self, key, value)

    def __contains__(self, key: str) -> bool:
        # Mirrors attribute lookup: only AttributeError means "absent".
        try:
            getattr(self, key)
        except AttributeError:
            return False
        return True

    def get(self, key: str, default: Any = None) -> Any:
        return getattr(self, key, default)


class Options(SubscriptableBaseModel):
    """Model/runtime tuning knobs; every field is optional and omitted when unset."""

    # load time options
    numa: Optional[bool] = None
    num_ctx: Optional[int] = None
    num_batch: Optional[int] = None
    num_gpu: Optional[int] = None
    main_gpu: Optional[int] = None
    low_vram: Optional[bool] = None
    f16_kv: Optional[bool] = None
    logits_all: Optional[bool] = None
    vocab_only: Optional[bool] = None
    use_mmap: Optional[bool] = None
    use_mlock: Optional[bool] = None
    embedding_only: Optional[bool] = None
    num_thread: Optional[int] = None

    # runtime options
    num_keep: Optional[int] = None
    seed: Optional[int] = None
    num_predict: Optional[int] = None
    top_k: Optional[int] = None
    top_p: Optional[float] = None
    tfs_z: Optional[float] = None
    typical_p: Optional[float] = None
    repeat_last_n: Optional[int] = None
    temperature: Optional[float] = None
    repeat_penalty: Optional[float] = None
    presence_penalty: Optional[float] = None
    frequency_penalty: Optional[float] = None
    mirostat: Optional[int] = None
    mirostat_tau: Optional[float] = None
    mirostat_eta: Optional[float] = None
    penalize_newline: Optional[bool] = None
    stop: Optional[Sequence[str]] = None


class BaseRequest(SubscriptableBaseModel):
    model: Annotated[str, Field(min_length=1)]
    'Model to use for the request.'


class BaseStreamableRequest(BaseRequest):
    stream: Optional[bool] = None
    'Stream response.'
class BaseGenerateRequest(BaseStreamableRequest):
    options: Optional[Union[Mapping[str, Any], Options]] = None
    'Options to use for the request.'

    format: Optional[Literal['', 'json']] = None
    'Format of the response.'

    keep_alive: Optional[Union[float, str]] = None
    'Keep model alive for the specified duration.'


class Image(BaseModel):
    value: Union[FilePath, Base64Str, bytes]

    # Custom serializer so `model_dump` always yields a base64 string:
    # raw bytes and file paths are encoded, strings pass through untouched.
    @model_serializer
    def serialize_model(self):
        raw = self.value
        if isinstance(raw, bytes):
            return b64encode(raw).decode()
        if isinstance(raw, Path):
            return b64encode(raw.read_bytes()).decode()
        return raw


class GenerateRequest(BaseGenerateRequest):
    prompt: Optional[str] = None
    'Prompt to generate response from.'

    suffix: Optional[str] = None
    'Suffix to append to the response.'

    system: Optional[str] = None
    'System prompt to prepend to the prompt.'

    template: Optional[str] = None
    'Template to use for the response.'

    context: Optional[Sequence[int]] = None
    'Tokenized history to use for the response.'

    raw: Optional[bool] = None

    images: Optional[Sequence[Image]] = None
    'Image data for multimodal models.'


class BaseGenerateResponse(SubscriptableBaseModel):
    model: Optional[str] = None
    'Model used to generate response.'

    created_at: Optional[str] = None
    'Time when the request was created.'

    done: Optional[bool] = None
    'True if response is complete, otherwise False. Useful for streaming to detect the final response.'

    done_reason: Optional[str] = None
    'Reason for completion. Only present when done is True.'

    total_duration: Optional[int] = None
    'Total duration in nanoseconds.'

    load_duration: Optional[int] = None
    'Load duration in nanoseconds.'

    prompt_eval_count: Optional[int] = None
    'Number of tokens evaluated in the prompt.'

    prompt_eval_duration: Optional[int] = None
    'Duration of evaluating the prompt in nanoseconds.'

    eval_count: Optional[int] = None
    'Number of tokens evaluated in inference.'

    eval_duration: Optional[int] = None
    'Duration of evaluating inference in nanoseconds.'


class GenerateResponse(BaseGenerateResponse):
    """
    Response returned by generate requests.
    """

    response: str
    'Response content. When streaming, this contains a fragment of the response.'

    context: Optional[Sequence[int]] = None
    'Tokenized history up to the point of the response.'


class Message(SubscriptableBaseModel):
    """
    Chat message.
    """

    role: Literal['user', 'assistant', 'system', 'tool']
    "Assumed role of the message. Response messages has role 'assistant' or 'tool'."

    content: Optional[str] = None
    'Content of the message. Response messages contains message fragments when streaming.'

    images: Optional[Sequence[Image]] = None
    """
  Optional list of image data for multimodal models.

  Valid input types are:

  - `str` or path-like object: path to image file
  - `bytes` or bytes-like object: raw image data

  Valid image formats depend on the model. See the model card for more information.
  """

    class ToolCall(SubscriptableBaseModel):
        """
        Model tool calls.
        """

        class Function(SubscriptableBaseModel):
            """
            Tool call function.
            """

            name: str
            'Name of the function.'

            arguments: Mapping[str, Any]
            'Arguments of the function.'

        function: Function
        'Function to be called.'

    tool_calls: Optional[Sequence[ToolCall]] = None
    """
  Tools calls to be made by the model.
  """


class Tool(SubscriptableBaseModel):
    type: Literal['function'] = 'function'

    class Function(SubscriptableBaseModel):
        name: str
        description: str

        class Parameters(SubscriptableBaseModel):
            type: str
            required: Optional[Sequence[str]] = None
            properties: Optional[JsonSchemaValue] = None

        parameters: Parameters

    function: Function


class ChatRequest(BaseGenerateRequest):
    messages: Optional[Sequence[Union[Mapping[str, Any], Message]]] = None
    'Messages to chat with.'

    tools: Optional[Sequence[Tool]] = None
    'Tools to use for the chat.'


class ChatResponse(BaseGenerateResponse):
    """
    Response returned by chat requests.
    """

    message: Message
    'Response message.'
class EmbedRequest(BaseRequest):
    input: Union[str, Sequence[str]]
    'Input text to embed.'

    truncate: Optional[bool] = None
    'Truncate the input to the maximum token length.'

    options: Optional[Union[Mapping[str, Any], Options]] = None
    'Options to use for the request.'

    keep_alive: Optional[Union[float, str]] = None


class EmbedResponse(BaseGenerateResponse):
    """
    Response returned by embed requests.
    """

    embeddings: Sequence[Sequence[float]]
    'Embeddings of the inputs.'


class EmbeddingsRequest(BaseRequest):
    prompt: Optional[str] = None
    'Prompt to generate embeddings from.'

    options: Optional[Union[Mapping[str, Any], Options]] = None
    'Options to use for the request.'

    keep_alive: Optional[Union[float, str]] = None


class EmbeddingsResponse(SubscriptableBaseModel):
    """
    Response returned by embeddings requests.
    """

    embedding: Sequence[float]
    'Embedding of the prompt.'


class PullRequest(BaseStreamableRequest):
    """
    Request to pull the model.
    """

    insecure: Optional[bool] = None
    'Allow insecure (HTTP) connections.'


class PushRequest(BaseStreamableRequest):
    """
    Request to push the model.
    """

    insecure: Optional[bool] = None
    'Allow insecure (HTTP) connections.'


class CreateRequest(BaseStreamableRequest):
    """
    Request to create a new model.
    """

    modelfile: Optional[str] = None

    quantize: Optional[str] = None


class ModelDetails(SubscriptableBaseModel):
    parent_model: Optional[str] = None
    format: Optional[str] = None
    family: Optional[str] = None
    families: Optional[Sequence[str]] = None
    parameter_size: Optional[str] = None
    quantization_level: Optional[str] = None


class ListResponse(SubscriptableBaseModel):
    class Model(SubscriptableBaseModel):
        modified_at: Optional[datetime] = None
        digest: Optional[str] = None
        size: Optional[ByteSize] = None
        details: Optional[ModelDetails] = None

    models: Sequence[Model]
    'List of models.'


class DeleteRequest(BaseRequest):
    """
    Request to delete a model.
    """


class CopyRequest(BaseModel):
    """
    Request to copy a model.
    """

    source: str
    'Source model to copy.'

    destination: str
    'Destination model to copy to.'


class StatusResponse(SubscriptableBaseModel):
    status: Optional[str] = None


class ProgressResponse(StatusResponse):
    completed: Optional[int] = None
    total: Optional[int] = None
    digest: Optional[str] = None


class ShowRequest(BaseRequest):
    """
    Request to show model information.
    """


class ShowResponse(SubscriptableBaseModel):
    modified_at: Optional[datetime] = None

    template: Optional[str] = None

    modelfile: Optional[str] = None

    license: Optional[str] = None

    details: Optional[ModelDetails] = None

    # `default=None` is required here: with only an alias, pydantic v2 treats
    # the field as required even though it is annotated Optional.
    modelinfo: Optional[Mapping[str, Any]] = Field(default=None, alias='model_info')

    parameters: Optional[str] = None


class ProcessResponse(SubscriptableBaseModel):
    class Model(SubscriptableBaseModel):
        model: Optional[str] = None
        name: Optional[str] = None
        digest: Optional[str] = None
        expires_at: Optional[datetime] = None
        size: Optional[ByteSize] = None
        size_vram: Optional[ByteSize] = None
        details: Optional[ModelDetails] = None

    models: Sequence[Model]


class RequestError(Exception):
    """
    Common class for request errors.
    """

    def __init__(self, error: str):
        super().__init__(error)
        self.error = error
        'Reason for the error.'


class ResponseError(Exception):
    """
    Common class for response errors.
    """

    def __init__(self, error: str, status_code: int = -1):
        try:
            # try to parse content as JSON and extract 'error'
            # fallback to raw content if JSON parsing fails, or if the payload
            # is valid JSON but not an object (e.g. a bare string or list)
            decoded = json.loads(error)
            if isinstance(decoded, dict):
                error = decoded.get('error', error)
        except json.JSONDecodeError:
            ...

        super().__init__(error)
        self.error = error
        'Reason for the error.'

        self.status_code = status_code
        'HTTP status code of the response.'