Merge pull request #521 from ollama/drifkin/thinking-support

add support for thinking
Commit 756bd8f31a by Devon Rifkin, 2025-05-29 16:02:56 -07:00 (committed by GitHub)
4 changed files with 40 additions and 0 deletions

examples/README.md

@@ -62,3 +62,6 @@ Requirement: `pip install tqdm`
### Ollama Embed - Generate embeddings with a model
- [embed.py](embed.py)
### Thinking - Enable thinking mode for a model
- [thinking.py](thinking.py)

examples/thinking.py (new file, 13 lines)

@@ -0,0 +1,13 @@
from ollama import chat

messages = [
  {
    'role': 'user',
    'content': 'What is 10 + 23?',
  },
]

response = chat('deepseek-r1', messages=messages, think=True)

print('Thinking:\n========\n\n' + response.message.thinking)
print('\nResponse:\n========\n\n' + response.message.content)
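The example above makes a single blocking call. As a minimal sketch of the streaming path this PR also enables (not part of the diff; it assumes a local Ollama server and a thinking-capable model such as deepseek-r1), the thinking content arrives as per-chunk fragments on message.thinking, interleaved with content fragments:

from ollama import chat

# stream=True yields ChatResponse chunks; with think=True each chunk's
# message may carry a fragment of the reasoning and/or the final answer
for chunk in chat(
  'deepseek-r1',
  messages=[{'role': 'user', 'content': 'What is 10 + 23?'}],
  think=True,
  stream=True,
):
  if chunk.message.thinking:
    print(chunk.message.thinking, end='', flush=True)
  if chunk.message.content:
    print(chunk.message.content, end='', flush=True)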

ollama/_client.py

@@ -270,6 +270,7 @@ class Client(BaseClient):
*,
tools: Optional[Sequence[Union[Mapping[str, Any], Tool, Callable]]] = None,
stream: Literal[False] = False,
think: Optional[bool] = None,
format: Optional[Union[Literal['', 'json'], JsonSchemaValue]] = None,
options: Optional[Union[Mapping[str, Any], Options]] = None,
keep_alive: Optional[Union[float, str]] = None,
@@ -283,6 +284,7 @@ class Client(BaseClient):
*,
tools: Optional[Sequence[Union[Mapping[str, Any], Tool, Callable]]] = None,
stream: Literal[True] = True,
think: Optional[bool] = None,
format: Optional[Union[Literal['', 'json'], JsonSchemaValue]] = None,
options: Optional[Union[Mapping[str, Any], Options]] = None,
keep_alive: Optional[Union[float, str]] = None,
@@ -295,6 +297,7 @@ class Client(BaseClient):
*,
tools: Optional[Sequence[Union[Mapping[str, Any], Tool, Callable]]] = None,
stream: bool = False,
think: Optional[bool] = None,
format: Optional[Union[Literal['', 'json'], JsonSchemaValue]] = None,
options: Optional[Union[Mapping[str, Any], Options]] = None,
keep_alive: Optional[Union[float, str]] = None,
@@ -341,6 +344,7 @@ class Client(BaseClient):
messages=list(_copy_messages(messages)),
tools=list(_copy_tools(tools)),
stream=stream,
think=think,
format=format,
options=options,
keep_alive=keep_alive,
@@ -694,6 +698,7 @@ class AsyncClient(BaseClient):
template: str = '',
context: Optional[Sequence[int]] = None,
stream: Literal[False] = False,
think: Optional[bool] = None,
raw: bool = False,
format: Optional[Union[Literal['', 'json'], JsonSchemaValue]] = None,
images: Optional[Sequence[Union[str, bytes, Image]]] = None,
@@ -712,6 +717,7 @@ class AsyncClient(BaseClient):
template: str = '',
context: Optional[Sequence[int]] = None,
stream: Literal[True] = True,
think: Optional[bool] = None,
raw: bool = False,
format: Optional[Union[Literal['', 'json'], JsonSchemaValue]] = None,
images: Optional[Sequence[Union[str, bytes, Image]]] = None,
@@ -729,6 +735,7 @@ class AsyncClient(BaseClient):
template: Optional[str] = None,
context: Optional[Sequence[int]] = None,
stream: bool = False,
think: Optional[bool] = None,
raw: Optional[bool] = None,
format: Optional[Union[Literal['', 'json'], JsonSchemaValue]] = None,
images: Optional[Sequence[Union[str, bytes, Image]]] = None,
@@ -756,6 +763,7 @@ class AsyncClient(BaseClient):
template=template,
context=context,
stream=stream,
think=think,
raw=raw,
format=format,
images=list(_copy_images(images)) if images else None,
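The hunks above thread think through AsyncClient.generate. A minimal usage sketch (not part of the diff; it assumes a running Ollama server and a thinking-capable model):

import asyncio

from ollama import AsyncClient

async def main() -> None:
  response = await AsyncClient().generate('deepseek-r1', 'What is 10 + 23?', think=True)
  print(response.thinking)  # reasoning trace (GenerateResponse.thinking)
  print(response.response)  # final answer

asyncio.run(main())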
@@ -773,6 +781,7 @@ class AsyncClient(BaseClient):
*,
tools: Optional[Sequence[Union[Mapping[str, Any], Tool, Callable]]] = None,
stream: Literal[False] = False,
think: Optional[bool] = None,
format: Optional[Union[Literal['', 'json'], JsonSchemaValue]] = None,
options: Optional[Union[Mapping[str, Any], Options]] = None,
keep_alive: Optional[Union[float, str]] = None,
@@ -786,6 +795,7 @@ class AsyncClient(BaseClient):
*,
tools: Optional[Sequence[Union[Mapping[str, Any], Tool, Callable]]] = None,
stream: Literal[True] = True,
think: Optional[bool] = None,
format: Optional[Union[Literal['', 'json'], JsonSchemaValue]] = None,
options: Optional[Union[Mapping[str, Any], Options]] = None,
keep_alive: Optional[Union[float, str]] = None,
@@ -798,6 +808,7 @@ class AsyncClient(BaseClient):
*,
tools: Optional[Sequence[Union[Mapping[str, Any], Tool, Callable]]] = None,
stream: bool = False,
think: Optional[bool] = None,
format: Optional[Union[Literal['', 'json'], JsonSchemaValue]] = None,
options: Optional[Union[Mapping[str, Any], Options]] = None,
keep_alive: Optional[Union[float, str]] = None,
@@ -845,6 +856,7 @@ class AsyncClient(BaseClient):
messages=list(_copy_messages(messages)),
tools=list(_copy_tools(tools)),
stream=stream,
think=think,
format=format,
options=options,
keep_alive=keep_alive,
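And the corresponding async chat path, here with streaming (a sketch under the same assumptions; with stream=True the awaited call returns an async iterator of chunks):

import asyncio

from ollama import AsyncClient

async def main() -> None:
  stream = await AsyncClient().chat(
    'deepseek-r1',
    messages=[{'role': 'user', 'content': 'What is 10 + 23?'}],
    think=True,
    stream=True,
  )
  async for chunk in stream:
    if chunk.message.thinking:
      print(chunk.message.thinking, end='', flush=True)

asyncio.run(main())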

ollama/_types.py

@@ -207,6 +207,9 @@ class GenerateRequest(BaseGenerateRequest):
images: Optional[Sequence[Image]] = None
'Image data for multimodal models.'

think: Optional[bool] = None
'Enable thinking mode (for thinking models).'

class BaseGenerateResponse(SubscriptableBaseModel):
model: Optional[str] = None
@@ -248,6 +251,9 @@ class GenerateResponse(BaseGenerateResponse):
response: str
'Response content. When streaming, this contains a fragment of the response.'

thinking: Optional[str] = None
'Thinking content. Only present when thinking is enabled.'

context: Optional[Sequence[int]] = None
'Tokenized history up to the point of the response.'
@@ -263,6 +269,9 @@ class Message(SubscriptableBaseModel):
content: Optional[str] = None
'Content of the message. Response messages contain message fragments when streaming.'

thinking: Optional[str] = None
'Thinking content. Only present when thinking is enabled.'

images: Optional[Sequence[Image]] = None
"""
Optional list of image data for multimodal models.
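As a quick illustration of the new Message field (not part of the diff; it assumes Message is re-exported from the package root, as in current releases):

from ollama import Message

# thinking is only populated when thinking mode is enabled on the request
msg = Message(role='assistant', content='33', thinking='10 + 23 = 33')
print(msg.thinking)     # attribute access
print(msg['thinking'])  # key access via SubscriptableBaseModel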
@@ -345,6 +354,9 @@ class ChatRequest(BaseGenerateRequest):
tools: Optional[Sequence[Tool]] = None
'Tools to use for the chat.'

think: Optional[bool] = None
'Enable thinking mode (for thinking models).'

class ChatResponse(BaseGenerateResponse):
"""