2 changes: 2 additions & 0 deletions CHANGELOG.md
@@ -11,6 +11,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

### New features

* `ChatOpenAI()`, `ChatAnthropic()`, and `ChatGoogle()` gain a new `reasoning` parameter to easily opt into, and fully customize, reasoning capabilities. (#202)
* A new `ContentThinking` content type was added to capture the "thinking" portion of a reasoning model's response. (#192)
* Added support for built-in provider tools via a new `ToolBuiltIn` class. This enables provider-specific functionality like OpenAI's image generation to be registered and used as tools. Built-in tools pass raw provider definitions directly to the API rather than wrapping Python functions. (#214)
* `ChatGoogle()` gains basic support for image generation. (#214)
* `ChatOpenAI()` and `ChatAzureOpenAI()` gain a new `service_tier` parameter to request a specific service tier (e.g., `"flex"` for slower/cheaper or `"priority"` for faster/more expensive). (#204)
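For orientation, here is a minimal usage sketch of the new `reasoning` parameter described above. The constructor names and accepted value types come from the diffs below; the top-level `chatlas` imports and the specific model names are illustrative assumptions, not part of this PR.

```python
from chatlas import ChatAnthropic, ChatGoogle, ChatOpenAI

# OpenAI: a reasoning-effort string (or a full Reasoning dict).
chat_openai = ChatOpenAI(model="gpt-5", reasoning="medium")

# Anthropic: a thinking token budget (or a ThinkingConfigEnabledParam dict).
chat_anthropic = ChatAnthropic(model="claude-sonnet-4-5", reasoning=2048)

# Google: a thinking token budget (or a ThinkingConfigDict).
chat_google = ChatGoogle(model="gemini-2.5-flash", reasoning=1024)
```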
64 changes: 54 additions & 10 deletions chatlas/_provider_anthropic.py
@@ -25,6 +25,7 @@
ContentJson,
ContentPDF,
ContentText,
ContentThinking,
ContentToolRequest,
ContentToolResult,
)
@@ -47,6 +48,8 @@
MessageParam,
RawMessageStreamEvent,
TextBlock,
ThinkingBlock,
ThinkingBlockParam,
ToolUnionParam,
ToolUseBlock,
)
@@ -57,6 +60,7 @@
from anthropic.types.messages.batch_create_params import Request as BatchRequest
from anthropic.types.model_param import ModelParam
from anthropic.types.text_block_param import TextBlockParam
from anthropic.types.thinking_config_enabled_param import ThinkingConfigEnabledParam
from anthropic.types.tool_result_block_param import ToolResultBlockParam
from anthropic.types.tool_use_block_param import ToolUseBlockParam

@@ -68,6 +72,7 @@
ToolUseBlockParam,
ToolResultBlockParam,
DocumentBlockParam,
ThinkingBlockParam,
]
else:
Message = object
@@ -78,9 +83,10 @@ def ChatAnthropic(
*,
system_prompt: Optional[str] = None,
model: "Optional[ModelParam]" = None,
api_key: Optional[str] = None,
max_tokens: int = 4096,
reasoning: Optional["int | ThinkingConfigEnabledParam"] = None,
cache: Literal["5m", "1h", "none"] = "5m",
api_key: Optional[str] = None,
kwargs: Optional["ChatClientArgs"] = None,
) -> Chat["SubmitInputArgs", Message]:
"""
@@ -127,16 +133,23 @@
The model to use for the chat. The default, None, will pick a reasonable
default, and warn you about it. We strongly recommend explicitly
choosing a model for all but the most casual use.
api_key
The API key to use for authentication. You generally should not supply
this directly, but instead set the `ANTHROPIC_API_KEY` environment
variable.
max_tokens
Maximum number of tokens to generate before stopping.
reasoning
Determines how many tokens Claude may allocate to reasoning. Must be
≥1024 and less than `max_tokens`. Larger budgets can enable more
thorough analysis of complex problems, improving response quality. See
[extended
thinking](https://docs.claude.com/en/docs/build-with-claude/extended-thinking)
for details.
cache
How long to cache inputs? Defaults to "5m" (five minutes).
Set to "none" to disable caching or "1h" to cache for one hour.
See the Caching section for details.
api_key
The API key to use for authentication. You generally should not supply
this directly, but instead set the `ANTHROPIC_API_KEY` environment
variable.
kwargs
Additional arguments to pass to the `anthropic.Anthropic()` client
constructor.
@@ -226,6 +239,12 @@ def ChatAnthropic(
if model is None:
model = log_model_default("claude-sonnet-4-5")

kwargs_chat: "SubmitInputArgs" = {}
if reasoning is not None:
if isinstance(reasoning, int):
reasoning = {"type": "enabled", "budget_tokens": reasoning}
kwargs_chat = {"thinking": reasoning}

return Chat(
provider=AnthropicProvider(
api_key=api_key,
@@ -235,6 +254,7 @@
kwargs=kwargs,
),
system_prompt=system_prompt,
kwargs_chat=kwargs_chat,
)


@@ -429,8 +449,11 @@ def _structured_tool_call(**kwargs: Any):
return kwargs_full

def stream_text(self, chunk) -> Optional[str]:
if chunk.type == "content_block_delta" and chunk.delta.type == "text_delta":
return chunk.delta.text
if chunk.type == "content_block_delta":
if chunk.delta.type == "text_delta":
return chunk.delta.text
if chunk.delta.type == "thinking_delta":
return chunk.delta.thinking
return None

def stream_merge_chunks(self, completion, chunk):
@@ -455,6 +478,12 @@ def stream_merge_chunks(self, completion, chunk):
if not isinstance(this_content.input, str):
this_content.input = "" # type: ignore
this_content.input += json_delta # type: ignore
elif chunk.delta.type == "thinking_delta":
this_content = cast("ThinkingBlock", this_content)
this_content.thinking += chunk.delta.thinking
elif chunk.delta.type == "signature_delta":
this_content = cast("ThinkingBlock", this_content)
this_content.signature += chunk.delta.signature
elif chunk.type == "content_block_stop":
this_content = completion.content[chunk.index]
if this_content.type == "tool_use" and isinstance(this_content.input, str):
@@ -590,9 +619,10 @@ def _as_message_params(self, turns: Sequence[Turn]) -> list["MessageParam"]:
# Add cache control to the last content block in the last turn
# https://docs.claude.com/en/docs/build-with-claude/prompt-caching#how-automatic-prefix-checking-works
is_last_turn = i == len(turns) - 1
if is_last_turn and len(content) > 0:
if self._cache_control():
content[-1]["cache_control"] = self._cache_control()
if self._cache_control() and is_last_turn and len(content) > 0:
# Note: ThinkingBlockParam (i.e., type: "thinking") doesn't support cache_control
if content[-1].get("type") != "thinking":
content[-1]["cache_control"] = self._cache_control() # type: ignore

role = "user" if isinstance(turn, UserTurn) else "assistant"
messages.append({"role": role, "content": content})
@@ -648,6 +678,13 @@ def _as_content_block(content: Content) -> "ContentBlockParam":
}

return res
elif isinstance(content, ContentThinking):
extra = content.extra or {}
return {
"type": "thinking",
"thinking": content.thinking,
"signature": extra.get("signature", ""),
}

raise ValueError(f"Unknown content type: {type(content)}")

@@ -704,6 +741,13 @@ def _as_turn(self, completion: Message, has_data_model=False) -> AssistantTurn:
arguments=content.input,
)
)
elif content.type == "thinking":
contents.append(
ContentThinking(
thinking=content.thinking,
extra={"signature": content.signature},
)
)

return AssistantTurn(
contents,
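To make the Anthropic change concrete: the constructor normalizes an integer `reasoning` budget into a `ThinkingConfigEnabledParam` and forwards it as the `thinking` argument. A hedged sketch follows (the top-level `chatlas` import and `max_tokens` value are assumptions); per the docstring, the budget must be ≥1024 and less than `max_tokens`.

```python
from chatlas import ChatAnthropic

# An integer budget is shorthand for the full thinking config...
chat = ChatAnthropic(max_tokens=8192, reasoning=2048)

# ...which the constructor expands to the equivalent dict form:
chat = ChatAnthropic(
    max_tokens=8192,
    reasoning={"type": "enabled", "budget_tokens": 2048},
)
```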
16 changes: 14 additions & 2 deletions chatlas/_provider_google.py
@@ -32,6 +32,7 @@
GenerateContentResponseDict,
Part,
PartDict,
ThinkingConfigDict,
)

from .types.google import ChatClientArgs, SubmitInputArgs
@@ -43,6 +44,7 @@ def ChatGoogle(
*,
system_prompt: Optional[str] = None,
model: Optional[str] = None,
reasoning: Optional["int | ThinkingConfigDict"] = None,
api_key: Optional[str] = None,
kwargs: Optional["ChatClientArgs"] = None,
) -> Chat["SubmitInputArgs", GenerateContentResponse]:
@@ -84,6 +86,10 @@
The model to use for the chat. The default, None, will pick a reasonable
default, and warn you about it. We strongly recommend explicitly choosing
a model for all but the most casual use.
reasoning
If provided, enables reasoning (a.k.a. "thoughts") in the model's
responses. This can be an integer number of tokens to use for reasoning,
or a full `ThinkingConfigDict` to customize the reasoning behavior.
api_key
The API key to use for authentication. You generally should not supply
this directly, but instead set the `GOOGLE_API_KEY` environment variable.
@@ -135,14 +141,20 @@
if model is None:
model = log_model_default("gemini-2.5-flash")

kwargs_chat: "SubmitInputArgs" = {}
if reasoning is not None:
if isinstance(reasoning, int):
reasoning = {"thinking_budget": reasoning, "include_thoughts": True}
kwargs_chat["config"] = {"thinking_config": reasoning}

return Chat(
provider=GoogleProvider(
model=model,
api_key=api_key,
name="Google/Gemini",
kwargs=kwargs,
),
system_prompt=system_prompt,
kwargs_chat=kwargs_chat,
)


@@ -368,7 +380,7 @@ def value_tokens(self, completion):
cached = usage.cached_content_token_count or 0
return (
(usage.prompt_token_count or 0) - cached,
usage.candidates_token_count or 0,
(usage.candidates_token_count or 0) + (usage.thoughts_token_count or 0),
usage.cached_content_token_count or 0,
)

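Similarly for Google: an integer `reasoning` value is expanded into a `ThinkingConfigDict` with thoughts included, then passed under `config.thinking_config`. A sketch under the same assumptions (top-level import, illustrative budget):

```python
from chatlas import ChatGoogle

# An integer budget is shorthand for a ThinkingConfigDict...
chat = ChatGoogle(reasoning=1024)

# ...which the constructor expands to:
chat = ChatGoogle(reasoning={"thinking_budget": 1024, "include_thoughts": True})
```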
62 changes: 43 additions & 19 deletions chatlas/_provider_openai.py
@@ -35,6 +35,8 @@
)
from openai.types.responses.easy_input_message_param import EasyInputMessageParam
from openai.types.responses.tool_param import ToolParam
from openai.types.shared.reasoning_effort import ReasoningEffort
from openai.types.shared_params.reasoning import Reasoning
from openai.types.shared_params.responses_model import ResponsesModel

from ._turn import Role
@@ -46,11 +48,12 @@ def ChatOpenAI(
*,
system_prompt: Optional[str] = None,
model: "Optional[ResponsesModel | str]" = None,
api_key: Optional[str] = None,
base_url: str = "https://api.openai.com/v1",
reasoning: "Optional[ReasoningEffort | Reasoning]" = None,
service_tier: Optional[
Literal["auto", "default", "flex", "scale", "priority"]
] = None,
api_key: Optional[str] = None,
kwargs: Optional["ChatClientArgs"] = None,
) -> Chat["SubmitInputArgs", Response]:
"""
@@ -89,19 +92,22 @@
The model to use for the chat. The default, None, will pick a reasonable
default, and warn you about it. We strongly recommend explicitly
choosing a model for all but the most casual use.
api_key
The API key to use for authentication. You generally should not supply
this directly, but instead set the `OPENAI_API_KEY` environment
variable.
base_url
The base URL to the endpoint; the default uses OpenAI.
reasoning
The reasoning effort to use (for reasoning-capable models like the
o-series and gpt-5 series).
service_tier
Request a specific service tier. Options:
- `"auto"` (default): uses the service tier configured in Project settings.
- `"default"`: standard pricing and performance.
- `"flex"`: slower and cheaper.
- `"scale"`: batch-like pricing for high-volume use.
- `"priority"`: faster and more expensive.
api_key
The API key to use for authentication. You generally should not supply
this directly, but instead set the `OPENAI_API_KEY` environment
variable.
kwargs
Additional arguments to pass to the `openai.OpenAI()` client
constructor.
@@ -156,6 +162,14 @@
model = log_model_default("gpt-4.1")

kwargs_chat: "SubmitInputArgs" = {}

if reasoning is not None:
if not is_reasoning_model(model):
warnings.warn(f"Model {model} is not reasoning-capable", UserWarning)
if isinstance(reasoning, str):
reasoning = {"effort": reasoning, "summary": "auto"}
kwargs_chat["reasoning"] = reasoning

if service_tier is not None:
kwargs_chat["service_tier"] = service_tier

@@ -255,7 +269,7 @@ def _chat_perform_args(

# Request reasoning content for reasoning models
include = []
if self._is_reasoning(self.model):
if is_reasoning_model(self.model):
include.append("reasoning.encrypted_content")

if "log_probs" in kwargs_full:
@@ -270,7 +284,14 @@

def stream_text(self, chunk):
if chunk.type == "response.output_text.delta":
# https://platform.openai.com/docs/api-reference/responses-streaming/response/output_text/delta
return chunk.delta
if chunk.type == "response.reasoning_summary_text.delta":
# https://platform.openai.com/docs/api-reference/responses-streaming/response/reasoning_summary_text/delta
return chunk.delta
if chunk.type == "response.reasoning_summary_text.done":
# https://platform.openai.com/docs/api-reference/responses-streaming/response/reasoning_summary_text/done
return "\n\n"
return None

def stream_merge_chunks(self, completion, chunk):
@@ -363,14 +384,12 @@ def _response_as_turn(completion: Response, has_data_model: bool) -> AssistantTu
)

elif output.type == "reasoning":
if output.content:
thinking = "".join(x.text for x in output.content)
contents.append(
ContentThinking(
thinking=thinking,
extra=output.model_dump(),
)
contents.append(
ContentThinking(
thinking="".join(x.text for x in output.summary),
extra=output.model_dump(),
)
)

elif output.type == "image_generation_call":
result = output.result
Expand Down Expand Up @@ -398,11 +417,6 @@ def _response_as_turn(completion: Response, has_data_model: bool) -> AssistantTu
completion=completion,
)

@staticmethod
def _is_reasoning(model: str) -> bool:
# https://platform.openai.com/docs/models/compare
return model.startswith("o") or model.startswith("gpt-5")

@staticmethod
def _turns_as_inputs(turns: list[Turn]) -> "list[ResponseInputItemParam]":
res: "list[ResponseInputItemParam]" = []
@@ -497,7 +511,12 @@ def as_input_param(content: Content, role: Role) -> "ResponseInputItemParam":
role,
)
elif isinstance(content, ContentThinking):
return cast("ResponseReasoningItemParam", content.extra)
# Filter out 'status' which is output-only and not accepted as input
extra = content.extra or {}
return cast(
"ResponseReasoningItemParam",
{k: v for k, v in extra.items() if k != "status"},
)
elif isinstance(content, ContentToolResult):
return {
"type": "function_call_output",
@@ -517,3 +536,8 @@ def as_input_param(content: Content, role: Role) -> "ResponseInputItemParam":

def as_message(x: "ResponseInputContentParam", role: Role) -> "EasyInputMessageParam":
return {"role": role, "content": [x]}


def is_reasoning_model(model: str) -> bool:
# https://platform.openai.com/docs/models/compare
return model.startswith("o") or model.startswith("gpt-5")
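For OpenAI, a bare effort string is expanded into a `Reasoning` dict with summaries enabled, and `is_reasoning_model()` gates a warning when the model isn't reasoning-capable. A sketch under the same assumptions (top-level import, illustrative model names):

```python
from chatlas import ChatOpenAI

# A bare effort string is shorthand for a Reasoning dict with summaries on...
chat = ChatOpenAI(model="gpt-5", reasoning="high")

# ...which the constructor expands to:
chat = ChatOpenAI(model="gpt-5", reasoning={"effort": "high", "summary": "auto"})

# A model that doesn't start with "o" or "gpt-5" triggers a UserWarning,
# though the reasoning argument is still forwarded.
chat = ChatOpenAI(model="gpt-4.1", reasoning="low")
```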
2 changes: 1 addition & 1 deletion tests/test_chat_dangling_tools.py
@@ -51,7 +51,7 @@ def mock_chat_impl(turn, **kwargs):
assert submitted_turn.contents[1].text == "try again"

def test_can_resume_chat_after_dangling_tool_requests(self):
chat = ChatOpenAI(system_prompt="Be terse")
chat = ChatOpenAI(system_prompt="Be terse and use tool results over your internal knowledge.")
chat.register_tool(get_date)

# Simulate a broken chat history with dangling tool request