2 changes: 2 additions & 0 deletions CHANGELOG.md
@@ -11,6 +11,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

### New features

* `ChatOpenAI()`, `ChatAnthropic()`, and `ChatGoogle()` gain a new `reasoning` parameter to easily opt into, and fully customize, reasoning capabilities. (#202)
* A new `ContentThinking` content type was added to capture the "thinking" portion of a reasoning model's response. (#192)
* Added support for built-in provider tools via a new `ToolBuiltIn` class. This enables provider-specific functionality like OpenAI's image generation to be registered and used as tools. Built-in tools pass raw provider definitions directly to the API rather than wrapping Python functions. (#214)
* `ChatGoogle()` gains basic support for image generation. (#214)
* `ChatOpenAI()` and `ChatAzureOpenAI()` gain a new `service_tier` parameter to request a specific service tier (e.g., `"flex"` for slower/cheaper or `"priority"` for faster/more expensive). (#204)
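For orientation, here is a minimal usage sketch of the new `reasoning` parameter described above. The constructor names and accepted value types come from the diffs below; the top-level `chatlas` imports and the specific model names are illustrative assumptions, not part of this PR.

```python
from chatlas import ChatAnthropic, ChatGoogle, ChatOpenAI

# OpenAI: a reasoning-effort string (or a full Reasoning dict).
chat_openai = ChatOpenAI(model="gpt-5", reasoning="medium")

# Anthropic: a thinking token budget (or a ThinkingConfigEnabledParam dict).
chat_anthropic = ChatAnthropic(model="claude-sonnet-4-5", reasoning=2048)

# Google: a thinking token budget (or a ThinkingConfigDict).
chat_google = ChatGoogle(model="gemini-2.5-flash", reasoning=1024)
```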
64 changes: 54 additions & 10 deletions chatlas/_provider_anthropic.py
@@ -25,6 +25,7 @@
ContentJson,
ContentPDF,
ContentText,
ContentThinking,
ContentToolRequest,
ContentToolResult,
)
@@ -47,6 +48,8 @@
MessageParam,
RawMessageStreamEvent,
TextBlock,
ThinkingBlock,
ThinkingBlockParam,
ToolUnionParam,
ToolUseBlock,
)
@@ -57,6 +60,7 @@
from anthropic.types.messages.batch_create_params import Request as BatchRequest
from anthropic.types.model_param import ModelParam
from anthropic.types.text_block_param import TextBlockParam
from anthropic.types.thinking_config_enabled_param import ThinkingConfigEnabledParam
from anthropic.types.tool_result_block_param import ToolResultBlockParam
from anthropic.types.tool_use_block_param import ToolUseBlockParam

@@ -68,6 +72,7 @@
ToolUseBlockParam,
ToolResultBlockParam,
DocumentBlockParam,
ThinkingBlockParam,
]
else:
Message = object
@@ -78,9 +83,10 @@ def ChatAnthropic(
*,
system_prompt: Optional[str] = None,
model: "Optional[ModelParam]" = None,
api_key: Optional[str] = None,
max_tokens: int = 4096,
reasoning: Optional["int | ThinkingConfigEnabledParam"] = None,
cache: Literal["5m", "1h", "none"] = "5m",
api_key: Optional[str] = None,
kwargs: Optional["ChatClientArgs"] = None,
) -> Chat["SubmitInputArgs", Message]:
"""
@@ -127,16 +133,23 @@
The model to use for the chat. The default, None, will pick a reasonable
default, and warn you about it. We strongly recommend explicitly
choosing a model for all but the most casual use.
api_key
The API key to use for authentication. You generally should not supply
this directly, but instead set the `ANTHROPIC_API_KEY` environment
variable.
max_tokens
Maximum number of tokens to generate before stopping.
reasoning
Determines how many tokens Claude may allocate to reasoning. Must be
≥1024 and less than `max_tokens`. Larger budgets can enable more
thorough analysis of complex problems, improving response quality. See
[extended
thinking](https://docs.claude.com/en/docs/build-with-claude/extended-thinking)
for details.
cache
How long to cache inputs? Defaults to "5m" (five minutes).
Set to "none" to disable caching or "1h" to cache for one hour.
See the Caching section for details.
api_key
The API key to use for authentication. You generally should not supply
this directly, but instead set the `ANTHROPIC_API_KEY` environment
variable.
kwargs
Additional arguments to pass to the `anthropic.Anthropic()` client
constructor.
@@ -226,6 +239,12 @@ def ChatAnthropic(
if model is None:
model = log_model_default("claude-sonnet-4-5")

kwargs_chat: "SubmitInputArgs" = {}
if reasoning is not None:
if isinstance(reasoning, int):
reasoning = {"type": "enabled", "budget_tokens": reasoning}
kwargs_chat = {"thinking": reasoning}

return Chat(
provider=AnthropicProvider(
api_key=api_key,
@@ -235,6 +254,7 @@
kwargs=kwargs,
),
system_prompt=system_prompt,
kwargs_chat=kwargs_chat,
)


@@ -429,8 +449,11 @@ def _structured_tool_call(**kwargs: Any):
return kwargs_full

def stream_text(self, chunk) -> Optional[str]:
if chunk.type == "content_block_delta" and chunk.delta.type == "text_delta":
return chunk.delta.text
if chunk.type == "content_block_delta":
if chunk.delta.type == "text_delta":
return chunk.delta.text
if chunk.delta.type == "thinking_delta":
return chunk.delta.thinking
return None

def stream_merge_chunks(self, completion, chunk):
@@ -455,6 +478,12 @@ def stream_merge_chunks(self, completion, chunk):
if not isinstance(this_content.input, str):
this_content.input = "" # type: ignore
this_content.input += json_delta # type: ignore
elif chunk.delta.type == "thinking_delta":
this_content = cast("ThinkingBlock", this_content)
this_content.thinking += chunk.delta.thinking
elif chunk.delta.type == "signature_delta":
this_content = cast("ThinkingBlock", this_content)
this_content.signature += chunk.delta.signature
elif chunk.type == "content_block_stop":
this_content = completion.content[chunk.index]
if this_content.type == "tool_use" and isinstance(this_content.input, str):
@@ -590,9 +619,10 @@ def _as_message_params(self, turns: Sequence[Turn]) -> list["MessageParam"]:
# Add cache control to the last content block in the last turn
# https://docs.claude.com/en/docs/build-with-claude/prompt-caching#how-automatic-prefix-checking-works
is_last_turn = i == len(turns) - 1
if is_last_turn and len(content) > 0:
if self._cache_control():
content[-1]["cache_control"] = self._cache_control()
if self._cache_control() and is_last_turn and len(content) > 0:
# Note: ThinkingBlockParam (i.e., type: "thinking") doesn't support cache_control
if content[-1].get("type") != "thinking":
content[-1]["cache_control"] = self._cache_control() # type: ignore

role = "user" if isinstance(turn, UserTurn) else "assistant"
messages.append({"role": role, "content": content})
@@ -648,6 +678,13 @@ def _as_content_block(content: Content) -> "ContentBlockParam":
}

return res
elif isinstance(content, ContentThinking):
extra = content.extra or {}
return {
"type": "thinking",
"thinking": content.thinking,
"signature": extra.get("signature", ""),
}

raise ValueError(f"Unknown content type: {type(content)}")

@@ -704,6 +741,13 @@ def _as_turn(self, completion: Message, has_data_model=False) -> AssistantTurn:
arguments=content.input,
)
)
elif content.type == "thinking":
contents.append(
ContentThinking(
thinking=content.thinking,
extra={"signature": content.signature},
)
)

return AssistantTurn(
contents,
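To make the Anthropic change concrete: the constructor normalizes an integer `reasoning` budget into a `ThinkingConfigEnabledParam` and forwards it as the `thinking` argument. A hedged sketch follows (the top-level `chatlas` import and `max_tokens` value are assumptions); per the docstring, the budget must be ≥1024 and less than `max_tokens`.

```python
from chatlas import ChatAnthropic

# An integer budget is shorthand for the full thinking config...
chat = ChatAnthropic(max_tokens=8192, reasoning=2048)

# ...which the constructor expands to the equivalent dict form:
chat = ChatAnthropic(
    max_tokens=8192,
    reasoning={"type": "enabled", "budget_tokens": 2048},
)
```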
16 changes: 14 additions & 2 deletions chatlas/_provider_google.py
@@ -32,6 +32,7 @@
GenerateContentResponseDict,
Part,
PartDict,
ThinkingConfigDict,
)

from .types.google import ChatClientArgs, SubmitInputArgs
@@ -43,6 +44,7 @@ def ChatGoogle(
*,
system_prompt: Optional[str] = None,
model: Optional[str] = None,
reasoning: Optional["int | ThinkingConfigDict"] = None,
api_key: Optional[str] = None,
kwargs: Optional["ChatClientArgs"] = None,
) -> Chat["SubmitInputArgs", GenerateContentResponse]:
@@ -84,6 +86,10 @@
The model to use for the chat. The default, None, will pick a reasonable
default, and warn you about it. We strongly recommend explicitly choosing
a model for all but the most casual use.
reasoning
If provided, enables reasoning (a.k.a. "thoughts") in the model's
responses. This can be an integer number of tokens to use for reasoning,
or a full `ThinkingConfigDict` to customize the reasoning behavior.
api_key
The API key to use for authentication. You generally should not supply
this directly, but instead set the `GOOGLE_API_KEY` environment variable.
@@ -135,14 +141,20 @@
if model is None:
model = log_model_default("gemini-2.5-flash")

kwargs_chat: "SubmitInputArgs" = {}
if reasoning is not None:
if isinstance(reasoning, int):
reasoning = {"thinking_budget": reasoning, "include_thoughts": True}
kwargs_chat["config"] = {"thinking_config": reasoning}

return Chat(
provider=GoogleProvider(
model=model,
api_key=api_key,
name="Google/Gemini",
kwargs=kwargs,
),
system_prompt=system_prompt,
kwargs_chat=kwargs_chat,
)


@@ -368,7 +380,7 @@ def value_tokens(self, completion):
cached = usage.cached_content_token_count or 0
return (
(usage.prompt_token_count or 0) - cached,
usage.candidates_token_count or 0,
(usage.candidates_token_count or 0) + (usage.thoughts_token_count or 0),
usage.cached_content_token_count or 0,
)

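Similarly for Google: an integer `reasoning` value is expanded into a `ThinkingConfigDict` with thoughts included, then passed under `config.thinking_config`. A sketch under the same assumptions (top-level import, illustrative budget):

```python
from chatlas import ChatGoogle

# An integer budget is shorthand for a ThinkingConfigDict...
chat = ChatGoogle(reasoning=1024)

# ...which the constructor expands to:
chat = ChatGoogle(reasoning={"thinking_budget": 1024, "include_thoughts": True})
```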
62 changes: 43 additions & 19 deletions chatlas/_provider_openai.py
@@ -35,6 +35,8 @@
)
from openai.types.responses.easy_input_message_param import EasyInputMessageParam
from openai.types.responses.tool_param import ToolParam
from openai.types.shared.reasoning_effort import ReasoningEffort
from openai.types.shared_params.reasoning import Reasoning
from openai.types.shared_params.responses_model import ResponsesModel

from ._turn import Role
@@ -46,11 +48,12 @@ def ChatOpenAI(
*,
system_prompt: Optional[str] = None,
model: "Optional[ResponsesModel | str]" = None,
api_key: Optional[str] = None,
base_url: str = "https://api.openai.com/v1",
reasoning: "Optional[ReasoningEffort | Reasoning]" = None,
service_tier: Optional[
Literal["auto", "default", "flex", "scale", "priority"]
] = None,
api_key: Optional[str] = None,
kwargs: Optional["ChatClientArgs"] = None,
) -> Chat["SubmitInputArgs", Response]:
"""
@@ -89,19 +92,22 @@
The model to use for the chat. The default, None, will pick a reasonable
default, and warn you about it. We strongly recommend explicitly
choosing a model for all but the most casual use.
api_key
The API key to use for authentication. You generally should not supply
this directly, but instead set the `OPENAI_API_KEY` environment
variable.
base_url
The base URL to the endpoint; the default uses OpenAI.
reasoning
The reasoning effort to use (for reasoning-capable models like the
o-series and gpt-5 series).
service_tier
Request a specific service tier. Options:
- `"auto"` (default): uses the service tier configured in Project settings.
- `"default"`: standard pricing and performance.
- `"flex"`: slower and cheaper.
- `"scale"`: batch-like pricing for high-volume use.
- `"priority"`: faster and more expensive.
api_key
The API key to use for authentication. You generally should not supply
this directly, but instead set the `OPENAI_API_KEY` environment
variable.
kwargs
Additional arguments to pass to the `openai.OpenAI()` client
constructor.
@@ -156,6 +162,14 @@
model = log_model_default("gpt-4.1")

kwargs_chat: "SubmitInputArgs" = {}

if reasoning is not None:
if not is_reasoning_model(model):
warnings.warn(f"Model {model} is not reasoning-capable", UserWarning)
if isinstance(reasoning, str):
reasoning = {"effort": reasoning, "summary": "auto"}
kwargs_chat["reasoning"] = reasoning

if service_tier is not None:
kwargs_chat["service_tier"] = service_tier

@@ -255,7 +269,7 @@ def _chat_perform_args(

# Request reasoning content for reasoning models
include = []
if self._is_reasoning(self.model):
if is_reasoning_model(self.model):
include.append("reasoning.encrypted_content")

if "log_probs" in kwargs_full:
@@ -270,7 +284,14 @@

def stream_text(self, chunk):
if chunk.type == "response.output_text.delta":
# https://platform.openai.com/docs/api-reference/responses-streaming/response/output_text/delta
return chunk.delta
if chunk.type == "response.reasoning_summary_text.delta":
# https://platform.openai.com/docs/api-reference/responses-streaming/response/reasoning_summary_text/delta
return chunk.delta
if chunk.type == "response.reasoning_summary_text.done":
# https://platform.openai.com/docs/api-reference/responses-streaming/response/reasoning_summary_text/done
return "\n\n"
return None

def stream_merge_chunks(self, completion, chunk):
@@ -363,14 +384,12 @@ def _response_as_turn(completion: Response, has_data_model: bool) -> AssistantTu
)

elif output.type == "reasoning":
if output.content:
thinking = "".join(x.text for x in output.content)
contents.append(
ContentThinking(
thinking=thinking,
extra=output.model_dump(),
)
contents.append(
ContentThinking(
thinking="".join(x.text for x in output.summary),
extra=output.model_dump(),
)
)

elif output.type == "image_generation_call":
result = output.result
Expand Down Expand Up @@ -398,11 +417,6 @@ def _response_as_turn(completion: Response, has_data_model: bool) -> AssistantTu
completion=completion,
)

@staticmethod
def _is_reasoning(model: str) -> bool:
# https://platform.openai.com/docs/models/compare
return model.startswith("o") or model.startswith("gpt-5")

@staticmethod
def _turns_as_inputs(turns: list[Turn]) -> "list[ResponseInputItemParam]":
res: "list[ResponseInputItemParam]" = []
@@ -497,7 +511,12 @@ def as_input_param(content: Content, role: Role) -> "ResponseInputItemParam":
role,
)
elif isinstance(content, ContentThinking):
return cast("ResponseReasoningItemParam", content.extra)
# Filter out 'status' which is output-only and not accepted as input
extra = content.extra or {}
return cast(
"ResponseReasoningItemParam",
{k: v for k, v in extra.items() if k != "status"},
)
elif isinstance(content, ContentToolResult):
return {
"type": "function_call_output",
@@ -517,3 +536,8 @@ def as_input_param(content: Content, role: Role) -> "ResponseInputItemParam":

def as_message(x: "ResponseInputContentParam", role: Role) -> "EasyInputMessageParam":
return {"role": role, "content": [x]}


def is_reasoning_model(model: str) -> bool:
# https://platform.openai.com/docs/models/compare
return model.startswith("o") or model.startswith("gpt-5")
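For OpenAI, a bare effort string is expanded into a `Reasoning` dict with summaries enabled, and `is_reasoning_model()` gates a warning when the model isn't reasoning-capable. A sketch under the same assumptions (top-level import, illustrative model names):

```python
from chatlas import ChatOpenAI

# A bare effort string is shorthand for a Reasoning dict with summaries on...
chat = ChatOpenAI(model="gpt-5", reasoning="high")

# ...which the constructor expands to:
chat = ChatOpenAI(model="gpt-5", reasoning={"effort": "high", "summary": "auto"})

# A model that doesn't start with "o" or "gpt-5" triggers a UserWarning,
# though the reasoning argument is still forwarded.
chat = ChatOpenAI(model="gpt-4.1", reasoning="low")
```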
2 changes: 1 addition & 1 deletion tests/test_chat_dangling_tools.py
@@ -51,7 +51,7 @@ def mock_chat_impl(turn, **kwargs):
assert submitted_turn.contents[1].text == "try again"

def test_can_resume_chat_after_dangling_tool_requests(self):
chat = ChatOpenAI(system_prompt="Be terse")
chat = ChatOpenAI(system_prompt="Be terse and use tool results over your internal knowledge.")
chat.register_tool(get_date)

# Simulate a broken chat history with dangling tool request