@@ -7,7 +7,8 @@
 import json
 import logging
 import mimetypes
-from typing import Any, AsyncGenerator, Optional, Protocol, Type, TypedDict, TypeVar, Union, cast
+from contextlib import asynccontextmanager
+from typing import Any, AsyncGenerator, AsyncIterator, Optional, Protocol, Type, TypedDict, TypeVar, Union, cast
 
 import openai
 from openai.types.chat.parsed_chat_completion import ParsedChatCompletion
@@ -55,16 +56,39 @@ class OpenAIConfig(TypedDict, total=False):
         model_id: str
         params: Optional[dict[str, Any]]
 
-    def __init__(self, client_args: Optional[dict[str, Any]] = None, **model_config: Unpack[OpenAIConfig]) -> None:
+    def __init__(
+        self,
+        client: Optional[Client] = None,
+        client_args: Optional[dict[str, Any]] = None,
+        **model_config: Unpack[OpenAIConfig],
+    ) -> None:
         """Initialize provider instance.
 
         Args:
-            client_args: Arguments for the OpenAI client.
+            client: Pre-configured OpenAI-compatible client to reuse across requests.
+                When provided, this client will be reused for all requests and will NOT be closed
+                by the model. The caller is responsible for managing the client lifecycle.
+                This is useful for:
+                - Injecting custom client wrappers (e.g., GuardrailsAsyncOpenAI)
+                - Reusing connection pools within a single event loop/worker
+                - Centralizing observability, retries, and networking policy
+                - Pointing to custom model gateways
+                Note: The client should not be shared across different asyncio event loops.
+            client_args: Arguments for the OpenAI client (legacy approach).
                 For a complete list of supported arguments, see https://pypi.org/project/openai/.
             **model_config: Configuration options for the OpenAI model.
+
+        Raises:
+            ValueError: If both `client` and `client_args` are provided.
         """
         validate_config_keys(model_config, self.OpenAIConfig)
         self.config = dict(model_config)
+
+        # Validate that only one client configuration method is provided
+        if client is not None and client_args is not None and len(client_args) > 0:
+            raise ValueError("Only one of 'client' or 'client_args' should be provided, not both.")
+
+        self._custom_client = client
         self.client_args = client_args or {}
 
         logger.debug("config=<%s> | initializing", self.config)
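
A minimal usage sketch of the two construction paths this `__init__` now supports, assuming the enclosing class is `OpenAIModel` (as the `OpenAIConfig` TypedDict suggests); the model ID and gateway URL are illustrative:

import openai

# Legacy path: the model builds a fresh AsyncOpenAI client from these args on every request.
model = OpenAIModel(client_args={"api_key": "sk-..."}, model_id="gpt-4o")

# Injection path: the caller constructs and owns the client; the model never closes it.
custom_client = openai.AsyncOpenAI(api_key="sk-...", base_url="https://gateway.example.com/v1")
model = OpenAIModel(client=custom_client, model_id="gpt-4o")

# Passing both `client` and a non-empty `client_args` raises ValueError.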
@@ -422,6 +446,34 @@ def format_chunk(self, event: dict[str, Any], **kwargs: Any) -> StreamEvent:
             case _:
                 raise RuntimeError(f"chunk_type=<{event['chunk_type']} | unknown type")
 
+    @asynccontextmanager
+    async def _get_client(self) -> AsyncIterator[Any]:
+        """Get an OpenAI client for making requests.
+
+        This context manager handles client lifecycle management:
+        - If an injected client was provided during initialization, it yields that client
+          without closing it (caller manages lifecycle).
+        - Otherwise, creates a new AsyncOpenAI client from client_args and automatically
+          closes it when the context exits.
+
+        Note: We create a new client per request to avoid connection sharing in the underlying
+        httpx client, as the asyncio event loop does not allow connections to be shared.
+        For more details, see https://github.com/encode/httpx/discussions/2959.
+
+        Yields:
+            Client: An OpenAI-compatible client instance.
+        """
+        if self._custom_client is not None:
+            # Use the injected client (caller manages lifecycle)
+            yield self._custom_client
+        else:
+            # Create a new client from client_args
+            # We initialize an OpenAI context on every request so as to avoid connection sharing in the underlying
+            # httpx client. The asyncio event loop does not allow connections to be shared. For more details, please
+            # refer to https://github.com/encode/httpx/discussions/2959.
+            async with openai.AsyncOpenAI(**self.client_args) as client:
+                yield client
+
     @override
     async def stream(
         self,
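
The `_get_client` helper is an instance of a borrow-or-own resource pattern: a single `@asynccontextmanager` either lends out a caller-owned client untouched or creates and disposes an ephemeral one. A self-contained sketch of the same shape (the class and method names here are invented for illustration):

from contextlib import asynccontextmanager
from typing import Any, AsyncIterator, Optional

import openai


class ClientSource:
    """Borrow an injected client, or own a per-request one."""

    def __init__(self, client: Optional[Any] = None, client_args: Optional[dict[str, Any]] = None) -> None:
        self._custom_client = client
        self._client_args = client_args or {}

    @asynccontextmanager
    async def acquire(self) -> AsyncIterator[Any]:
        if self._custom_client is not None:
            # Borrowed: yielded as-is; exiting the context does not close it.
            yield self._custom_client
        else:
            # Owned: created per request and closed on exit, so httpx connections
            # are never reused across asyncio event loops.
            async with openai.AsyncOpenAI(**self._client_args) as client:
                yield client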
@@ -457,7 +509,7 @@ async def stream(
         # We initialize an OpenAI context on every request so as to avoid connection sharing in the underlying httpx
         # client. The asyncio event loop does not allow connections to be shared. For more details, please refer to
         # https://github.com/encode/httpx/discussions/2959.
-        async with openai.AsyncOpenAI(**self.client_args) as client:
+        async with self._get_client() as client:
             try:
                 response = await client.chat.completions.create(**request)
             except openai.BadRequestError as e:
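
The docstring's event-loop warning, made concrete: an injected client binds its httpx connection pool to the loop it first awaits on, so it should be created inside the running loop and not reused across separate `asyncio.run` calls. A hedged sketch, again assuming the class is `OpenAIModel`, with the actual requests elided:

import asyncio

import openai


async def main() -> None:
    # Created inside the running loop; reused for every request made within it.
    async with openai.AsyncOpenAI() as client:
        model = OpenAIModel(client=client, model_id="gpt-4o")
        ...  # stream requests through `model` here


asyncio.run(main())  # a second asyncio.run(...) should construct a new client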
@@ -576,7 +628,7 @@ async def structured_output(
         # We initialize an OpenAI context on every request so as to avoid connection sharing in the underlying httpx
         # client. The asyncio event loop does not allow connections to be shared. For more details, please refer to
         # https://github.com/encode/httpx/discussions/2959.
-        async with openai.AsyncOpenAI(**self.client_args) as client:
+        async with self._get_client() as client:
             try:
                 response: ParsedChatCompletion = await client.beta.chat.completions.parse(
                     model=self.get_config()["model_id"],
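
For reference, `client.beta.chat.completions.parse` accepts a Pydantic model as `response_format` and returns a `ParsedChatCompletion` whose parsed object lives on the message. A minimal sketch of the call now routed through `_get_client` (the `Weather` model and prompt are invented):

from typing import Optional

import openai
from pydantic import BaseModel


class Weather(BaseModel):
    city: str
    temperature_c: float


async def demo() -> Optional[Weather]:
    async with openai.AsyncOpenAI() as client:
        response = await client.beta.chat.completions.parse(
            model="gpt-4o",
            messages=[{"role": "user", "content": "What's the weather in Paris?"}],
            response_format=Weather,
        )
    # .parsed is None if the model refused or parsing failed.
    return response.choices[0].message.parsed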