fixes

kazmer97 · kazmer97 · commit 15c8bbda8f68 · 2025-11-26T10:34:14.000Z
diff --git a/lib/idp_common_pkg/idp_common/config/models.py b/lib/idp_common_pkg/idp_common/config/models.py
@@ -20,7 +20,14 @@
 
 from typing import Any, Dict, List, Optional, Union, Literal, Annotated
 from typing_extensions import Self
-from pydantic import BaseModel, ConfigDict, Field, field_validator, Discriminator, model_validator
+from pydantic import (
+    BaseModel,
+    ConfigDict,
+    Field,
+    field_validator,
+    Discriminator,
+    model_validator,
+)
 
 
 class ImageConfig(BaseModel):
@@ -79,7 +86,10 @@ class AgenticConfig(BaseModel):
 
     enabled: bool = Field(default=False, description="Enable agentic extraction")
     review_agent: bool = Field(default=False, description="Enable review agent")
-    review_agent_model: str | None= Field(default=None, description="Model used for reviewing and correcting extraction work")
+    review_agent_model: str | None = Field(
+        default=None,
+        description="Model used for reviewing and correcting extraction work",
+    )
 
 
 class ExtractionConfig(BaseModel):
@@ -121,17 +131,16 @@ def parse_int(cls, v: Any) -> int:
         if isinstance(v, str):
             return int(v) if v else 0
         return int(v)
-    
-    @model_validator(mode="after")
-    def model_validator(self) -> Self:
 
+    @model_validator(mode="after")
+    def set_default_review_agent_model(self) -> Self:
+        """Set review_agent_model to extraction model if not specified."""
         if not self.agentic.review_agent_model:
             self.agentic.review_agent_model = self.model
 
         return self
 
 
-
 class ClassificationConfig(BaseModel):
     """Document classification configuration"""
 
@@ -434,7 +443,7 @@ class ErrorAnalyzerConfig(BaseModel):
             "AccessDenied",
             "ThrottlingException",
         ],
-        description="Error patterns to search for in logs"
+        description="Error patterns to search for in logs",
     )
     system_prompt: str = Field(
         default="""
@@ -522,11 +531,10 @@ class ErrorAnalyzerConfig(BaseModel):
                       - No time specified: 24 hours (default)
               
               IMPORTANT: Do not include any search quality reflections, search quality scores, or meta-analysis sections in your response. Only provide the three required sections: Root Cause, Recommendations, and Evidence.""",
-        description="System prompt for error analyzer"
+        description="System prompt for error analyzer",
     )
     parameters: ErrorAnalyzerParameters = Field(
-        default_factory=ErrorAnalyzerParameters,
-        description="Error analyzer parameters"
+        default_factory=ErrorAnalyzerParameters, description="Error analyzer parameters"
     )
 
 
@@ -646,12 +654,10 @@ class AgentsConfig(BaseModel):
     """Agents configuration"""
 
     error_analyzer: Optional[ErrorAnalyzerConfig] = Field(
-        default_factory=ErrorAnalyzerConfig,
-        description="Error analyzer configuration"
+        default_factory=ErrorAnalyzerConfig, description="Error analyzer configuration"
     )
     chat_companion: Optional[ChatCompanionConfig] = Field(
-        default_factory=ChatCompanionConfig,
-        description="Chat companion configuration"
+        default_factory=ChatCompanionConfig, description="Chat companion configuration"
     )
 
 
diff --git a/lib/idp_common_pkg/idp_common/extraction/agentic_idp.py b/lib/idp_common_pkg/idp_common/extraction/agentic_idp.py
@@ -52,7 +52,10 @@
 # In Lambda: Full JSON structured logs
 # Outside Lambda: Human-readable format for local development
 logger = Logger(service="agentic_idp", level=os.getenv("LOG_LEVEL", "INFO"))
-logging.getLogger("strands.models.bedrock").setLevel(logging.DEBUG)
+# Configure strands bedrock logger based on environment variable
+logging.getLogger("strands.models.bedrock").setLevel(
+    os.getenv("STRANDS_LOG_LEVEL", os.getenv("LOG_LEVEL", "INFO"))
+)
 TargetModel = TypeVar("TargetModel", bound=BaseModel)
 
 
@@ -193,6 +196,8 @@ def view_image(image_index: int, agent: Agent) -> dict:
         """
 
         # Validate image index exists
+        if not page_images:
+            raise ValueError("No images available to view.")
         if image_index >= len(page_images):
             raise ValueError(
                 f"Invalid image_index {image_index}. "
@@ -729,27 +734,27 @@ def _prepare_prompt_content(
     else:
         prompt_content = [ContentBlock(text=str(prompt))]
 
-    # Add page images if provided
+    # Add page images if provided (limit to 20 as per Bedrock constraints)
     if page_images:
         if len(page_images) > 20:
             prompt_content.append(
                 ContentBlock(
-                    text=f"There are {len(page_images)} images, initially you'll see 20 of them, use the tools to see the rest."
+                    text=f"There are {len(page_images)} images, initially you'll see 20 of them, use the view_image tool to see the rest."
                 )
             )
 
         prompt_content += [
             ContentBlock(
                 image=ImageContent(format="png", source=ImageSource(bytes=img_bytes))
             )
-            for img_bytes in page_images
+            for img_bytes in page_images[:20]
         ]
 
     # Add existing data context if provided
     if existing_data:
         prompt_content.append(
             ContentBlock(
-                text=f"Please update the existing data using the extraction tool or patches. Existing data: {existing_data.model_dump()}"
+                text=f"Please update the existing data using the extraction tool or patches. Existing data: {existing_data.model_dump(mode='json')}"
             )
         )
 
@@ -1014,7 +1019,7 @@ async def structured_output_async(
         ),
     )
     if existing_data:
-        agent.state.set("current_extraction", existing_data.model_dump())
+        agent.state.set("current_extraction", existing_data.model_dump(mode="json"))
 
     response, result = await _invoke_agent_for_extraction(
         agent=agent,
@@ -1075,9 +1080,11 @@ async def structured_output_async(
             tools=tools,
             system_prompt=f"{final_system_prompt}",
             state={
-                "current_extraction": None,
+                "current_extraction": result.model_dump(mode="json"),
                 "images": {},
-                "existing_data": existing_data.model_dump() if existing_data else None,
+                "existing_data": existing_data.model_dump(mode="json")
+                if existing_data
+                else None,
                 "extraction_schema_json": schema_json,  # Store for schema reminder tool
             },
             conversation_manager=SummarizingConversationManager(
@@ -1095,7 +1102,7 @@ async def structured_output_async(
 
         # Check if patches were applied during review
         updated_extraction = agent.state.get("current_extraction")
-        if updated_extraction != result.model_dump():
+        if updated_extraction != result.model_dump(mode="json"):
             # Patches were applied, validate the new extraction
             try:
                 result = data_format(**updated_extraction)
diff --git a/lib/idp_common_pkg/idp_common/extraction/service.py b/lib/idp_common_pkg/idp_common/extraction/service.py
@@ -111,7 +111,7 @@ def __init__(
         self._class_label: str = ""
         self._attribute_descriptions: str = ""
         self._class_schema: dict[str, Any] = {}
-        self._page_images: list[Any] = []
+        self._page_images: list[bytes] = []
         self._image_uris: list[str] = []
 
         # Get model_id from config for logging (type-safe access with fallback)
diff --git a/lib/idp_common_pkg/idp_common/utils/bedrock_utils.py b/lib/idp_common_pkg/idp_common/utils/bedrock_utils.py
@@ -18,7 +18,6 @@
     InvokeModelRequestTypeDef,
     InvokeModelResponseTypeDef,
 )
-from pydantic_core import ArgsKwargs
 
 # Configure logger
 logger = logging.getLogger(__name__)
@@ -48,14 +47,14 @@ async def wrapper(*args, **kwargs) -> T:
 
             def log_bedrock_invocation_error(error: Exception, attempt_num: int):
                 """Log bedrock invocation details when an error occurs"""
-                    # Fallback logging if extraction fails
+                # Fallback logging if extraction fails
                 logger.error(
                     "Bedrock invocation error",
                     extra={
                         "function_name": func.__name__,
                         "original_error": str(error),
                         "max_attempts": max_retries,
-                        "attempt_num":attempt_num
+                        "attempt_num": attempt_num,
                     },
                 )
 
@@ -203,6 +202,7 @@ def log_bedrock_invocation_error(error: Exception, attempt_num: int):
                         error_code
                         not in [
                             "ThrottlingException",
+                            "throttlingException",
                             "ModelErrorException",
                             "ValidationException",
                         ]
diff --git a/lib/idp_common_pkg/idp_common/utils/strands_agent_tools/__init__.py b/lib/idp_common_pkg/idp_common/utils/strands_agent_tools/__init__.py
@@ -0,0 +1,14 @@
+# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+# SPDX-License-Identifier: MIT-0
+
+"""
+Strands agent tools for IDP common library.
+"""
+
+from idp_common.utils.strands_agent_tools.todo_list import (
+    create_todo_list,
+    update_todo,
+    view_todo_list,
+)
+
+__all__ = ["create_todo_list", "update_todo", "view_todo_list"]
diff --git a/lib/idp_common_pkg/pyproject.toml b/lib/idp_common_pkg/pyproject.toml
@@ -177,6 +177,7 @@ agentic-extraction = [
   "tabulate>=0.9.0",
   "aws-lambda-powertools>=3.2.0",                          # Structured logging and observability
   "datamodel-code-generator>=0.25.0",                      # Generate Pydantic models from JSON Schema
+  "mypy-boto3-bedrock-runtime>=1.39.0",                    # Type stubs for bedrock_utils.py
 ]
 
 [project.urls]

Original file line number	Diff line number	Diff line change
`@@ -177,6 +177,7 @@ agentic-extraction = [`
`177`	`177`	`"tabulate>=0.9.0",`
`178`	`178`	`"aws-lambda-powertools>=3.2.0", # Structured logging and observability`
`179`	`179`	`"datamodel-code-generator>=0.25.0", # Generate Pydantic models from JSON Schema`
	`180`	`+ "mypy-boto3-bedrock-runtime>=1.39.0", # Type stubs for bedrock_utils.py`
`180`	`181`	`]`
`181`	`182`
`182`	`183`	`[project.urls]`