Small fixes: type-check-only boto3 stub imports, safer event-loop handling, duplicate validator removal

kazmer97 · kazmer97 · commit acd8967fe001 · 2025-12-11T10:13:15.000Z
diff --git a/lib/idp_common_pkg/idp_common/assessment/granular_service.py b/lib/idp_common_pkg/idp_common/assessment/granular_service.py
@@ -11,15 +11,19 @@
 4. Maintaining assessment structure that mirrors extraction results
 """
 
+from __future__ import annotations
+
 import json
 import os
 import time
-from typing import Any
+from typing import TYPE_CHECKING, Any
 
 from aws_lambda_powertools import Logger
-from mypy_boto3_dynamodb.service_resource import DynamoDBServiceResource
 
 from idp_common import image, metrics, s3, utils
+
+if TYPE_CHECKING:
+    from mypy_boto3_dynamodb.service_resource import DynamoDBServiceResource
 from idp_common.assessment.models import AssessmentResult, AssessmentTask
 from idp_common.assessment.strands_executor import execute_assessment_tasks_parallel
 from idp_common.assessment.strands_service import _convert_field_path_to_string
diff --git a/lib/idp_common_pkg/idp_common/assessment/strands_executor.py b/lib/idp_common_pkg/idp_common/assessment/strands_executor.py
@@ -6,6 +6,7 @@
 """
 
 import asyncio
+import concurrent.futures
 import os
 import time
 from typing import Any, cast
@@ -188,53 +189,44 @@ def execute_assessment_tasks_parallel(
 
     start_time = time.time()
 
-    # Run async executor
-    # Use asyncio.run() for clean event loop management
+    # Wrap the async call once so both execution paths below run the same coroutine
+    async def _run() -> tuple[list[AssessmentResult], dict[str, Any]]:
+        return await execute_tasks_async(
+            tasks=tasks,
+            extraction_results=extraction_results,
+            page_images=page_images,
+            sorted_page_ids=sorted_page_ids,
+            model_id=model_id,
+            system_prompt=system_prompt,
+            temperature=temperature,
+            max_tokens=max_tokens,
+            document_schema=document_schema,
+            max_concurrent=max_concurrent,
+            max_retries=max_retries,
+            connect_timeout=connect_timeout,
+            read_timeout=read_timeout,
+        )
+
+    # Check if there's already a running event loop
+    # This is more robust than catching exceptions with string matching
     try:
-        results, metering = asyncio.run(
-            execute_tasks_async(
-                tasks=tasks,
-                extraction_results=extraction_results,
-                page_images=page_images,
-                sorted_page_ids=sorted_page_ids,
-                model_id=model_id,
-                system_prompt=system_prompt,
-                temperature=temperature,
-                max_tokens=max_tokens,
-                document_schema=document_schema,
-                max_concurrent=max_concurrent,
-                max_retries=max_retries,
-                connect_timeout=connect_timeout,
-                read_timeout=read_timeout,
-            )
+        loop = asyncio.get_running_loop()
+    except RuntimeError:
+        loop = None
+
+    if loop is not None and loop.is_running():
+        # A loop is already running in this thread (e.g., Jupyter, nested async call), so
+        # asyncio.run() would raise RuntimeError; use a fresh loop in a worker thread (blocks until done)
+        logger.warning(
+            "Event loop already running, executing in separate thread",
+            extra={"loop": str(loop)},
         )
-    except RuntimeError as e:
-        # Handle case where event loop already exists (shouldn't happen in Lambda)
-        if "There is no current event loop" in str(e) or "asyncio.run()" in str(e):
-            logger.warning(
-                "Event loop already exists, using get_event_loop",
-                extra={"error": str(e)},
-            )
-            loop = asyncio.get_event_loop()
-            results, metering = loop.run_until_complete(
-                execute_tasks_async(
-                    tasks=tasks,
-                    extraction_results=extraction_results,
-                    page_images=page_images,
-                    sorted_page_ids=sorted_page_ids,
-                    model_id=model_id,
-                    system_prompt=system_prompt,
-                    temperature=temperature,
-                    max_tokens=max_tokens,
-                    document_schema=document_schema,
-                    max_concurrent=max_concurrent,
-                    max_retries=max_retries,
-                    connect_timeout=connect_timeout,
-                    read_timeout=read_timeout,
-                )
-            )
-        else:
-            raise
+        with concurrent.futures.ThreadPoolExecutor(max_workers=1) as executor:
+            future = executor.submit(asyncio.run, _run())
+            results, metering = future.result()
+    else:
+        # No running loop - safe to use asyncio.run()
+        results, metering = asyncio.run(_run())
 
     duration = time.time() - start_time
 
diff --git a/lib/idp_common_pkg/idp_common/assessment/strands_tools.py b/lib/idp_common_pkg/idp_common/assessment/strands_tools.py
@@ -172,7 +172,7 @@ def create_strands_tools(
     Create all tools needed for Strands-based assessment.
 
     Args:
-        page_images: List of page image bytes (with grid overlay already applied)
+        page_images: List of raw page image bytes (ruler overlay added on-demand by view_image tool)
         sorted_page_ids: List of page IDs in sorted order
 
     Returns:
diff --git a/lib/idp_common_pkg/idp_common/config/models.py b/lib/idp_common_pkg/idp_common/config/models.py
@@ -141,14 +141,6 @@ def set_default_review_agent_model(self) -> Self:
 
         return self
 
-    @model_validator(mode="after")
-    def set_default_review_agent_model(self) -> Self:
-        """Set review_agent_model to extraction model if not specified."""
-        if not self.agentic.review_agent_model:
-            self.agentic.review_agent_model = self.model
-
-        return self
-
 
 class ClassificationConfig(BaseModel):
     """Document classification configuration"""
diff --git a/lib/idp_common_pkg/idp_common/extraction/agentic_idp.py b/lib/idp_common_pkg/idp_common/extraction/agentic_idp.py
@@ -11,6 +11,7 @@
 import json
 import logging
 import os
+import re
 import threading
 from pathlib import Path
 from typing import (
@@ -21,6 +22,7 @@
 
 import jsonpatch
 from aws_lambda_powertools import Logger
+from botocore.config import Config
 from PIL import Image
 from pydantic import BaseModel, Field
 from strands import Agent, tool
@@ -36,6 +38,8 @@
 
 from idp_common.bedrock import (
     build_model_config,
+    supports_prompt_caching,
+    supports_tool_caching,
 )
 from idp_common.config.models import IDPConfig
 from idp_common.utils.bedrock_utils import (
diff --git a/lib/idp_common_pkg/idp_common/utils/bedrock_utils.py b/lib/idp_common_pkg/idp_common/utils/bedrock_utils.py
@@ -1,3 +1,5 @@
+from __future__ import annotations
+
 import asyncio
 import json
 import logging
@@ -7,19 +9,20 @@
 import time
 from collections.abc import Awaitable, Callable
 from functools import wraps
-from typing import Unpack
+from typing import TYPE_CHECKING, Unpack
 
 import botocore.exceptions
-from mypy_boto3_bedrock_runtime import BedrockRuntimeClient
-from mypy_boto3_bedrock_runtime.type_defs import (
-    ConverseRequestTypeDef,
-    ConverseResponseTypeDef,
-    ConverseStreamRequestTypeDef,
-    ConverseStreamResponseTypeDef,
-    InvokeModelRequestTypeDef,
-    InvokeModelResponseTypeDef,
-)
-from strands.models.bedrock import ModelThrottledException
+
+if TYPE_CHECKING:
+    from mypy_boto3_bedrock_runtime import BedrockRuntimeClient
+    from mypy_boto3_bedrock_runtime.type_defs import (
+        ConverseRequestTypeDef,
+        ConverseResponseTypeDef,
+        ConverseStreamRequestTypeDef,
+        ConverseStreamResponseTypeDef,
+        InvokeModelRequestTypeDef,
+        InvokeModelResponseTypeDef,
+    )
 
 # Optional import for strands-agents (may not be installed in all environments)
 try: