Skip to content

Commit acd8967

Browse files
committed
small fixes
1 parent e21efb8 commit acd8967

File tree

6 files changed

+62
-67
lines changed

6 files changed

+62
-67
lines changed

lib/idp_common_pkg/idp_common/assessment/granular_service.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,15 +11,19 @@
1111
4. Maintaining assessment structure that mirrors extraction results
1212
"""
1313

14+
from __future__ import annotations
15+
1416
import json
1517
import os
1618
import time
17-
from typing import Any
19+
from typing import TYPE_CHECKING, Any
1820

1921
from aws_lambda_powertools import Logger
20-
from mypy_boto3_dynamodb.service_resource import DynamoDBServiceResource
2122

2223
from idp_common import image, metrics, s3, utils
24+
25+
if TYPE_CHECKING:
26+
from mypy_boto3_dynamodb.service_resource import DynamoDBServiceResource
2327
from idp_common.assessment.models import AssessmentResult, AssessmentTask
2428
from idp_common.assessment.strands_executor import execute_assessment_tasks_parallel
2529
from idp_common.assessment.strands_service import _convert_field_path_to_string

lib/idp_common_pkg/idp_common/assessment/strands_executor.py

Lines changed: 37 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
"""
77

88
import asyncio
9+
import concurrent.futures
910
import os
1011
import time
1112
from typing import Any, cast
@@ -188,53 +189,44 @@ def execute_assessment_tasks_parallel(
188189

189190
start_time = time.time()
190191

191-
# Run async executor
192-
# Use asyncio.run() for clean event loop management
192+
# Define the async coroutine to run
193+
async def _run() -> tuple[list[AssessmentResult], dict[str, Any]]:
194+
return await execute_tasks_async(
195+
tasks=tasks,
196+
extraction_results=extraction_results,
197+
page_images=page_images,
198+
sorted_page_ids=sorted_page_ids,
199+
model_id=model_id,
200+
system_prompt=system_prompt,
201+
temperature=temperature,
202+
max_tokens=max_tokens,
203+
document_schema=document_schema,
204+
max_concurrent=max_concurrent,
205+
max_retries=max_retries,
206+
connect_timeout=connect_timeout,
207+
read_timeout=read_timeout,
208+
)
209+
210+
# Check if there's already a running event loop
211+
# This is more robust than catching exceptions with string matching
193212
try:
194-
results, metering = asyncio.run(
195-
execute_tasks_async(
196-
tasks=tasks,
197-
extraction_results=extraction_results,
198-
page_images=page_images,
199-
sorted_page_ids=sorted_page_ids,
200-
model_id=model_id,
201-
system_prompt=system_prompt,
202-
temperature=temperature,
203-
max_tokens=max_tokens,
204-
document_schema=document_schema,
205-
max_concurrent=max_concurrent,
206-
max_retries=max_retries,
207-
connect_timeout=connect_timeout,
208-
read_timeout=read_timeout,
209-
)
213+
loop = asyncio.get_running_loop()
214+
except RuntimeError:
215+
loop = None
216+
217+
if loop is not None and loop.is_running():
218+
# We're inside an async context (e.g., Jupyter, nested async call)
219+
# Execute in a separate thread to avoid "cannot be called from a running event loop"
220+
logger.warning(
221+
"Event loop already running, executing in separate thread",
222+
extra={"loop": str(loop)},
210223
)
211-
except RuntimeError as e:
212-
# Handle case where event loop already exists (shouldn't happen in Lambda)
213-
if "There is no current event loop" in str(e) or "asyncio.run()" in str(e):
214-
logger.warning(
215-
"Event loop already exists, using get_event_loop",
216-
extra={"error": str(e)},
217-
)
218-
loop = asyncio.get_event_loop()
219-
results, metering = loop.run_until_complete(
220-
execute_tasks_async(
221-
tasks=tasks,
222-
extraction_results=extraction_results,
223-
page_images=page_images,
224-
sorted_page_ids=sorted_page_ids,
225-
model_id=model_id,
226-
system_prompt=system_prompt,
227-
temperature=temperature,
228-
max_tokens=max_tokens,
229-
document_schema=document_schema,
230-
max_concurrent=max_concurrent,
231-
max_retries=max_retries,
232-
connect_timeout=connect_timeout,
233-
read_timeout=read_timeout,
234-
)
235-
)
236-
else:
237-
raise
224+
with concurrent.futures.ThreadPoolExecutor(max_workers=1) as executor:
225+
future = executor.submit(asyncio.run, _run())
226+
results, metering = future.result()
227+
else:
228+
# No running loop - safe to use asyncio.run()
229+
results, metering = asyncio.run(_run())
238230

239231
duration = time.time() - start_time
240232

lib/idp_common_pkg/idp_common/assessment/strands_tools.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -172,7 +172,7 @@ def create_strands_tools(
172172
Create all tools needed for Strands-based assessment.
173173
174174
Args:
175-
page_images: List of page image bytes (with grid overlay already applied)
175+
page_images: List of raw page image bytes (ruler overlay added on-demand by view_image tool)
176176
sorted_page_ids: List of page IDs in sorted order
177177
178178
Returns:

lib/idp_common_pkg/idp_common/config/models.py

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -141,14 +141,6 @@ def set_default_review_agent_model(self) -> Self:
141141

142142
return self
143143

144-
@model_validator(mode="after")
145-
def set_default_review_agent_model(self) -> Self:
146-
"""Set review_agent_model to extraction model if not specified."""
147-
if not self.agentic.review_agent_model:
148-
self.agentic.review_agent_model = self.model
149-
150-
return self
151-
152144

153145
class ClassificationConfig(BaseModel):
154146
"""Document classification configuration"""

lib/idp_common_pkg/idp_common/extraction/agentic_idp.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
import json
1212
import logging
1313
import os
14+
import re
1415
import threading
1516
from pathlib import Path
1617
from typing import (
@@ -21,6 +22,7 @@
2122

2223
import jsonpatch
2324
from aws_lambda_powertools import Logger
25+
from botocore.config import Config
2426
from PIL import Image
2527
from pydantic import BaseModel, Field
2628
from strands import Agent, tool
@@ -36,6 +38,8 @@
3638

3739
from idp_common.bedrock import (
3840
build_model_config,
41+
supports_prompt_caching,
42+
supports_tool_caching,
3943
)
4044
from idp_common.config.models import IDPConfig
4145
from idp_common.utils.bedrock_utils import (

lib/idp_common_pkg/idp_common/utils/bedrock_utils.py

Lines changed: 14 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
from __future__ import annotations
2+
13
import asyncio
24
import json
35
import logging
@@ -7,19 +9,20 @@
79
import time
810
from collections.abc import Awaitable, Callable
911
from functools import wraps
10-
from typing import Unpack
12+
from typing import TYPE_CHECKING, Unpack
1113

1214
import botocore.exceptions
13-
from mypy_boto3_bedrock_runtime import BedrockRuntimeClient
14-
from mypy_boto3_bedrock_runtime.type_defs import (
15-
ConverseRequestTypeDef,
16-
ConverseResponseTypeDef,
17-
ConverseStreamRequestTypeDef,
18-
ConverseStreamResponseTypeDef,
19-
InvokeModelRequestTypeDef,
20-
InvokeModelResponseTypeDef,
21-
)
22-
from strands.models.bedrock import ModelThrottledException
15+
16+
if TYPE_CHECKING:
17+
from mypy_boto3_bedrock_runtime import BedrockRuntimeClient
18+
from mypy_boto3_bedrock_runtime.type_defs import (
19+
ConverseRequestTypeDef,
20+
ConverseResponseTypeDef,
21+
ConverseStreamRequestTypeDef,
22+
ConverseStreamResponseTypeDef,
23+
InvokeModelRequestTypeDef,
24+
InvokeModelResponseTypeDef,
25+
)
2326

2427
# Optional import for strands-agents (may not be installed in all environments)
2528
try:

0 commit comments

Comments
 (0)