Skip to content

Commit 63547af

Browse files
committed
encapsulate ruler
1 parent 8b9d290 commit 63547af

File tree

9 files changed

+286
-350
lines changed

9 files changed

+286
-350
lines changed

lib/idp_common_pkg/idp_common/assessment/granular_service.py

Lines changed: 3 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,6 @@
3636
from idp_common.extraction.models import ExtractionData
3737
from idp_common.models import Document, Status
3838
from idp_common.utils import check_token_limit
39-
from idp_common.utils.grid_overlay import add_ruler_edges
4039

4140
logger = Logger(service="assessment", level=os.getenv("LOG_LEVEL", "INFO"))
4241

@@ -845,28 +844,21 @@ def process_document_section(self, document: Document, section_id: str) -> Docum
845844
f"Found {len(cached_task_results)} cached assessment task results, processing {len(tasks_to_process)} remaining tasks"
846845
)
847846

848-
# Apply grid overlay to page images for assessment
849-
grid_page_images = []
850-
for idx, page_img in enumerate(page_images):
851-
grid_img = add_ruler_edges(page_img)
852-
logger.info(
853-
f"Added ruler overlay to page {idx}: {len(page_img):,} bytes -> {len(grid_img):,} bytes"
854-
)
855-
grid_page_images.append(grid_img)
856-
857847
# Execute tasks using Strands-based parallel executor
848+
# Note: ruler overlay is added internally by strands_service/strands_tools
858849
logger.info(
859850
f"Processing {len(tasks_to_process)} assessment tasks with Strands executor (max_concurrent={self.max_workers})"
860851
)
861852

862853
request_start_time = time.time()
863854

864855
# Call Strands executor - handles both parallel and sequential based on max_concurrent
856+
# Pass raw page_images - ruler overlay is added internally when needed
865857
task_results, task_metering, processing_time = (
866858
execute_assessment_tasks_parallel(
867859
tasks=tasks_to_process,
868860
extraction_results=extraction_results,
869-
page_images=grid_page_images,
861+
page_images=page_images,
870862
sorted_page_ids=sorted_page_ids,
871863
model_id=self.config.assessment.model,
872864
system_prompt=self.config.assessment.system_prompt,

lib/idp_common_pkg/idp_common/assessment/models.py

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -114,7 +114,7 @@ def from_corners(
114114
scale: float = 1000.0,
115115
) -> "BoundingBoxCoordinates":
116116
"""
117-
Create from corner coordinates in document space.
117+
Create from corner coordinates.
118118
119119
Args:
120120
x1, y1: Top-left corner, each in the range 0 to `scale`
@@ -228,7 +228,16 @@ def from_llm_response(
228228
bbox_coords: list[float] | None = None,
229229
page_num: int | None = None,
230230
) -> "FieldAssessmentData":
231-
"""Create from LLM response data."""
231+
"""
232+
Create from LLM response data.
233+
234+
Args:
235+
confidence: Confidence score (0-1)
236+
reasoning: Explanation for confidence
237+
confidence_threshold: Threshold for confidence alerts
238+
bbox_coords: Optional bounding box coordinates [x1, y1, x2, y2]
239+
page_num: Optional page number (1-indexed)
240+
"""
232241
geometry = None
233242
if bbox_coords is not None and page_num is not None:
234243
geom = Geometry.from_bbox_list(bbox_coords, page_num)

lib/idp_common_pkg/idp_common/assessment/strands_executor.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ async def execute_tasks_async(
4040
Args:
4141
tasks: List of assessment tasks to execute
4242
extraction_results: Full extraction results
43-
page_images: List of page images (with grid overlay)
43+
page_images: List of raw page images (ruler overlay added internally)
4444
sorted_page_ids: List of page IDs
4545
model_id: Model to use
4646
system_prompt: System prompt
@@ -166,7 +166,7 @@ def execute_assessment_tasks_parallel(
166166
Args:
167167
tasks: List of assessment tasks
168168
extraction_results: Full extraction results
169-
page_images: List of page images (with grid overlay already applied)
169+
page_images: List of raw page images (ruler overlay added internally)
170170
sorted_page_ids: List of page IDs in sorted order
171171
model_id: Model ID
172172
system_prompt: System prompt

lib/idp_common_pkg/idp_common/assessment/strands_service.py

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626
from idp_common.assessment.strands_tools import create_strands_tools
2727
from idp_common.bedrock import build_model_config
2828
from idp_common.utils.bedrock_utils import async_exponential_backoff_retry
29+
from idp_common.utils.grid_overlay import add_ruler_edges
2930

3031
logger = Logger(service="assessment", level=os.getenv("LOG_LEVEL", "INFO"))
3132

@@ -49,9 +50,8 @@ async def assess_attribute_with_strands(
4950
5051
Args:
5152
task: Assessment task to process
52-
base_content: Base prompt content (includes images)
5353
extraction_results: Full extraction results
54-
page_images: List of page images (with grid overlay already applied)
54+
page_images: List of raw page images (ruler overlay added internally)
5555
sorted_page_ids: List of page IDs in sorted order
5656
model_id: Bedrock model ID
5757
system_prompt: System prompt for assessment
@@ -364,16 +364,21 @@ def _build_task_prompt(
364364
365365
Args:
366366
task: Assessment task for one specific field
367-
page_images: List of page images to include in the prompt
367+
page_images: List of raw page images (ruler will be added here)
368368
369369
Returns:
370370
List of content blocks with images and task text
371371
"""
372372
field_path_str = _convert_field_path_to_string(task.field_path)
373373

374-
# Create image content blocks
374+
# Create image content blocks with ruler overlay
375+
# Rulers are added here so the LLM can see coordinate reference marks
375376
image_blocks = [
376-
ContentBlock(image=ImageContent(format="png", source=ImageSource(bytes=img)))
377+
ContentBlock(
378+
image=ImageContent(
379+
format="png", source=ImageSource(bytes=add_ruler_edges(img))
380+
)
381+
)
377382
for img in page_images
378383
]
379384

@@ -414,6 +419,8 @@ def _convert_to_assessment_result(
414419
assessment = output.assessment
415420

416421
# Create standardized field assessment data
422+
# Note: bounding box coordinates are on the ruler's 0-1000 scale, which maps
423+
# directly to document space, so no ruler-offset adjustment is needed here
417424
field_data = FieldAssessmentData.from_llm_response(
418425
confidence=assessment.confidence,
419426
reasoning=assessment.reasoning,

lib/idp_common_pkg/idp_common/assessment/strands_tools.py

Lines changed: 17 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
from strands import Agent, tool
1414

1515
from idp_common.assessment.strands_models import AssessmentOutput, BoundingBox
16-
from idp_common.utils.grid_overlay import draw_bounding_boxes
16+
from idp_common.utils.grid_overlay import add_ruler_and_draw_boxes, add_ruler_edges
1717
from idp_common.utils.strands_agent_tools.todo_list import (
1818
create_todo_list,
1919
update_todo,
@@ -37,10 +37,13 @@ class ViewImageInput(BaseModel):
3737

3838
@tool
3939
def submit_assessment(assessment: AssessmentOutput, agent: Agent) -> str:
40+
"""Submit the final assessment for a field."""
4041
# Validate assessment structure and return helpful errors
4142
validated_assessment = AssessmentOutput.model_validate(assessment)
4243

43-
# Store in agent state
44+
# Store in agent state - coordinates are in 0-1000 document space
45+
# The ruler shows 0-1000 scale mapping to the document, so LLM coordinates
46+
# are already in document space and need no adjustment
4447
agent.state.set("assessment_output", validated_assessment.model_dump(mode="json"))
4548

4649
logger.info(
@@ -56,15 +59,15 @@ def create_view_image_tool(page_images: list[bytes], sorted_page_ids: list[str])
5659
Create a view_image tool that has access to page images.
5760
5861
Args:
59-
page_images: List of page image bytes (with grid overlay already applied)
62+
page_images: List of raw page image bytes (without ruler overlay)
6063
sorted_page_ids: List of page IDs in sorted order
6164
6265
Returns:
6366
A Strands tool function for viewing images
6467
"""
6568

6669
@tool
67-
def view_image(input_data: ViewImageInput, agent: Agent) -> dict:
70+
def view_image(input_data: ViewImageInput, agent: Agent) -> dict[str, Any]:
6871
"""
6972
View a specific page image, optionally highlighting a bounding box area.
7073
@@ -97,13 +100,13 @@ def view_image(input_data: ViewImageInput, agent: Agent) -> dict:
97100
f"Valid range: 0-{len(page_images) - 1}"
98101
)
99102

100-
# Get the base image (already has grid overlay)
101-
img_bytes = page_images[view_input.image_index]
103+
# Get the raw image (no ruler overlay yet)
104+
raw_img_bytes = page_images[view_input.image_index]
102105
page_id = sorted_page_ids[view_input.image_index]
103106

104-
# If bounding box is specified, draw it on the image
107+
# Add ruler and optionally draw bounding box
105108
if view_input.bounding_box:
106-
# Convert BoundingBox to dict format for draw_bounding_boxes
109+
# Convert BoundingBox to dict format
107110
bbox_dict = {
108111
"bbox": [
109112
view_input.bounding_box.x1,
@@ -115,21 +118,19 @@ def view_image(input_data: ViewImageInput, agent: Agent) -> dict:
115118
"color": "red",
116119
}
117120

118-
# Draw the bounding box on the image (which has 30px margin for ruler)
119-
# Let drawing errors propagate - if we can't draw, something is wrong
120-
img_bytes = draw_bounding_boxes(
121-
img_bytes,
122-
[bbox_dict],
123-
margin_offset=30,
124-
)
121+
# Add ruler overlay and draw bounding box in one step
122+
img_bytes = add_ruler_and_draw_boxes(raw_img_bytes, [bbox_dict])
125123

126124
logger.debug(
127-
"Drew bounding box on image",
125+
"Added ruler and drew bounding box on image",
128126
extra={
129127
"image_index": view_input.image_index,
130128
"bbox": bbox_dict["bbox"],
131129
},
132130
)
131+
else:
132+
# Just add ruler overlay (no bounding box)
133+
img_bytes = add_ruler_edges(raw_img_bytes)
133134

134135
logger.info(
135136
"Returning image to agent",

0 commit comments

Comments
 (0)