Skip to content

Commit 20d8f22

Browse files
committed
cleanup
1 parent c7288ea commit 20d8f22

File tree

13 files changed

+1187
-80
lines changed

13 files changed

+1187
-80
lines changed

lib/idp_common_pkg/idp_common/assessment/example_usage.py

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -142,14 +142,15 @@ def example_granular_assessment():
142142

143143
# Demonstrate task creation (this would normally be done internally)
144144
if hasattr(assessment_service, "_create_assessment_tasks"):
145-
attributes = assessment_service._get_class_attributes("Bank Statement")
146-
tasks = assessment_service._create_assessment_tasks(
147-
example_extraction_results, attributes, 0.9
145+
class_schema = assessment_service._get_class_schema("Bank Statement")
146+
properties = class_schema.get("properties", {})
147+
tasks, assessment_structure = assessment_service._create_assessment_tasks(
148+
example_extraction_results, properties, 0.9
148149
)
149150

150151
logger.info(f"\nCreated {len(tasks)} assessment tasks:")
151152
for task in tasks:
152-
logger.info(f" - {task.task_id}: {task.task_type} for {task.attributes}")
153+
logger.info(f" - {task.task_id}: {task.task_type} for {task.field_name}")
153154

154155
return assessment_service, config
155156

lib/idp_common_pkg/idp_common/assessment/granular_service.py

Lines changed: 49 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -309,7 +309,6 @@ def _traverse(
309309
field_name=_convert_field_path_to_string(item_path),
310310
field_schema=items_schema,
311311
confidence_threshold=threshold,
312-
parent_assessment_dict=assessment_list, # type: ignore
313312
)
314313
tasks.append(task)
315314
task_counter[0] += 1
@@ -332,7 +331,6 @@ def _traverse(
332331
field_name=_convert_field_path_to_string(field_path),
333332
field_schema=prop_schema,
334333
confidence_threshold=threshold,
335-
parent_assessment_dict=parent_dict,
336334
)
337335
tasks.append(task)
338336
task_counter[0] += 1
@@ -536,14 +534,40 @@ def _is_throttling_exception(self, exception: Exception) -> bool:
536534
for throttle_term in self.throttling_exceptions
537535
)
538536

537+
def _insert_at_field_path(
538+
self,
539+
structure: dict[str, Any],
540+
field_path: tuple[str | int, ...],
541+
value: Any,
542+
) -> None:
543+
"""
544+
Navigate through structure using field_path and insert value at the end.
545+
546+
Args:
547+
structure: The assessment structure to navigate
548+
field_path: Tuple path like ("Account Holder Address", "City") or ("Transactions", 0, "Amount")
549+
value: The assessment data to insert
550+
551+
Example:
552+
field_path = ("Account Holder Address", "City")
553+
-> structure["Account Holder Address"]["City"] = value
554+
555+
field_path = ("Transactions", 0, "Amount")
556+
-> structure["Transactions"][0]["Amount"] = value
557+
"""
558+
parent = structure
559+
for key in field_path[:-1]:
560+
parent = parent[key]
561+
parent[field_path[-1]] = value
562+
539563
def _aggregate_assessment_results(
540564
self,
541565
tasks: list[AssessmentTask],
542566
results: list[AssessmentResult],
543567
assessment_structure: dict[str, Any],
544568
) -> tuple[dict[str, Any], list[dict[str, Any]], dict[str, Any]]:
545569
"""
546-
Aggregate individual task results into assessment structure using direct parent insertion.
570+
Aggregate individual task results into assessment structure using field_path navigation.
547571
548572
Args:
549573
tasks: list of assessment tasks
@@ -553,13 +577,12 @@ def _aggregate_assessment_results(
553577
Returns:
554578
Tuple of (assessment_structure, confidence_alerts, aggregated_metering)
555579
"""
556-
all_confidence_alerts = []
557-
aggregated_metering = {}
580+
all_confidence_alerts: list[dict[str, Any]] = []
581+
aggregated_metering: dict[str, Any] = {}
558582

559583
# Create a mapping from task_id to result
560584
result_map = {result.task_id: result for result in results}
561585

562-
# Process each task result - direct O(1) insertion using parent reference
563586
for task in tasks:
564587
result = result_map.get(task.task_id)
565588
if not result or not result.success:
@@ -575,45 +598,31 @@ def _aggregate_assessment_results(
575598
# Add confidence alerts
576599
all_confidence_alerts.extend(result.confidence_alerts)
577600

578-
# Get assessment data from result - should be a single assessment object
579-
# The Strands agent returns the assessment in result.assessment_data
580-
assessment_obj = result.assessment_data
601+
# Get assessment data directly from result
602+
# strands_service returns flat assessment dict: {confidence, value, reasoning, ...}
603+
field_assessment = result.assessment_data
581604

582-
if not isinstance(assessment_obj, dict):
605+
if not isinstance(field_assessment, dict):
583606
logger.warning(
584-
f"Task {task.task_id}: expected dict assessment, got {type(assessment_obj)}"
607+
f"Task {task.task_id}: expected dict assessment, got {type(field_assessment)}"
585608
)
586609
continue
587610

588-
# Add confidence_threshold to the assessment object
589-
assessment_obj["confidence_threshold"] = task.confidence_threshold
590-
591-
# Direct insertion using parent reference - O(1) operation!
592-
parent = task.parent_assessment_dict
593-
field_name = task.field_name
594-
595-
if isinstance(parent, dict):
596-
# Regular field - insert into parent dict
597-
parent[field_name] = assessment_obj
598-
elif isinstance(parent, list):
599-
# Array item - get index from field_path
600-
# field_path is like ("items", 0, "price") - second-to-last is the index
601-
if len(task.field_path) >= 2 and isinstance(task.field_path[-2], int):
602-
idx = task.field_path[-2]
603-
# Replace the None placeholder we created during structure building
604-
if idx < len(parent):
605-
parent[idx] = assessment_obj
606-
else:
607-
logger.warning(
608-
f"Task {task.task_id}: index {idx} out of range for list of length {len(parent)}"
609-
)
610-
else:
611-
logger.warning(
612-
f"Task {task.task_id}: cannot determine array index from path {task.field_path}"
613-
)
614-
else:
615-
logger.warning(
616-
f"Task {task.task_id}: unexpected parent type {type(parent)}"
611+
# Add confidence_threshold if not already present
612+
if "confidence_threshold" not in field_assessment:
613+
field_assessment["confidence_threshold"] = task.confidence_threshold
614+
615+
# Insert directly at field_path - no unwrapping needed
616+
try:
617+
self._insert_at_field_path(
618+
assessment_structure, task.field_path, field_assessment
619+
)
620+
logger.debug(
621+
f"Task {task.task_id}: Inserted assessment at {task.field_path}"
622+
)
623+
except (KeyError, IndexError, TypeError) as e:
624+
logger.error(
625+
f"Task {task.task_id}: Failed to insert at path {task.field_path}: {e}"
617626
)
618627

619628
return assessment_structure, all_confidence_alerts, aggregated_metering

lib/idp_common_pkg/idp_common/assessment/models.py

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -78,10 +78,6 @@ class AssessmentTask(BaseModel):
7878
# Confidence threshold for this field
7979
confidence_threshold: float
8080

81-
# Direct reference to parent container in assessment structure (for O(1) insertion)
82-
# Can be Dict for regular fields or list for array items
83-
parent_assessment_dict: dict[str, Any] | list[Any]
84-
8581

8682
class AssessmentResult(BaseModel):
8783
"""Result of a single assessment task (used by both granular and strands services)."""

lib/idp_common_pkg/idp_common/assessment/strands_service.py

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -405,8 +405,12 @@ def _convert_to_assessment_result(
405405
metering: dict[str, Any],
406406
processing_time: float,
407407
) -> AssessmentResult:
408-
"""Convert Strands AssessmentOutput to AssessmentResult with standardized geometry format."""
409-
# Single field assessment
408+
"""
409+
Convert Strands AssessmentOutput to AssessmentResult with standardized geometry format.
410+
411+
The assessment_data is returned as a flat dict (not wrapped by field name) because
412+
the aggregation step uses task.field_path for insertion into the final structure.
413+
"""
410414
field_name = output.field_name
411415
assessment = output.assessment
412416

@@ -429,8 +433,9 @@ def _convert_to_assessment_result(
429433
page_num=assessment.bounding_box.page if assessment.bounding_box else None,
430434
)
431435

432-
# Convert to explainability format
433-
assessment_data = {field_name: field_data.to_explainability_format()}
436+
# Return assessment data directly (not wrapped by field name)
437+
# The aggregation step uses task.field_path for proper insertion
438+
assessment_data = field_data.to_explainability_format()
434439

435440
# Check for confidence threshold violations
436441
confidence_alerts = []

lib/idp_common_pkg/idp_common/assessment/strands_tools.py

Lines changed: 0 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -37,22 +37,6 @@ class ViewImageInput(BaseModel):
3737

3838
@tool
3939
def submit_assessment(assessment: AssessmentOutput, agent: Agent) -> str:
40-
"""
41-
Submit your final confidence assessment.
42-
43-
Use this tool when you have:
44-
1. Located the values in the document images
45-
2. Determined precise bounding box coordinates using ruler markings
46-
3. Assessed the confidence based on clarity and accuracy
47-
48-
Args:
49-
assessment: Dictionary with:
50-
- assessments: dict mapping attribute names to ConfidenceAssessment
51-
- alerts: list of any threshold alerts (optional)
52-
53-
Returns:
54-
Success confirmation message or validation error details
55-
"""
5640
# Validate assessment structure and return helpful errors
5741
validated_assessment = AssessmentOutput.model_validate(assessment)
5842

lib/idp_common_pkg/idp_common/image/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -79,7 +79,7 @@ def resize_image(
7979
logger.info(
8080
f"Resizing image from {current_width}x{current_height} to {new_width}x{new_height} (scale: {scale_factor:.3f})"
8181
)
82-
image = image.resize((new_width, new_height), Image.LANCZOS)
82+
image = image.resize((new_width, new_height), Image.Resampling.LANCZOS)
8383

8484
# Save in original format if possible
8585
img_byte_array = io.BytesIO()

lib/idp_common_pkg/idp_common/utils/pdf_helpers.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -75,7 +75,7 @@ def pdf_page_to_image(
7575

7676
# Render page to pixmap
7777
mat = fitz.Matrix(dpi_scale, dpi_scale)
78-
pix = page.get_pixmap(matrix=mat)
78+
pix = page.get_pixmap(matrix=mat) # pyright: ignore[reportAttributeAccessIssue]
7979

8080
# Convert to PNG bytes
8181
png_bytes = pix.tobytes("png")
@@ -144,7 +144,7 @@ def pdf_to_images(
144144

145145
# Render page to pixmap
146146
mat = fitz.Matrix(dpi_scale, dpi_scale)
147-
pix = page.get_pixmap(matrix=mat)
147+
pix = page.get_pixmap(matrix=mat) # pyright: ignore[reportAttributeAccessIssue]
148148

149149
# Convert to PNG bytes
150150
png_bytes = pix.tobytes("png")

0 commit comments

Comments
 (0)