Skip to content

Commit 6b4bf16

Browse files
committed
Fix HITL Confidence alerts for Pattern1
1 parent 3674b87 commit 6b4bf16

File tree

1 file changed

+72
-70
lines changed
  • patterns/pattern-1/src/processresults_function

1 file changed

+72
-70
lines changed

patterns/pattern-1/src/processresults_function/index.py

Lines changed: 72 additions & 70 deletions
Original file line number | Diff line number | Diff line change
@@ -31,6 +31,7 @@
3131
ssm_client = boto3.client('ssm')
3232
bedrock_client = boto3.client('bedrock-data-automation')
3333
SAGEMAKER_A2I_REVIEW_PORTAL_URL = os.environ.get('SAGEMAKER_A2I_REVIEW_PORTAL_URL', '')
34+
enable_hitl = os.environ.get('ENABLE_HITL', 'false').lower() == 'true'
3435

3536
def get_confidence_threshold_from_config(document: Document) -> float:
3637
"""
@@ -43,16 +44,16 @@ def get_confidence_threshold_from_config(document: Document) -> float:
4344
float: The confidence threshold as a decimal (0.0-1.0)
4445
"""
4546
try:
46-
config = get_config(document)
47-
assessment_config = config.get('assessment', {})
48-
threshold_value = float(assessment_config.get('default_confidence_threshold', 0.8))
47+
config = get_config()
48+
threshold_value = float(config['assessment']['default_confidence_threshold'])
4949

5050
# Validate that the threshold is in the expected 0.0-1.0 range
5151
if threshold_value < 0.0 or threshold_value > 1.0:
5252
logger.warning(f"Invalid confidence threshold value {threshold_value}. Must be between 0.0 and 1.0. Using default: 0.80")
5353
return 0.80
5454

5555
logger.info(f"Retrieved confidence threshold from configuration: {threshold_value}")
56+
threshold_value=0.8
5657
return threshold_value
5758
except Exception as e:
5859
logger.warning(f"Failed to retrieve confidence threshold from configuration: {e}")
@@ -870,11 +871,13 @@ def process_segments(
870871
bp_confidence = custom_output["matched_blueprint"]["confidence"]
871872

872873
# Check if any key-value or blueprint confidence is below threshold
873-
low_confidence = any(
874+
low_confidence = (any(
874875
kv['confidence'] < confidence_threshold
875876
for page_num in page_indices
876877
for kv in pagespecific_details['key_value_details'].get(str(page_num), [])
877-
) or float(bp_confidence) < confidence_threshold
878+
) or float(bp_confidence) < confidence_threshold) and enable_hitl=='true'
879+
880+
logger.info(f"HITL STatus Low confidence {low_confidence}")
878881

879882
item.update({
880883
"page_array": page_indices,
@@ -931,7 +934,7 @@ def process_segments(
931934
page_array = list(range(start_page, end_page + 1))
932935
item.update({
933936
"page_array": page_array,
934-
"hitl_triggered": True,
937+
"hitl_triggered": enable_hitl,
935938
"extraction_bp_name": "None",
936939
"extracted_result": std_output
937940
})
@@ -941,30 +944,31 @@ def process_segments(
941944
record_number=record_number,
942945
bp_match=segment.get('custom_output_status'),
943946
extraction_bp_name="None",
944-
hitl_triggered=True,
947+
hitl_triggered=enable_hitl,
945948
page_array=page_array,
946949
review_portal_url=SAGEMAKER_A2I_REVIEW_PORTAL_URL
947950
)
948951

949-
hitl_triggered = True
950-
for page_number in range(start_page, end_page + 1):
951-
ImageUri = f"s3://{output_bucket}/{object_key}/pages/{page_number}/image.jpg"
952-
try:
953-
human_loop_response = start_human_loop(
954-
execution_id=execution_id,
955-
kv_pairs=[],
956-
source_image_uri=ImageUri,
957-
bounding_boxes=[],
958-
blueprintName="",
959-
bp_confidence=0.00,
960-
confidenceThreshold=confidence_threshold,
961-
page_id=page_number,
962-
page_indices=page_array,
963-
record_number=record_number
964-
)
965-
logger.info(f"Triggered human loop for page {page_number}: {human_loop_response}")
966-
except Exception as e:
967-
logger.error(f"Failed to start human loop for page {page_number}: {str(e)}")
952+
hitl_triggered = enable_hitl
953+
if enable_hitl:
954+
for page_number in range(start_page, end_page + 1):
955+
ImageUri = f"s3://{output_bucket}/{object_key}/pages/{page_number}/image.jpg"
956+
try:
957+
human_loop_response = start_human_loop(
958+
execution_id=execution_id,
959+
kv_pairs=[],
960+
source_image_uri=ImageUri,
961+
bounding_boxes=[],
962+
blueprintName="",
963+
bp_confidence=0.00,
964+
confidenceThreshold=confidence_threshold,
965+
page_id=page_number,
966+
page_indices=page_array,
967+
record_number=record_number
968+
)
969+
logger.info(f"Triggered human loop for page {page_number}: {human_loop_response}")
970+
except Exception as e:
971+
logger.error(f"Failed to start human loop for page {page_number}: {str(e)}")
968972

969973
document.hitl_metadata.append(hitl_metadata)
970974

@@ -1102,53 +1106,51 @@ def handler(event, context):
11021106

11031107
# Process HITL if enabled
11041108
hitl_triggered = "false"
1105-
enable_hitl = os.environ.get('ENABLE_HITL', 'false').lower() == 'true'
11061109

1107-
if enable_hitl:
1108-
try:
1109-
# Use the confidence threshold already calculated above
1110-
metdatafile_path = '/'.join(bda_result_prefix.split('/')[:-1])
1111-
job_metadata_key = f'{metdatafile_path}/job_metadata.json'
1112-
execution_id = event.get("execution_arn", "").split(':')[-1]
1113-
logger.info(f"HITL execution ID: {execution_id}")
1110+
try:
1111+
# Use the confidence threshold already calculated above
1112+
metdatafile_path = '/'.join(bda_result_prefix.split('/')[:-1])
1113+
job_metadata_key = f'{metdatafile_path}/job_metadata.json'
1114+
execution_id = event.get("execution_arn", "").split(':')[-1]
1115+
logger.info(f"HITL execution ID: {execution_id}")
11141116

1115-
try:
1116-
jobmetadata_file = s3_client.get_object(Bucket=bda_result_bucket, Key=job_metadata_key)
1117-
job_metadata = json.loads(jobmetadata_file['Body'].read())
1118-
if 'output_metadata' in job_metadata:
1119-
output_metadata = job_metadata['output_metadata']
1120-
if isinstance(output_metadata, list):
1121-
for asset in output_metadata:
1122-
document, hitl_result = process_segments(
1123-
input_bucket,
1124-
output_bucket,
1125-
object_key,
1126-
asset.get('segment_metadata', []),
1127-
confidence_threshold,
1128-
execution_id,
1129-
document
1130-
)
1131-
if hitl_result:
1132-
hitl_triggered = "true"
1133-
elif isinstance(output_metadata, dict):
1134-
for asset_id, asset in output_metadata.items():
1135-
document, hitl_result = process_segments(
1136-
input_bucket,
1137-
output_bucket,
1138-
object_key,
1139-
asset.get('segment_metadata', []),
1140-
confidence_threshold,
1141-
execution_id,
1142-
document
1143-
)
1144-
if hitl_result:
1145-
hitl_triggered = "true"
1146-
else:
1147-
logger.error("Unexpected output_metadata format in job_metadata.json")
1148-
except Exception as e:
1149-
logger.error(f"Error processing job_metadata.json: {str(e)}")
1117+
try:
1118+
jobmetadata_file = s3_client.get_object(Bucket=bda_result_bucket, Key=job_metadata_key)
1119+
job_metadata = json.loads(jobmetadata_file['Body'].read())
1120+
if 'output_metadata' in job_metadata:
1121+
output_metadata = job_metadata['output_metadata']
1122+
if isinstance(output_metadata, list):
1123+
for asset in output_metadata:
1124+
document, hitl_result = process_segments(
1125+
input_bucket,
1126+
output_bucket,
1127+
object_key,
1128+
asset.get('segment_metadata', []),
1129+
confidence_threshold,
1130+
execution_id,
1131+
document
1132+
)
1133+
if hitl_result:
1134+
hitl_triggered = "true"
1135+
elif isinstance(output_metadata, dict):
1136+
for asset_id, asset in output_metadata.items():
1137+
document, hitl_result = process_segments(
1138+
input_bucket,
1139+
output_bucket,
1140+
object_key,
1141+
asset.get('segment_metadata', []),
1142+
confidence_threshold,
1143+
execution_id,
1144+
document
1145+
)
1146+
if hitl_result:
1147+
hitl_triggered = "true"
1148+
else:
1149+
logger.error("Unexpected output_metadata format in job_metadata.json")
11501150
except Exception as e:
1151-
logger.error(f"Error in HITL processing: {str(e)}")
1151+
logger.error(f"Error processing job_metadata.json: {str(e)}")
1152+
except Exception as e:
1153+
logger.error(f"Error in HITL processing: {str(e)}")
11521154

11531155
# Record metrics for processed pages
11541156
metrics.put_metric('ProcessedDocuments', 1)

0 commit comments

Comments
 (0)