|
| 1 | +# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. |
| 2 | +# SPDX-License-Identifier: MIT-0 |
| 3 | + |
| 4 | +import os |
| 5 | +import json |
| 6 | +import time |
| 7 | +import logging |
| 8 | + |
| 9 | +from idp_common import get_config, assessment |
| 10 | +from idp_common.models import Document, Status |
| 11 | +from idp_common.appsync.service import DocumentAppSyncService |
| 12 | + |
| 13 | +# Configuration will be loaded in handler function |
| 14 | + |
# Root logger for this Lambda; verbosity is taken from LOG_LEVEL (default INFO).
logger = logging.getLogger()
logger.setLevel(os.getenv("LOG_LEVEL", "INFO"))

# The Bedrock client logger has its own level knob (BEDROCK_LOG_LEVEL, default
# INFO) so its verbosity can be tuned independently of the root logger.
logging.getLogger("idp_common.bedrock.client").setLevel(
    os.getenv("BEDROCK_LOG_LEVEL", "INFO")
)
| 18 | + |
def handler(event, context):
    """
    Lambda handler for document assessment.

    Assesses the confidence of extraction results for one document section
    using the Assessment service from the idp_common library.

    Args:
        event: Invocation payload. Must contain a non-empty ``document``
            (a dictionary accepted by ``Document.from_dict``) and a
            non-empty ``section_id``.
        context: Lambda context object; not used by this handler.

    Returns:
        dict: ``{'document': <updated document as a dict>,
        'section_id': <the section_id from the event>}``.

    Raises:
        ValueError: If ``document`` or ``section_id`` is missing or empty.
        Exception: If the assessment service returns a document whose
            status is ``Status.FAILED``.
    """
    logger.info(f"Starting assessment processing for event: {json.dumps(event, default=str)}")

    # Load configuration
    config = get_config()
    # default=str keeps this log line from raising on values that are not
    # JSON-native, consistent with how the event is dumped above.
    logger.info(f"Config: {json.dumps(config, default=str)}")

    # Extract input from event
    document_dict = event.get('document', {})
    section_id = event.get('section_id')

    # Validate inputs
    if not document_dict:
        raise ValueError("No document provided in event")

    if not section_id:
        raise ValueError("No section_id provided in event")

    # Convert document dictionary to Document object
    document = Document.from_dict(document_dict)
    logger.info(f"Processing assessment for document {document.id}, section {section_id}")

    # Publish the ASSESSING status using a Document that carries only the
    # id, input key and new status.
    status = Document(
        id=document.id,
        input_key=document.input_key,
        status=Status.ASSESSING,
    )
    appsync_service = DocumentAppSyncService()
    logger.info(f"Updating document status to {status.status}")
    appsync_service.update_document(status)

    # Initialize assessment service
    assessment_service = assessment.AssessmentService(config=config)

    # Process the document section for assessment. perf_counter is a
    # monotonic clock, so the measured duration cannot be skewed by
    # wall-clock adjustments.
    started = time.perf_counter()
    logger.info(f"Starting assessment for section {section_id}")
    updated_document = assessment_service.process_document_section(document, section_id)
    elapsed = time.perf_counter() - started
    logger.info(f"Total assessment time: {elapsed:.2f} seconds")

    # Surface a failed assessment as an exception so the invocation fails.
    if updated_document.status == Status.FAILED:
        error_message = f"Assessment failed for document {updated_document.id}, section {section_id}"
        logger.error(error_message)
        raise Exception(error_message)

    # Return the updated document as a dictionary
    result = {
        'document': updated_document.to_dict(),
        'section_id': section_id,
    }

    logger.info("Assessment processing completed")
    return result
| 80 | + |
0 commit comments