Skip to content

Commit 2eb316f

Browse files
author
Bob Strahan
committed
refactor exception handling
1 parent dc830f1 commit 2eb316f

File tree

3 files changed

+80
-9
lines changed
  • lib/idp_common_pkg/idp_common/classification
  • patterns
    • pattern-2/src/classification_function
    • pattern-3/src/classification_function

3 files changed

+80
-9
lines changed

lib/idp_common_pkg/idp_common/classification/service.py

Lines changed: 27 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1168,6 +1168,7 @@ def classify_document(self, document: Document) -> Document:
11681168
all_page_results = list(cached_page_classifications.values())
11691169
combined_metering = {}
11701170
errors_lock = threading.Lock() # Thread safety for error collection
1171+
failed_page_exceptions = {} # Store original exceptions for failed pages
11711172

11721173
# Determine which pages need classification
11731174
pages_to_classify = {}
@@ -1240,16 +1241,36 @@ def classify_document(self, document: Document) -> Document:
12401241
combined_metering, page_metering
12411242
)
12421243
except Exception as e:
1244+
# Capture exception details in the document object instead of raising
12431245
error_msg = f"Error classifying page {page_id}: {str(e)}"
12441246
logger.error(error_msg)
12451247
with errors_lock:
12461248
document.errors.append(error_msg)
1249+
# Store the original exception for later use
1250+
failed_page_exceptions[page_id] = e
1251+
12471252
# Mark page as unclassified on error
12481253
if page_id in document.pages:
12491254
document.pages[page_id].classification = "unclassified"
12501255
document.pages[page_id].confidence = 0.0
1251-
# raise exception to enable client retries
1252-
raise
1256+
1257+
# Store failed page exceptions in document metadata for caller to access
1258+
if failed_page_exceptions:
1259+
# Store the first encountered exception as the primary failure cause
1260+
first_exception = next(iter(failed_page_exceptions.values()))
1261+
document.metadata = document.metadata or {}
1262+
document.metadata["failed_page_exceptions"] = {
1263+
page_id: {
1264+
"exception_type": type(exc).__name__,
1265+
"exception_message": str(exc),
1266+
"exception_class": exc.__class__.__module__
1267+
+ "."
1268+
+ exc.__class__.__name__,
1269+
}
1270+
for page_id, exc in failed_page_exceptions.items()
1271+
}
1272+
# Store the primary exception for easy access by caller
1273+
document.metadata["primary_exception"] = first_exception
12531274
else:
12541275
logger.info(
12551276
f"All {len(cached_page_classifications)} page classifications found in cache"
@@ -1301,7 +1322,7 @@ def classify_document(self, document: Document) -> Document:
13011322
)
13021323

13031324
except Exception as e:
1304-
# Cache successful page classifications before raising exception
1325+
# Cache successful page classifications before handling exception
13051326
if pages_to_classify:
13061327
successful_results = [
13071328
r
@@ -1317,6 +1338,9 @@ def classify_document(self, document: Document) -> Document:
13171338
document = self._update_document_status(
13181339
document, success=False, error_message=error_msg
13191340
)
1341+
# Store the exception in metadata for caller to access
1342+
document.metadata = document.metadata or {}
1343+
document.metadata["primary_exception"] = e
13201344
# raise exception to enable client retries
13211345
raise
13221346

patterns/pattern-2/src/classification_function/index.py

Lines changed: 26 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -66,13 +66,36 @@ def handler(event, context):
6666
# Classify the document - the service will update the Document directly
6767
document = service.classify_document(document)
6868

69-
# Check if document processing failed
70-
if document.status == Status.FAILED:
69+
# Check if document processing failed or has pages that failed to classify
70+
failed_page_exceptions = None
71+
primary_exception = None
72+
73+
# Check for failed page exceptions in metadata
74+
if document.metadata and "failed_page_exceptions" in document.metadata:
75+
failed_page_exceptions = document.metadata["failed_page_exceptions"]
76+
primary_exception = document.metadata.get("primary_exception")
77+
78+
# Log details about failed pages
79+
logger.error(f"Document {document.id} has {len(failed_page_exceptions)} pages that failed to classify:")
80+
for page_id, exc_info in failed_page_exceptions.items():
81+
logger.error(f" Page {page_id}: {exc_info['exception_type']} - {exc_info['exception_message']}")
82+
83+
# Check if document processing completely failed or has critical page failures
84+
if document.status == Status.FAILED or failed_page_exceptions:
7185
error_message = f"Classification failed for document {document.id}"
86+
if failed_page_exceptions:
87+
error_message += f" - {len(failed_page_exceptions)} pages failed to classify"
88+
7289
logger.error(error_message)
7390
# Update document status in AppSync before raising exception
7491
appsync_service.update_document(document)
75-
raise Exception(error_message)
92+
93+
# Raise the original exception type if available, otherwise raise generic exception
94+
if primary_exception:
95+
logger.error(f"Re-raising original exception: {type(primary_exception).__name__}")
96+
raise primary_exception
97+
else:
98+
raise Exception(error_message)
7699

77100
t1 = time.time()
78101
logger.info(f"Time taken for classification: {t1-t0:.2f} seconds")

patterns/pattern-3/src/classification_function/index.py

Lines changed: 27 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -72,12 +72,36 @@ def handler(event, context):
7272
# Classify the document - the service will update the Document directly
7373
document = service.classify_document(document)
7474

75-
# Check if document processing failed
76-
if document.status == Status.FAILED:
75+
# Check if document processing failed or has pages that failed to classify
76+
failed_page_exceptions = None
77+
primary_exception = None
78+
79+
# Check for failed page exceptions in metadata
80+
if document.metadata and "failed_page_exceptions" in document.metadata:
81+
failed_page_exceptions = document.metadata["failed_page_exceptions"]
82+
primary_exception = document.metadata.get("primary_exception")
83+
84+
# Log details about failed pages
85+
logger.error(f"Document {document.id} has {len(failed_page_exceptions)} pages that failed to classify:")
86+
for page_id, exc_info in failed_page_exceptions.items():
87+
logger.error(f" Page {page_id}: {exc_info['exception_type']} - {exc_info['exception_message']}")
88+
89+
# Check if document processing completely failed or has critical page failures
90+
if document.status == Status.FAILED or failed_page_exceptions:
7791
error_message = f"Classification failed for document {document.id}"
92+
if failed_page_exceptions:
93+
error_message += f" - {len(failed_page_exceptions)} pages failed to classify"
94+
7895
logger.error(error_message)
96+
# Update document status in AppSync before raising exception
7997
appsync_service.update_document(document)
80-
raise Exception(error_message)
98+
99+
# Raise the original exception type if available, otherwise raise generic exception
100+
if primary_exception:
101+
logger.error(f"Re-raising original exception: {type(primary_exception).__name__}")
102+
raise primary_exception
103+
else:
104+
raise Exception(error_message)
81105

82106
t1 = time.time()
83107
logger.info(f"Time taken for classification: {t1-t0:.2f} seconds")

0 commit comments

Comments
 (0)