Skip to content

Commit cb10ebc

Browse files
author
Daniel Lorch
committed
chore: update input parameter for document_text + fixes
1 parent 88ee14f commit cb10ebc

File tree

2 files changed: 12 additions (+12) and 14 deletions (-14)

notebooks/examples/dynamic-few-shot-lambda/GENAIIDP-dynamic-few-shot.py

Lines changed: 11 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -21,7 +21,8 @@
2121
from idp_common import bedrock, s3
2222

2323
logger = logging.getLogger(__name__)
24-
logger.setLevel(logging.INFO)
24+
level = logging.getLevelName(os.environ.get("LOG_LEVEL", "INFO"))
25+
logger.setLevel(level)
2526

2627
# Parse environment variables with error handling
2728
try:
@@ -45,7 +46,7 @@ def lambda_handler(event, context):
4546
Input event:
4647
{
4748
"class_label": "<class_label>",
48-
"document_texts": ["<document_text_1>", "<document_text_2>", ...],
49+
"document_text": "<document_text>",
4950
"image_content": ["<base64_image_content_1>", "<base64_image_content_2>", ...]
5051
}
5152
@@ -66,13 +67,13 @@ def lambda_handler(event, context):
6667

6768
# Validate input
6869
class_label = event.get("class_label")
69-
document_texts = event.get("document_texts", [])
70+
document_text = event.get("document_text")
7071
image_content = event.get("image_content", [])
7172

7273
logger.info(f"=== INPUT VALUES ===")
7374
logger.info(f"Class label: {class_label if class_label else 'Not specified'}")
74-
logger.info(f"Document texts: {len(document_texts)}")
75-
logger.info(f"Image content: {len(image_content)}")
75+
logger.info(f"Document text: {len(document_text) if document_text else "0"} bytes")
76+
logger.info(f"Image content: {len(image_content)} images")
7677

7778
# Decode input data
7879
image_data = _decode_images(image_content)
@@ -114,17 +115,17 @@ def _encode_images(image_content):
114115

115116
def _s3vectors_find_similar_items(image_data):
116117
"""Find similar items for input"""
117-
118118
# find similar items based on image similarity only
119119
similar_items = {}
120120
for page_image in image_data:
121-
result = _s3vectors_find_similar_items_from_image(image_data)
121+
result = _s3vectors_find_similar_items_from_image(page_image)
122122
_merge_examples(similar_items, result)
123123

124124
# create result set
125125
result = []
126126
for key, example in similar_items.items():
127127
metadata = example.get("metadata", {})
128+
distance = example.get("distance")
128129
attributes_prompt = metadata.get("attributesPrompt")
129130

130131
# Only process this example if it has a non-empty attributesPrompt
@@ -134,7 +135,7 @@ def _s3vectors_find_similar_items(image_data):
134135
)
135136
continue
136137

137-
attributes = _extract_metadata(metadata)
138+
attributes = _extract_metadata(metadata, distance)
138139
result.append(attributes)
139140

140141
return result
@@ -169,8 +170,8 @@ def _merge_examples(examples, new_examples):
169170
new_distance = new_example.get("distance", 1.0)
170171

171172
# update example
172-
if combined_examples.get(key):
173-
existing_distance = combined_examples[key].get("distance", 1.0)
173+
if examples.get(key):
174+
existing_distance = examples[key].get("distance", 1.0)
174175
examples[key]["distance"] = min(new_distance, existing_distance)
175176
examples[key]["metadata"] = new_example.get("metadata")
176177
# insert example

notebooks/examples/dynamic-few-shot-lambda/README.md

Lines changed: 1 addition & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -103,10 +103,7 @@ extraction:
103103
```json
104104
{
105105
"class_label": "invoice",
106-
"document_texts": [
107-
"Invoice text or markdown from page 1...",
108-
"Invoice text or markdown from page 2..."
109-
],
106+
"document_text": "Text or markdown from section 1 (pages 1-3)...",
110107
"image_content": [
111108
"base64_encoded_image_1",
112109
"base64_encoded_image_2"

0 commit comments

Comments
 (0)