Skip to content

Commit 8a76072

Browse files
author
Bob Strahan
committed
Add {DOCUMENT_IMAGE} placeholder to classification and extraction prompt, and update relevant config_library configurations
1 parent 0d5437f commit 8a76072

File tree

1 file changed

+240
-52
lines changed
  • lib/idp_common_pkg/idp_common/extraction

1 file changed

+240
-52
lines changed

lib/idp_common_pkg/idp_common/extraction/service.py

Lines changed: 240 additions & 52 deletions
Original file line number | Diff line number | Diff line change
@@ -104,47 +104,83 @@ def _prepare_prompt_from_template(
104104

105105
return format_prompt(prompt_template, substitutions, required_placeholders)
106106

107-
def _build_content_with_few_shot_examples(
107+
def _build_content_with_or_without_image_placeholder(
108108
self,
109-
task_prompt_template: str,
109+
prompt_template: str,
110110
document_text: str,
111111
class_label: str,
112112
attribute_descriptions: str,
113+
image_content: Any = None,
113114
) -> List[Dict[str, Any]]:
114115
"""
115-
Build content array with few-shot examples inserted at the FEW_SHOT_EXAMPLES placeholder.
116+
Build content array, automatically deciding whether to use image placeholder processing.
116117
117118
Args:
118-
task_prompt_template: The task prompt template containing {FEW_SHOT_EXAMPLES}
119+
prompt_template: The prompt template that may contain {DOCUMENT_IMAGE}
119120
document_text: The document text content
120121
class_label: The document class label
121122
attribute_descriptions: Formatted attribute names and descriptions
123+
image_content: Optional image content to insert
122124
123125
Returns:
124126
List of content items with text and image content properly ordered
125127
"""
126-
# Split the task prompt at the FEW_SHOT_EXAMPLES placeholder
127-
parts = task_prompt_template.split("{FEW_SHOT_EXAMPLES}")
128+
if "{DOCUMENT_IMAGE}" in prompt_template:
129+
return self._build_content_with_image_placeholder(
130+
prompt_template,
131+
document_text,
132+
class_label,
133+
attribute_descriptions,
134+
image_content,
135+
)
136+
else:
137+
return self._build_content_without_image_placeholder(
138+
prompt_template,
139+
document_text,
140+
class_label,
141+
attribute_descriptions,
142+
image_content,
143+
)
144+
145+
def _build_content_with_image_placeholder(
146+
self,
147+
prompt_template: str,
148+
document_text: str,
149+
class_label: str,
150+
attribute_descriptions: str,
151+
image_content: Any = None,
152+
) -> List[Dict[str, Any]]:
153+
"""
154+
Build content array with image inserted at DOCUMENT_IMAGE placeholder if present.
155+
156+
Args:
157+
prompt_template: The prompt template that may contain {DOCUMENT_IMAGE}
158+
document_text: The document text content
159+
class_label: The document class label
160+
attribute_descriptions: Formatted attribute names and descriptions
161+
image_content: Optional image content to insert
162+
163+
Returns:
164+
List of content items with text and image content properly ordered
165+
"""
166+
# Split the prompt at the DOCUMENT_IMAGE placeholder
167+
parts = prompt_template.split("{DOCUMENT_IMAGE}")
128168

129169
if len(parts) != 2:
130-
# Fallback to regular prompt processing if placeholder not found or malformed
131-
task_prompt = self._prepare_prompt_from_template(
132-
task_prompt_template,
133-
{
134-
"DOCUMENT_TEXT": document_text,
135-
"DOCUMENT_CLASS": class_label,
136-
"ATTRIBUTE_NAMES_AND_DESCRIPTIONS": attribute_descriptions,
137-
},
138-
required_placeholders=[
139-
"DOCUMENT_TEXT",
140-
"DOCUMENT_CLASS",
141-
"ATTRIBUTE_NAMES_AND_DESCRIPTIONS",
142-
],
170+
logger.warning(
171+
"Invalid DOCUMENT_IMAGE placeholder usage, falling back to standard processing"
172+
)
173+
# Fallback to standard processing
174+
return self._build_content_without_image_placeholder(
175+
prompt_template,
176+
document_text,
177+
class_label,
178+
attribute_descriptions,
179+
image_content,
143180
)
144-
return [{"text": task_prompt}]
145181

146-
# Replace other placeholders in the prompt parts
147-
before_examples = self._prepare_prompt_from_template(
182+
# Process the parts before and after the image placeholder
183+
before_image = self._prepare_prompt_from_template(
148184
parts[0],
149185
{
150186
"DOCUMENT_TEXT": document_text,
@@ -154,7 +190,7 @@ def _build_content_with_few_shot_examples(
154190
required_placeholders=[], # Don't enforce required placeholders for partial templates
155191
)
156192

157-
after_examples = self._prepare_prompt_from_template(
193+
after_image = self._prepare_prompt_from_template(
158194
parts[1],
159195
{
160196
"DOCUMENT_TEXT": document_text,
@@ -164,20 +200,169 @@ def _build_content_with_few_shot_examples(
164200
required_placeholders=[], # Don't enforce required placeholders for partial templates
165201
)
166202

203+
# Build content array with image in the middle
204+
content = []
205+
206+
# Add the part before the image
207+
if before_image.strip():
208+
content.append({"text": before_image})
209+
210+
# Add the image if available
211+
if image_content:
212+
if isinstance(image_content, list):
213+
# Multiple images (limit to 20 as per Bedrock constraints)
214+
if len(image_content) > 20:
215+
logger.warning(
216+
f"Found {len(image_content)} images, truncating to 20 due to Bedrock constraints. "
217+
f"{len(image_content) - 20} images will be dropped."
218+
)
219+
for img in image_content[:20]:
220+
content.append(image.prepare_bedrock_image_attachment(img))
221+
else:
222+
# Single image
223+
content.append(image.prepare_bedrock_image_attachment(image_content))
224+
225+
# Add the part after the image
226+
if after_image.strip():
227+
content.append({"text": after_image})
228+
229+
return content
230+
231+
def _build_content_without_image_placeholder(
232+
self,
233+
prompt_template: str,
234+
document_text: str,
235+
class_label: str,
236+
attribute_descriptions: str,
237+
image_content: Any = None,
238+
) -> List[Dict[str, Any]]:
239+
"""
240+
Build content array without DOCUMENT_IMAGE placeholder (standard processing).
241+
242+
Args:
243+
prompt_template: The prompt template
244+
document_text: The document text content
245+
class_label: The document class label
246+
attribute_descriptions: Formatted attribute names and descriptions
247+
image_content: Optional image content to append at the end
248+
249+
Returns:
250+
List of content items with text and image content
251+
"""
252+
# Prepare the full prompt
253+
task_prompt = self._prepare_prompt_from_template(
254+
prompt_template,
255+
{
256+
"DOCUMENT_TEXT": document_text,
257+
"DOCUMENT_CLASS": class_label,
258+
"ATTRIBUTE_NAMES_AND_DESCRIPTIONS": attribute_descriptions,
259+
},
260+
required_placeholders=[
261+
"DOCUMENT_TEXT",
262+
"DOCUMENT_CLASS",
263+
"ATTRIBUTE_NAMES_AND_DESCRIPTIONS",
264+
],
265+
)
266+
267+
content = [{"text": task_prompt}]
268+
269+
# Add image at the end if available
270+
if image_content:
271+
if isinstance(image_content, list):
272+
# Multiple images (limit to 20 as per Bedrock constraints)
273+
if len(image_content) > 20:
274+
logger.warning(
275+
f"Found {len(image_content)} images, truncating to 20 due to Bedrock constraints. "
276+
f"{len(image_content) - 20} images will be dropped."
277+
)
278+
for img in image_content[:20]:
279+
content.append(image.prepare_bedrock_image_attachment(img))
280+
else:
281+
# Single image
282+
content.append(image.prepare_bedrock_image_attachment(image_content))
283+
284+
return content
285+
286+
def _build_content_with_few_shot_examples(
287+
self,
288+
task_prompt_template: str,
289+
document_text: str,
290+
class_label: str,
291+
attribute_descriptions: str,
292+
image_content: Any = None,
293+
) -> List[Dict[str, Any]]:
294+
"""
295+
Build content array with few-shot examples inserted at the FEW_SHOT_EXAMPLES placeholder.
296+
Also supports DOCUMENT_IMAGE placeholder for image positioning.
297+
298+
Args:
299+
task_prompt_template: The task prompt template containing {FEW_SHOT_EXAMPLES}
300+
document_text: The document text content
301+
class_label: The document class label
302+
attribute_descriptions: Formatted attribute names and descriptions
303+
image_content: Optional image content to insert
304+
305+
Returns:
306+
List of content items with text and image content properly ordered
307+
"""
308+
# Split the task prompt at the FEW_SHOT_EXAMPLES placeholder
309+
parts = task_prompt_template.split("{FEW_SHOT_EXAMPLES}")
310+
311+
if len(parts) != 2:
312+
# Fallback to regular prompt processing if placeholder not found or malformed
313+
return self._build_content_with_or_without_image_placeholder(
314+
task_prompt_template,
315+
document_text,
316+
class_label,
317+
attribute_descriptions,
318+
image_content,
319+
)
320+
321+
# Process each part using the unified function
322+
before_examples_content = self._build_content_with_or_without_image_placeholder(
323+
parts[0], document_text, class_label, attribute_descriptions, image_content
324+
)
325+
326+
# Only pass image_content if it wasn't already used in the first part
327+
image_for_second_part = (
328+
None if "{DOCUMENT_IMAGE}" in parts[0] else image_content
329+
)
330+
after_examples_content = self._build_content_with_or_without_image_placeholder(
331+
parts[1],
332+
document_text,
333+
class_label,
334+
attribute_descriptions,
335+
image_for_second_part,
336+
)
337+
167338
# Build content array
168339
content = []
169340

170-
# Add the part before examples
171-
if before_examples.strip():
172-
content.append({"text": before_examples})
341+
# Add the part before examples (may include image if DOCUMENT_IMAGE was in the first part)
342+
content.extend(before_examples_content)
173343

174344
# Add few-shot examples from config for this specific class
175345
examples_content = self._build_few_shot_examples_content(class_label)
176346
content.extend(examples_content)
177347

178-
# Add the part after examples
179-
if after_examples.strip():
180-
content.append({"text": after_examples})
348+
# Add the part after examples (may include image if DOCUMENT_IMAGE was in the second part)
349+
content.extend(after_examples_content)
350+
351+
# If no DOCUMENT_IMAGE placeholder was found in either part and we have image content,
352+
# append it at the end (fallback behavior)
353+
if image_content and "{DOCUMENT_IMAGE}" not in task_prompt_template:
354+
if isinstance(image_content, list):
355+
# Multiple images (limit to 20 as per Bedrock constraints)
356+
if len(image_content) > 20:
357+
logger.warning(
358+
f"Found {len(image_content)} images, truncating to 20 due to Bedrock constraints. "
359+
f"{len(image_content) - 20} images will be dropped."
360+
)
361+
for img in image_content[:20]:
362+
content.append(image.prepare_bedrock_image_attachment(img))
363+
else:
364+
# Single image
365+
content.append(image.prepare_bedrock_image_attachment(image_content))
181366

182367
return content
183368

@@ -454,6 +639,15 @@ def process_document_section(self, document: Document, section_id: str) -> Docum
454639
Respond with a JSON object containing each field name and its extracted value.
455640
"""
456641
content = [{"text": task_prompt}]
642+
643+
# Add image attachments to the content (limit to 20 images as per Bedrock constraints)
644+
if page_images:
645+
logger.info(
646+
f"Attaching images to prompt, for {len(page_images)} pages."
647+
)
648+
# Limit to 20 images as per Bedrock constraints
649+
for img in page_images[:20]:
650+
content.append(image.prepare_bedrock_image_attachment(img))
457651
else:
458652
# Check if task prompt contains FEW_SHOT_EXAMPLES placeholder
459653
if "{FEW_SHOT_EXAMPLES}" in prompt_template:
@@ -462,26 +656,18 @@ def process_document_section(self, document: Document, section_id: str) -> Docum
462656
document_text,
463657
class_label,
464658
attribute_descriptions,
659+
page_images, # Pass images to the content builder
465660
)
466661
else:
467-
# Use the common format_prompt function from bedrock
468-
from idp_common.bedrock import format_prompt
469-
662+
# Use the unified content builder for DOCUMENT_IMAGE placeholder support
470663
try:
471-
task_prompt = format_prompt(
664+
content = self._build_content_with_or_without_image_placeholder(
472665
prompt_template,
473-
{
474-
"DOCUMENT_TEXT": document_text,
475-
"DOCUMENT_CLASS": class_label,
476-
"ATTRIBUTE_NAMES_AND_DESCRIPTIONS": attribute_descriptions,
477-
},
478-
required_placeholders=[
479-
"DOCUMENT_TEXT",
480-
"DOCUMENT_CLASS",
481-
"ATTRIBUTE_NAMES_AND_DESCRIPTIONS",
482-
],
666+
document_text,
667+
class_label,
668+
attribute_descriptions,
669+
page_images, # Pass images to the content builder
483670
)
484-
content = [{"text": task_prompt}]
485671
except ValueError as e:
486672
logger.warning(
487673
f"Error formatting prompt template: {str(e)}. Using default prompt."
@@ -499,14 +685,16 @@ def process_document_section(self, document: Document, section_id: str) -> Docum
499685
"""
500686
content = [{"text": task_prompt}]
501687

502-
# Add image attachments to the content (limit to 20 images as per Bedrock constraints)
503-
if page_images:
504-
logger.info(
505-
f"Attaching images to prompt, for {len(page_images)} pages."
506-
)
507-
# Limit to 20 images as per Bedrock constraints
508-
for img in page_images[:20]:
509-
content.append(image.prepare_bedrock_image_attachment(img))
688+
# Add image attachments for fallback case
689+
if page_images:
690+
logger.info(
691+
f"Attaching images to prompt, for {len(page_images)} pages."
692+
)
693+
# Limit to 20 images as per Bedrock constraints
694+
for img in page_images[:20]:
695+
content.append(
696+
image.prepare_bedrock_image_attachment(img)
697+
)
510698

511699
logger.info(
512700
f"Extracting fields for {class_label} document, section {section_id}"

0 commit comments

Comments
 (0)