@@ -104,47 +104,83 @@ def _prepare_prompt_from_template(
104104
105105 return format_prompt (prompt_template , substitutions , required_placeholders )
106106
107- def _build_content_with_few_shot_examples (
def _build_content_with_or_without_image_placeholder(
    self,
    prompt_template: str,
    document_text: str,
    class_label: str,
    attribute_descriptions: str,
    image_content: Any = None,
) -> List[Dict[str, Any]]:
    """
    Route content building to the placeholder-aware or the standard builder.

    The template is inspected for the literal ``{DOCUMENT_IMAGE}`` token. When
    it is present, the image-placeholder builder is used; otherwise the
    standard builder is used. Both builders receive the same arguments,
    unchanged.

    Args:
        prompt_template: The prompt template that may contain {DOCUMENT_IMAGE}
        document_text: The document text content
        class_label: The document class label
        attribute_descriptions: Formatted attribute names and descriptions
        image_content: Optional image content to insert

    Returns:
        List of content items produced by the selected builder
    """
    # Both builders share one positional signature, so select the callable
    # first and make a single call below.
    if "{DOCUMENT_IMAGE}" in prompt_template:
        build_content = self._build_content_with_image_placeholder
    else:
        build_content = self._build_content_without_image_placeholder

    return build_content(
        prompt_template,
        document_text,
        class_label,
        attribute_descriptions,
        image_content,
    )
144+
def _build_content_with_image_placeholder(
    self,
    prompt_template: str,
    document_text: str,
    class_label: str,
    attribute_descriptions: str,
    image_content: Any = None,
) -> List[Dict[str, Any]]:
    """
    Build a content array with the image(s) placed at the DOCUMENT_IMAGE placeholder.

    The template is split on ``{DOCUMENT_IMAGE}``. If that does not yield
    exactly two segments, a warning is logged and the standard
    (no-placeholder) builder is used instead. Otherwise the text before the
    placeholder, the image attachment(s), and the text after the placeholder
    are emitted in that order; text segments that are empty or whitespace-only
    are omitted.

    Args:
        prompt_template: The prompt template that may contain {DOCUMENT_IMAGE}
        document_text: The document text content
        class_label: The document class label
        attribute_descriptions: Formatted attribute names and descriptions
        image_content: Optional image content to insert; either a single
            image or a list of images (only the first 20 of a list are used)

    Returns:
        List of content items with text and image content properly ordered
    """
    segments = prompt_template.split("{DOCUMENT_IMAGE}")

    if len(segments) != 2:
        logger.warning(
            "Invalid DOCUMENT_IMAGE placeholder usage, falling back to standard processing"
        )
        # Fallback to standard processing
        return self._build_content_without_image_placeholder(
            prompt_template,
            document_text,
            class_label,
            attribute_descriptions,
            image_content,
        )

    # Render both halves up front. Each half is only a fragment of the full
    # template, so required placeholders are deliberately not enforced.
    text_before, text_after = [
        self._prepare_prompt_from_template(
            segment,
            {
                "DOCUMENT_TEXT": document_text,
                "DOCUMENT_CLASS": class_label,
                "ATTRIBUTE_NAMES_AND_DESCRIPTIONS": attribute_descriptions,
            },
            required_placeholders=[],
        )
        for segment in segments
    ]

    content_items = []

    if text_before.strip():
        content_items.append({"text": text_before})

    if image_content:
        if not isinstance(image_content, list):
            # Single image
            content_items.append(image.prepare_bedrock_image_attachment(image_content))
        else:
            # Multiple images (limit to 20 as per Bedrock constraints)
            if len(image_content) > 20:
                logger.warning(
                    f"Found {len(image_content)} images, truncating to 20 due to Bedrock constraints. "
                    f"{len(image_content) - 20} images will be dropped."
                )
            for page_image in image_content[:20]:
                content_items.append(image.prepare_bedrock_image_attachment(page_image))

    if text_after.strip():
        content_items.append({"text": text_after})

    return content_items
230+
def _build_content_without_image_placeholder(
    self,
    prompt_template: str,
    document_text: str,
    class_label: str,
    attribute_descriptions: str,
    image_content: Any = None,
) -> List[Dict[str, Any]]:
    """
    Build a content array for a template with no DOCUMENT_IMAGE placeholder.

    The whole template is rendered as a single text item (with DOCUMENT_TEXT,
    DOCUMENT_CLASS and ATTRIBUTE_NAMES_AND_DESCRIPTIONS passed as required
    placeholders), and any image attachment(s) are appended after it.

    Args:
        prompt_template: The prompt template
        document_text: The document text content
        class_label: The document class label
        attribute_descriptions: Formatted attribute names and descriptions
        image_content: Optional image content to append at the end; either a
            single image or a list of images (only the first 20 of a list are used)

    Returns:
        List of content items: the rendered text first, then the images
    """
    substitutions = {
        "DOCUMENT_TEXT": document_text,
        "DOCUMENT_CLASS": class_label,
        "ATTRIBUTE_NAMES_AND_DESCRIPTIONS": attribute_descriptions,
    }
    rendered_prompt = self._prepare_prompt_from_template(
        prompt_template,
        substitutions,
        required_placeholders=[
            "DOCUMENT_TEXT",
            "DOCUMENT_CLASS",
            "ATTRIBUTE_NAMES_AND_DESCRIPTIONS",
        ],
    )

    content_items = [{"text": rendered_prompt}]

    # Nothing to attach: the text item alone is the result.
    if not image_content:
        return content_items

    if not isinstance(image_content, list):
        # Single image
        content_items.append(image.prepare_bedrock_image_attachment(image_content))
        return content_items

    # Multiple images (limit to 20 as per Bedrock constraints)
    if len(image_content) > 20:
        logger.warning(
            f"Found {len(image_content)} images, truncating to 20 due to Bedrock constraints. "
            f"{len(image_content) - 20} images will be dropped."
        )
    for page_image in image_content[:20]:
        content_items.append(image.prepare_bedrock_image_attachment(page_image))

    return content_items
285+
def _build_content_with_few_shot_examples(
    self,
    task_prompt_template: str,
    document_text: str,
    class_label: str,
    attribute_descriptions: str,
    image_content: Any = None,
) -> List[Dict[str, Any]]:
    """
    Build content array with few-shot examples inserted at the FEW_SHOT_EXAMPLES placeholder.
    Also supports DOCUMENT_IMAGE placeholder for image positioning.

    The document image(s) are attached exactly once: at the first
    {DOCUMENT_IMAGE} placeholder if the template contains one (on either side
    of the examples), otherwise at the very end of the content array. The two
    halves of the template around {FEW_SHOT_EXAMPLES} are rendered as partial
    templates, so required placeholders are not enforced on them, and halves
    that render to empty or whitespace-only text are omitted.

    If the template does not contain exactly one {FEW_SHOT_EXAMPLES}
    placeholder, processing falls back to the regular content builder.

    Args:
        task_prompt_template: The task prompt template containing {FEW_SHOT_EXAMPLES}
        document_text: The document text content
        class_label: The document class label
        attribute_descriptions: Formatted attribute names and descriptions
        image_content: Optional image content to insert; either a single
            image or a list of images (only the first 20 of a list are used)

    Returns:
        List of content items with text and image content properly ordered
    """
    image_marker = "{DOCUMENT_IMAGE}"

    # Split the task prompt at the FEW_SHOT_EXAMPLES placeholder
    parts = task_prompt_template.split("{FEW_SHOT_EXAMPLES}")

    if len(parts) != 2:
        # Fallback to regular prompt processing if placeholder not found or malformed
        return self._build_content_with_or_without_image_placeholder(
            task_prompt_template,
            document_text,
            class_label,
            attribute_descriptions,
            image_content,
        )

    if task_prompt_template.count(image_marker) > 1:
        logger.warning(
            "Multiple DOCUMENT_IMAGE placeholders found, "
            "images will be inserted at the first one only"
        )

    def render_text(fragment: str) -> List[Dict[str, Any]]:
        """Render a partial template; return zero or one text content item."""
        text = self._prepare_prompt_from_template(
            fragment,
            {
                "DOCUMENT_TEXT": document_text,
                "DOCUMENT_CLASS": class_label,
                "ATTRIBUTE_NAMES_AND_DESCRIPTIONS": attribute_descriptions,
            },
            required_placeholders=[],  # Don't enforce required placeholders for partial templates
        )
        return [{"text": text}] if text.strip() else []

    # Prepare the image attachments once so they cannot be emitted more than
    # once, regardless of where (or whether) the placeholder appears.
    attachments: List[Dict[str, Any]] = []
    if image_content:
        images = image_content if isinstance(image_content, list) else [image_content]
        # Multiple images (limit to 20 as per Bedrock constraints)
        if len(images) > 20:
            logger.warning(
                f"Found {len(images)} images, truncating to 20 due to Bedrock constraints. "
                f"{len(images) - 20} images will be dropped."
            )
        attachments = [
            image.prepare_bedrock_image_attachment(img) for img in images[:20]
        ]

    # Render the text before and after the examples, placing the images at
    # the first DOCUMENT_IMAGE placeholder encountered.
    image_placed = False
    rendered_parts = []
    for part in parts:
        head, marker, tail = part.partition(image_marker)
        items = render_text(head)
        if marker:
            if not image_placed:
                items.extend(attachments)
                image_placed = True
            # Any further placeholder is redundant; strip it so the literal
            # token does not leak into the prompt text.
            items.extend(render_text(tail.replace(image_marker, "")))
        rendered_parts.append(items)
    before_examples_content, after_examples_content = rendered_parts

    # Build content array
    content = []

    # Add the part before examples (may include image if DOCUMENT_IMAGE was in the first part)
    content.extend(before_examples_content)

    # Add few-shot examples from config for this specific class
    content.extend(self._build_few_shot_examples_content(class_label))

    # Add the part after examples (may include image if DOCUMENT_IMAGE was in the second part)
    content.extend(after_examples_content)

    # If no DOCUMENT_IMAGE placeholder was found in either part,
    # append the images at the end (fallback behavior)
    if not image_placed:
        content.extend(attachments)

    return content
183368
@@ -454,6 +639,15 @@ def process_document_section(self, document: Document, section_id: str) -> Docum
454639 Respond with a JSON object containing each field name and its extracted value.
455640 """
456641 content = [{"text" : task_prompt }]
642+
643+ # Add image attachments to the content (limit to 20 images as per Bedrock constraints)
644+ if page_images :
645+ logger .info (
646+ f"Attaching images to prompt, for { len (page_images )} pages."
647+ )
648+ # Limit to 20 images as per Bedrock constraints
649+ for img in page_images [:20 ]:
650+ content .append (image .prepare_bedrock_image_attachment (img ))
457651 else :
458652 # Check if task prompt contains FEW_SHOT_EXAMPLES placeholder
459653 if "{FEW_SHOT_EXAMPLES}" in prompt_template :
@@ -462,26 +656,18 @@ def process_document_section(self, document: Document, section_id: str) -> Docum
462656 document_text ,
463657 class_label ,
464658 attribute_descriptions ,
659+ page_images , # Pass images to the content builder
465660 )
466661 else :
467- # Use the common format_prompt function from bedrock
468- from idp_common .bedrock import format_prompt
469-
662+ # Use the unified content builder for DOCUMENT_IMAGE placeholder support
470663 try :
471- task_prompt = format_prompt (
664+ content = self . _build_content_with_or_without_image_placeholder (
472665 prompt_template ,
473- {
474- "DOCUMENT_TEXT" : document_text ,
475- "DOCUMENT_CLASS" : class_label ,
476- "ATTRIBUTE_NAMES_AND_DESCRIPTIONS" : attribute_descriptions ,
477- },
478- required_placeholders = [
479- "DOCUMENT_TEXT" ,
480- "DOCUMENT_CLASS" ,
481- "ATTRIBUTE_NAMES_AND_DESCRIPTIONS" ,
482- ],
666+ document_text ,
667+ class_label ,
668+ attribute_descriptions ,
669+ page_images , # Pass images to the content builder
483670 )
484- content = [{"text" : task_prompt }]
485671 except ValueError as e :
486672 logger .warning (
487673 f"Error formatting prompt template: { str (e )} . Using default prompt."
@@ -499,14 +685,16 @@ def process_document_section(self, document: Document, section_id: str) -> Docum
499685 """
500686 content = [{"text" : task_prompt }]
501687
502- # Add image attachments to the content (limit to 20 images as per Bedrock constraints)
503- if page_images :
504- logger .info (
505- f"Attaching images to prompt, for { len (page_images )} pages."
506- )
507- # Limit to 20 images as per Bedrock constraints
508- for img in page_images [:20 ]:
509- content .append (image .prepare_bedrock_image_attachment (img ))
688+ # Add image attachments for fallback case
689+ if page_images :
690+ logger .info (
691+ f"Attaching images to prompt, for { len (page_images )} pages."
692+ )
693+ # Limit to 20 images as per Bedrock constraints
694+ for img in page_images [:20 ]:
695+ content .append (
696+ image .prepare_bedrock_image_attachment (img )
697+ )
510698
511699 logger .info (
512700 f"Extracting fields for { class_label } document, section { section_id } "
0 commit comments