Skip to content

Commit 0d5437f

Browse files
author
Bob Strahan
committed
Add {DOCUMENT_IMAGE} placeholder to classification prompt for multi-modal page level classification method.
1 parent: 2212d2e; commit: 0d5437f

File tree

1 file changed

+177
-50
lines changed
  • lib/idp_common_pkg/idp_common/classification

1 file changed

+177
-50
lines changed

lib/idp_common_pkg/idp_common/classification/service.py

Lines changed: 177 additions & 50 deletions
Original file line number | Diff line number | Diff line change
@@ -20,7 +20,7 @@
2020
import boto3
2121
from botocore.exceptions import ClientError
2222

23-
from idp_common import bedrock, s3, utils
23+
from idp_common import bedrock, image, s3, utils
2424
from idp_common.classification.models import (
2525
ClassificationResult,
2626
DocumentClassification,
@@ -215,71 +215,213 @@ def _prepare_prompt_from_template(
215215

216216
return format_prompt(prompt_template, substitutions, required_placeholders)
217217

218-
def _build_content_with_few_shot_examples(
218+
def _build_content_with_or_without_image_placeholder(
219219
self,
220-
task_prompt_template: str,
220+
prompt_template: str,
221221
document_text: str,
222222
class_names_and_descriptions: str,
223+
image_content: Optional[bytes] = None,
223224
) -> List[Dict[str, Any]]:
224225
"""
225-
Build content array with few-shot examples inserted at the FEW_SHOT_EXAMPLES placeholder.
226+
Build content array, automatically deciding whether to use image placeholder processing.
226227
227228
Args:
228-
task_prompt_template: The task prompt template containing {FEW_SHOT_EXAMPLES}
229+
prompt_template: The prompt template that may contain {DOCUMENT_IMAGE}
229230
document_text: The document text content
230231
class_names_and_descriptions: Formatted class names and descriptions
232+
image_content: Optional image content to insert
231233
232234
Returns:
233235
List of content items with text and image content properly ordered
234236
"""
235-
# Split the task prompt at the FEW_SHOT_EXAMPLES placeholder
236-
parts = task_prompt_template.split("{FEW_SHOT_EXAMPLES}")
237+
if "{DOCUMENT_IMAGE}" in prompt_template:
238+
return self._build_content_with_image_placeholder(
239+
prompt_template,
240+
document_text,
241+
class_names_and_descriptions,
242+
image_content,
243+
)
244+
else:
245+
return self._build_content_without_image_placeholder(
246+
prompt_template,
247+
document_text,
248+
class_names_and_descriptions,
249+
image_content,
250+
)
237251

238-
if len(parts) != 2:
239-
# Fallback to regular prompt processing if placeholder not found or malformed
240-
task_prompt = self._prepare_prompt_from_template(
241-
task_prompt_template,
252+
def _build_content_with_image_placeholder(
253+
self,
254+
prompt_template: str,
255+
document_text: str,
256+
class_names_and_descriptions: str,
257+
image_content: Optional[bytes] = None,
258+
) -> List[Dict[str, Any]]:
259+
"""
260+
Build content array with image inserted at DOCUMENT_IMAGE placeholder if present.
261+
262+
Args:
263+
prompt_template: The prompt template that may contain {DOCUMENT_IMAGE}
264+
document_text: The document text content
265+
class_names_and_descriptions: Formatted class names and descriptions
266+
image_content: Optional image content to insert
267+
268+
Returns:
269+
List of content items with text and image content properly ordered
270+
"""
271+
# Check if DOCUMENT_IMAGE placeholder is present
272+
if "{DOCUMENT_IMAGE}" in prompt_template:
273+
# Split the prompt at the DOCUMENT_IMAGE placeholder
274+
parts = prompt_template.split("{DOCUMENT_IMAGE}")
275+
276+
if len(parts) != 2:
277+
logger.warning(
278+
"Invalid DOCUMENT_IMAGE placeholder usage, falling back to standard processing"
279+
)
280+
# Fallback to standard processing
281+
return self._build_content_without_image_placeholder(
282+
prompt_template,
283+
document_text,
284+
class_names_and_descriptions,
285+
image_content,
286+
)
287+
288+
# Process the parts before and after the image placeholder
289+
before_image = self._prepare_prompt_from_template(
290+
parts[0],
242291
{
243292
"DOCUMENT_TEXT": document_text,
244293
"CLASS_NAMES_AND_DESCRIPTIONS": class_names_and_descriptions,
245294
},
246-
required_placeholders=["DOCUMENT_TEXT", "CLASS_NAMES_AND_DESCRIPTIONS"],
295+
required_placeholders=[], # Don't enforce required placeholders for partial templates
296+
)
297+
298+
after_image = self._prepare_prompt_from_template(
299+
parts[1],
300+
{
301+
"DOCUMENT_TEXT": document_text,
302+
"CLASS_NAMES_AND_DESCRIPTIONS": class_names_and_descriptions,
303+
},
304+
required_placeholders=[], # Don't enforce required placeholders for partial templates
305+
)
306+
307+
# Build content array with image in the middle
308+
content = []
309+
310+
# Add the part before the image
311+
if before_image.strip():
312+
content.append({"text": before_image})
313+
314+
# Add the image if available
315+
if image_content:
316+
content.append(image.prepare_bedrock_image_attachment(image_content))
317+
318+
# Add the part after the image
319+
if after_image.strip():
320+
content.append({"text": after_image})
321+
322+
return content
323+
else:
324+
# No DOCUMENT_IMAGE placeholder, use standard processing
325+
return self._build_content_without_image_placeholder(
326+
prompt_template,
327+
document_text,
328+
class_names_and_descriptions,
329+
image_content,
247330
)
248-
return [{"text": task_prompt}]
249331

250-
# Replace other placeholders in the prompt parts
251-
before_examples = self._prepare_prompt_from_template(
252-
parts[0],
332+
def _build_content_without_image_placeholder(
333+
self,
334+
prompt_template: str,
335+
document_text: str,
336+
class_names_and_descriptions: str,
337+
image_content: Optional[bytes] = None,
338+
) -> List[Dict[str, Any]]:
339+
"""
340+
Build content array without DOCUMENT_IMAGE placeholder (standard processing).
341+
342+
Args:
343+
prompt_template: The prompt template
344+
document_text: The document text content
345+
class_names_and_descriptions: Formatted class names and descriptions
346+
image_content: Optional image content to append at the end
347+
348+
Returns:
349+
List of content items with text and image content
350+
"""
351+
# Prepare the full prompt
352+
task_prompt = self._prepare_prompt_from_template(
353+
prompt_template,
253354
{
254355
"DOCUMENT_TEXT": document_text,
255356
"CLASS_NAMES_AND_DESCRIPTIONS": class_names_and_descriptions,
256357
},
257-
required_placeholders=[], # Don't enforce required placeholders for partial templates
358+
required_placeholders=["DOCUMENT_TEXT", "CLASS_NAMES_AND_DESCRIPTIONS"],
258359
)
259360

260-
after_examples = self._prepare_prompt_from_template(
261-
parts[1],
262-
{
263-
"DOCUMENT_TEXT": document_text,
264-
"CLASS_NAMES_AND_DESCRIPTIONS": class_names_and_descriptions,
265-
},
266-
required_placeholders=[], # Don't enforce required placeholders for partial templates
361+
content = [{"text": task_prompt}]
362+
363+
# Add image at the end if available
364+
if image_content:
365+
content.append(image.prepare_bedrock_image_attachment(image_content))
366+
367+
return content
368+
369+
def _build_content(
370+
self,
371+
task_prompt_template: str,
372+
document_text: str,
373+
class_names_and_descriptions: str,
374+
image_content: Optional[bytes] = None,
375+
) -> List[Dict[str, Any]]:
376+
"""
377+
Build content array with support for optional FEW_SHOT_EXAMPLES and DOCUMENT_IMAGE placeholders.
378+
379+
Args:
380+
task_prompt_template: The task prompt template that may contain placeholders
381+
document_text: The document text content
382+
class_names_and_descriptions: Formatted class names and descriptions
383+
image_content: Optional image content to insert
384+
385+
Returns:
386+
List of content items with text and image content properly ordered
387+
"""
388+
# Split the task prompt at the FEW_SHOT_EXAMPLES placeholder
389+
parts = task_prompt_template.split("{FEW_SHOT_EXAMPLES}")
390+
391+
if len(parts) != 2:
392+
# Fallback to regular prompt processing if placeholder not found or malformed
393+
return self._build_content_with_or_without_image_placeholder(
394+
task_prompt_template,
395+
document_text,
396+
class_names_and_descriptions,
397+
image_content,
398+
)
399+
400+
# Process both parts
401+
before_examples_content = self._build_content_with_or_without_image_placeholder(
402+
parts[0], document_text, class_names_and_descriptions, image_content
403+
)
404+
after_examples_content = self._build_content_with_or_without_image_placeholder(
405+
parts[1], document_text, class_names_and_descriptions, image_content
267406
)
268407

269408
# Build content array
270409
content = []
271410

272411
# Add the part before examples
273-
if before_examples.strip():
274-
content.append({"text": before_examples})
412+
content.extend(before_examples_content)
275413

276414
# Add few-shot examples from config
277415
examples_content = self._build_few_shot_examples_content()
278416
content.extend(examples_content)
279417

280418
# Add the part after examples
281-
if after_examples.strip():
282-
content.append({"text": after_examples})
419+
content.extend(after_examples_content)
420+
421+
# If no DOCUMENT_IMAGE placeholder was found in either part and we have image content,
422+
# append it at the end (fallback behavior)
423+
if image_content and "{DOCUMENT_IMAGE}" not in task_prompt_template:
424+
content.append(image.prepare_bedrock_image_attachment(image_content))
283425

284426
return content
285427

@@ -469,28 +611,13 @@ def classify_page_bedrock(
469611
# Get classification configuration
470612
config = self._get_classification_config()
471613

472-
# Check if task prompt contains FEW_SHOT_EXAMPLES placeholder
473-
if "{FEW_SHOT_EXAMPLES}" in config["task_prompt"]:
474-
content = self._build_content_with_few_shot_examples(
475-
config["task_prompt"], text_content or "", self._format_classes_list()
476-
)
477-
else:
478-
# Use common function to prepare prompt with required placeholder validation
479-
task_prompt = self._prepare_prompt_from_template(
480-
config["task_prompt"],
481-
{
482-
"DOCUMENT_TEXT": text_content or "",
483-
"CLASS_NAMES_AND_DESCRIPTIONS": self._format_classes_list(),
484-
},
485-
required_placeholders=["DOCUMENT_TEXT", "CLASS_NAMES_AND_DESCRIPTIONS"],
486-
)
487-
content = [{"text": task_prompt}]
488-
489-
# Add image if available
490-
if image_content:
491-
from idp_common import image
492-
493-
content.append(image.prepare_bedrock_image_attachment(image_content))
614+
# Build content with support for placeholders
615+
content = self._build_content(
616+
config["task_prompt"],
617+
text_content or "",
618+
self._format_classes_list(),
619+
image_content,
620+
)
494621

495622
logger.info(f"Classifying page {page_id} with Bedrock")
496623

0 commit comments

Comments
 (0)