|
20 | 20 | import boto3 |
21 | 21 | from botocore.exceptions import ClientError |
22 | 22 |
|
23 | | -from idp_common import bedrock, s3, utils |
| 23 | +from idp_common import bedrock, image, s3, utils |
24 | 24 | from idp_common.classification.models import ( |
25 | 25 | ClassificationResult, |
26 | 26 | DocumentClassification, |
@@ -215,71 +215,213 @@ def _prepare_prompt_from_template( |
215 | 215 |
|
216 | 216 | return format_prompt(prompt_template, substitutions, required_placeholders) |
217 | 217 |
|
def _build_content_with_or_without_image_placeholder(
    self,
    prompt_template: str,
    document_text: str,
    class_names_and_descriptions: str,
    image_content: Optional[bytes] = None,
) -> List[Dict[str, Any]]:
    """
    Dispatch to the content builder that matches the template.

    Templates containing the literal ``{DOCUMENT_IMAGE}`` marker are handed to
    ``_build_content_with_image_placeholder``; every other template goes to
    ``_build_content_without_image_placeholder``. All arguments are forwarded
    unchanged.

    Args:
        prompt_template: The prompt template that may contain {DOCUMENT_IMAGE}
        document_text: The document text content
        class_names_and_descriptions: Formatted class names and descriptions
        image_content: Optional image content to insert

    Returns:
        The content list produced by the selected builder
    """
    # Pick the builder first, then make a single call with the shared arguments.
    if "{DOCUMENT_IMAGE}" in prompt_template:
        selected_builder = self._build_content_with_image_placeholder
    else:
        selected_builder = self._build_content_without_image_placeholder

    return selected_builder(
        prompt_template,
        document_text,
        class_names_and_descriptions,
        image_content,
    )
237 | 251 |
|
def _build_content_with_image_placeholder(
    self,
    prompt_template: str,
    document_text: str,
    class_names_and_descriptions: str,
    image_content: Optional[bytes] = None,
) -> List[Dict[str, Any]]:
    """
    Assemble the content list with the image placed where {DOCUMENT_IMAGE} occurs.

    The template is cut at the marker, the text on each side is rendered
    separately, and the image attachment (when provided) is placed between
    the two rendered pieces. Templates with no marker, or with more than one,
    are delegated to ``_build_content_without_image_placeholder``.

    Args:
        prompt_template: The prompt template that may contain {DOCUMENT_IMAGE}
        document_text: The document text content
        class_names_and_descriptions: Formatted class names and descriptions
        image_content: Optional image content to insert

    Returns:
        List of content items with text and image content properly ordered
    """
    image_marker = "{DOCUMENT_IMAGE}"
    marker_count = prompt_template.count(image_marker)

    # Anything other than exactly one marker goes through standard processing.
    if marker_count != 1:
        if marker_count > 1:
            # NOTE: the template is forwarded unchanged, so the literal markers
            # are still present when the standard path renders it.
            logger.warning(
                "Invalid DOCUMENT_IMAGE placeholder usage, falling back to standard processing"
            )
        return self._build_content_without_image_placeholder(
            prompt_template,
            document_text,
            class_names_and_descriptions,
            image_content,
        )

    def render(fragment: str) -> str:
        # Each side is only part of the template, so no placeholder is mandatory.
        return self._prepare_prompt_from_template(
            fragment,
            {
                "DOCUMENT_TEXT": document_text,
                "CLASS_NAMES_AND_DESCRIPTIONS": class_names_and_descriptions,
            },
            required_placeholders=[],
        )

    leading_template, _, trailing_template = prompt_template.partition(image_marker)
    leading_text = render(leading_template)
    trailing_text = render(trailing_template)

    # Order matters: text before the marker, then the image, then the rest.
    ordered_items: List[Dict[str, Any]] = []
    if leading_text.strip():
        ordered_items.append({"text": leading_text})
    if image_content:
        ordered_items.append(image.prepare_bedrock_image_attachment(image_content))
    if trailing_text.strip():
        ordered_items.append({"text": trailing_text})

    return ordered_items
249 | 331 |
|
def _build_content_without_image_placeholder(
    self,
    prompt_template: str,
    document_text: str,
    class_names_and_descriptions: str,
    image_content: Optional[bytes] = None,
) -> List[Dict[str, Any]]:
    """
    Render the whole template as one text item, with the image (if any) last.

    This is the standard path for templates that have no {DOCUMENT_IMAGE}
    marker. Both DOCUMENT_TEXT and CLASS_NAMES_AND_DESCRIPTIONS are passed to
    ``_prepare_prompt_from_template`` as required placeholders.

    Args:
        prompt_template: The prompt template
        document_text: The document text content
        class_names_and_descriptions: Formatted class names and descriptions
        image_content: Optional image content to append at the end

    Returns:
        List of content items with text and image content
    """
    rendered_prompt = self._prepare_prompt_from_template(
        prompt_template,
        {
            "DOCUMENT_TEXT": document_text,
            "CLASS_NAMES_AND_DESCRIPTIONS": class_names_and_descriptions,
        },
        required_placeholders=["DOCUMENT_TEXT", "CLASS_NAMES_AND_DESCRIPTIONS"],
    )
    items: List[Dict[str, Any]] = [{"text": rendered_prompt}]

    # Text-only request: nothing more to add.
    if not image_content:
        return items

    # The image always follows the prompt text on this path.
    items.append(image.prepare_bedrock_image_attachment(image_content))
    return items
| 368 | + |
def _build_content(
    self,
    task_prompt_template: str,
    document_text: str,
    class_names_and_descriptions: str,
    image_content: Optional[bytes] = None,
) -> List[Dict[str, Any]]:
    """
    Build content array with support for optional FEW_SHOT_EXAMPLES and DOCUMENT_IMAGE placeholders.

    When the template contains exactly one {FEW_SHOT_EXAMPLES} marker, the
    template is cut there and the few-shot examples are placed between the
    two rendered halves. Any other template (no marker, or several) is
    handled as a whole by ``_build_content_with_or_without_image_placeholder``.

    On the few-shot path each half is only a fragment of the template, so:

    * a half without {DOCUMENT_IMAGE} is rendered with no required
      placeholders and contributes text only, never the image;
    * a half with {DOCUMENT_IMAGE} is delegated to
      ``_build_content_with_image_placeholder`` so the image lands at the marker;
    * whitespace-only halves are dropped so no empty text item is emitted;
    * if the template has no {DOCUMENT_IMAGE} at all, the image is appended
      exactly once, after everything else.

    Args:
        task_prompt_template: The task prompt template that may contain placeholders
        document_text: The document text content
        class_names_and_descriptions: Formatted class names and descriptions
        image_content: Optional image content to insert

    Returns:
        List of content items with text and image content properly ordered
    """
    few_shot_marker = "{FEW_SHOT_EXAMPLES}"
    image_marker = "{DOCUMENT_IMAGE}"

    before_examples, marker_found, after_examples = task_prompt_template.partition(
        few_shot_marker
    )

    # Missing or repeated few-shot marker: process the template as a whole.
    if not marker_found or few_shot_marker in after_examples:
        return self._build_content_with_or_without_image_placeholder(
            task_prompt_template,
            document_text,
            class_names_and_descriptions,
            image_content,
        )

    def build_fragment(fragment: str) -> List[Dict[str, Any]]:
        # A fragment carrying the image marker gets the image at that position.
        # NOTE(review): a fragment with the marker repeated falls back, inside
        # the delegate, to the full-template path - confirm that is acceptable.
        if image_marker in fragment:
            return self._build_content_with_image_placeholder(
                fragment,
                document_text,
                class_names_and_descriptions,
                image_content,
            )

        # Text-only fragment: render leniently (it is a partial template) and
        # do NOT attach the image here, otherwise it would be duplicated by
        # the other fragment and by the end-of-content fallback below.
        rendered = self._prepare_prompt_from_template(
            fragment,
            {
                "DOCUMENT_TEXT": document_text,
                "CLASS_NAMES_AND_DESCRIPTIONS": class_names_and_descriptions,
            },
            required_placeholders=[],  # Don't enforce required placeholders for partial templates
        )
        return [{"text": rendered}] if rendered.strip() else []

    content: List[Dict[str, Any]] = []

    # Part before the examples, the examples from config, then the part after.
    content.extend(build_fragment(before_examples))
    content.extend(self._build_few_shot_examples_content())
    content.extend(build_fragment(after_examples))

    # No DOCUMENT_IMAGE marker anywhere: attach the image once, at the end.
    if image_content and image_marker not in task_prompt_template:
        content.append(image.prepare_bedrock_image_attachment(image_content))

    return content
285 | 427 |
|
@@ -469,28 +611,13 @@ def classify_page_bedrock( |
469 | 611 | # Get classification configuration |
470 | 612 | config = self._get_classification_config() |
471 | 613 |
|
472 | | - # Check if task prompt contains FEW_SHOT_EXAMPLES placeholder |
473 | | - if "{FEW_SHOT_EXAMPLES}" in config["task_prompt"]: |
474 | | - content = self._build_content_with_few_shot_examples( |
475 | | - config["task_prompt"], text_content or "", self._format_classes_list() |
476 | | - ) |
477 | | - else: |
478 | | - # Use common function to prepare prompt with required placeholder validation |
479 | | - task_prompt = self._prepare_prompt_from_template( |
480 | | - config["task_prompt"], |
481 | | - { |
482 | | - "DOCUMENT_TEXT": text_content or "", |
483 | | - "CLASS_NAMES_AND_DESCRIPTIONS": self._format_classes_list(), |
484 | | - }, |
485 | | - required_placeholders=["DOCUMENT_TEXT", "CLASS_NAMES_AND_DESCRIPTIONS"], |
486 | | - ) |
487 | | - content = [{"text": task_prompt}] |
488 | | - |
489 | | - # Add image if available |
490 | | - if image_content: |
491 | | - from idp_common import image |
492 | | - |
493 | | - content.append(image.prepare_bedrock_image_attachment(image_content)) |
| 614 | + # Build content with support for placeholders |
| 615 | + content = self._build_content( |
| 616 | + config["task_prompt"], |
| 617 | + text_content or "", |
| 618 | + self._format_classes_list(), |
| 619 | + image_content, |
| 620 | + ) |
494 | 621 |
|
495 | 622 | logger.info(f"Classifying page {page_id} with Bedrock") |
496 | 623 |
|
|
0 commit comments