Skip to content

Commit 9387cec

Browse files
author
Bob Strahan
committed
fix: update markdown table formatting with left alignment for text confidence data
1 parent 0813f8a commit 9387cec

File tree

3 files changed

+13
-8
lines changed

3 files changed

+13
-8
lines changed

CHANGELOG.md

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,14 +8,19 @@ SPDX-License-Identifier: MIT-0
88
### Added
99

1010
- **Text Confidence View for Document Pages**
11-
- Added support for displaying OCR text confidence data in the UI
11+
- Added support for displaying OCR text confidence data through new `TextConfidenceUri` field
12+
- New "Text Confidence View" option in the UI pages panel alongside existing Markdown and Text views
13+
- Fixed issues with view persistence - Text Confidence View button now always visible with appropriate messaging when content unavailable
14+
- Fixed view toggle behavior - switching between views no longer closes the viewer window
15+
- Reordered view buttons to: Markdown View, Text Confidence View, Text View for better user experience
1216

1317
### Changed
1418
- **Converted text confidence data format from JSON to markdown table for improved readability and reduced token usage**
1519
- Removed unnecessary "page_count" field
1620
- Changed "text_blocks" array to "text" field containing a markdown table with Text and Confidence columns
1721
- Reduces prompt size for assessment service while improving UI readability
1822
- OCR confidence values now rounded to 1 decimal place (e.g., 99.1, 87.3) for cleaner display
23+
- Markdown table headers now explicitly left-aligned using `|:-----|:-----------|` format for consistent appearance
1924

2025

2126

lib/idp_common_pkg/idp_common/ocr/service.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -624,7 +624,7 @@ def _process_single_page_bedrock(
624624
# Generate and store text confidence data
625625
# For Bedrock, we use a placeholder markdown table since LLM OCR doesn't provide real confidence scores
626626
text_confidence_data = {
627-
"text": "| Text | Confidence |\n|------|------------|\n| *No confidence data available from LLM OCR* | N/A |"
627+
"text": "| Text | Confidence |\n|:-----|:------------|\n| *No confidence data available from LLM OCR* | N/A |"
628628
}
629629

630630
text_confidence_key = f"{prefix}/pages/{page_id}/textConfidence.json"
@@ -704,7 +704,7 @@ def _process_single_page_none(
704704

705705
# Generate minimal text confidence data (empty markdown table)
706706
text_confidence_data = {
707-
"text": "| Text | Confidence |\n|------|------------|\n| *No OCR performed* | N/A |"
707+
"text": "| Text | Confidence |\n|:-----|:------------|\n| *No OCR performed* | N/A |"
708708
}
709709

710710
text_confidence_key = f"{prefix}/pages/{page_id}/textConfidence.json"
@@ -821,8 +821,8 @@ def _generate_text_confidence_data(
821821
Returns:
822822
Text confidence data as markdown table with ~80-90% token reduction
823823
"""
824-
# Start building the markdown table
825-
markdown_lines = ["| Text | Confidence |", "|------|------------|"]
824+
# Start building the markdown table with explicit left alignment
825+
markdown_lines = ["| Text | Confidence |", "|:-----|:-----------|"]
826826

827827
blocks = raw_ocr_data.get("Blocks", [])
828828

@@ -1070,8 +1070,8 @@ def _process_converted_page(
10701070
content_type="application/json",
10711071
)
10721072

1073-
# Generate text confidence data as markdown table
1074-
markdown_lines = ["| Text | Confidence |", "|------|------------|"]
1073+
# Generate text confidence data as markdown table with explicit left alignment
1074+
markdown_lines = ["| Text | Confidence |", "|:-----|:-----------|"]
10751075
for line in page_text.split("\n"):
10761076
if line.strip():
10771077
# Escape pipe characters in text

lib/idp_common_pkg/tests/unit/ocr/test_ocr_service.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -539,7 +539,7 @@ def test_generate_text_confidence_data(self, mock_textract_response):
539539

540540
# Check header
541541
assert lines[0] == "| Text | Confidence |"
542-
assert lines[1] == "|------|------------|"
542+
assert lines[1] == "|:-----|:-----------|"
543543

544544
# Check data rows
545545
assert lines[2] == "| Sample text line 1 | 98.5 |"

0 commit comments

Comments
 (0)