fix: update markdown table formatting with left alignment for text confidence data

Bob Strahan · Bob Strahan · commit 9387cec32d68 · 2025-07-16T16:49:09.000Z
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -8,14 +8,19 @@ SPDX-License-Identifier: MIT-0
 ### Added
 
 - **Text Confidence View for Document Pages**
-  - Added support for displaying OCR text confidence data in the UI
+  - Added support for displaying OCR text confidence data through new `TextConfidenceUri` field
+  - New "Text Confidence View" option in the UI pages panel alongside existing Markdown and Text views
+  - Fixed issues with view persistence - Text Confidence View button now always visible with appropriate messaging when content unavailable
+  - Fixed view toggle behavior - switching between views no longer closes the viewer window
+  - Reordered view buttons to: Markdown View, Text Confidence View, Text View for better user experience
 
 ### Changed
 - **Converted text confidence data format from JSON to markdown table for improved readability and reduced token usage**
   - Removed unnecessary "page_count" field
   - Changed "text_blocks" array to "text" field containing a markdown table with Text and Confidence columns
   - Reduces prompt size for assessment service while improving UI readability
   - OCR confidence values now rounded to 1 decimal point (e.g., 99.1, 87.3) for cleaner display
+  - Markdown table columns are now explicitly left-aligned via a `|:-----|:-----------|` delimiter row for consistent appearance
 
 
 
diff --git a/lib/idp_common_pkg/idp_common/ocr/service.py b/lib/idp_common_pkg/idp_common/ocr/service.py
@@ -624,7 +624,7 @@ def _process_single_page_bedrock(
         # Generate and store text confidence data
         # For Bedrock, we use empty markdown table since LLM OCR doesn't provide real confidence scores
         text_confidence_data = {
-            "text": "| Text | Confidence |\n|------|------------|\n| *No confidence data available from LLM OCR* | N/A |"
+            "text": "| Text | Confidence |\n|:-----|:------------|\n| *No confidence data available from LLM OCR* | N/A |"
         }
 
         text_confidence_key = f"{prefix}/pages/{page_id}/textConfidence.json"
@@ -704,7 +704,7 @@ def _process_single_page_none(
 
         # Generate minimal text confidence data (empty markdown table)
         text_confidence_data = {
-            "text": "| Text | Confidence |\n|------|------------|\n| *No OCR performed* | N/A |"
+            "text": "| Text | Confidence |\n|:-----|:------------|\n| *No OCR performed* | N/A |"
         }
 
         text_confidence_key = f"{prefix}/pages/{page_id}/textConfidence.json"
@@ -821,8 +821,8 @@ def _generate_text_confidence_data(
         Returns:
             Text confidence data as markdown table with ~80-90% token reduction
         """
-        # Start building the markdown table
-        markdown_lines = ["| Text | Confidence |", "|------|------------|"]
+        # Start building the markdown table with explicit left alignment
+        markdown_lines = ["| Text | Confidence |", "|:-----|:-----------|"]
 
         blocks = raw_ocr_data.get("Blocks", [])
 
@@ -1070,8 +1070,8 @@ def _process_converted_page(
             content_type="application/json",
         )
 
-        # Generate text confidence data as markdown table
-        markdown_lines = ["| Text | Confidence |", "|------|------------|"]
+        # Generate text confidence data as markdown table with explicit left alignment
+        markdown_lines = ["| Text | Confidence |", "|:-----|:-----------|"]
         for line in page_text.split("\n"):
             if line.strip():
                 # Escape pipe characters in text
diff --git a/lib/idp_common_pkg/tests/unit/ocr/test_ocr_service.py b/lib/idp_common_pkg/tests/unit/ocr/test_ocr_service.py
@@ -539,7 +539,7 @@ def test_generate_text_confidence_data(self, mock_textract_response):
 
             # Check header
             assert lines[0] == "| Text | Confidence |"
-            assert lines[1] == "|------|------------|"
+            assert lines[1] == "|:-----|:-----------|"
 
             # Check data rows
             assert lines[2] == "| Sample text line 1 | 98.5 |"