🐛 options should default to None

ianardee · ianardee · commit c101b1400795 · 2025-09-03T13:38:45.000+02:00
diff --git a/mindee/input/inference_parameters.py b/mindee/input/inference_parameters.py
@@ -10,16 +10,16 @@ class InferenceParameters:
 
     model_id: str
     """ID of the model, required."""
-    rag: bool = False
-    """Use Retrieval-Augmented Generation during inference."""
-    raw_text: bool = False
-    """Extract the entire text from the document as strings, and fill the ``raw_text`` attribute."""
-    polygon: bool = False
-    """Calculate bounding box polygons for values, and fill the ``locations`` attribute of fields"""
-    confidence: bool = False
+    rag: Optional[bool] = None
+    """Enhance extraction accuracy with Retrieval-Augmented Generation."""
+    raw_text: Optional[bool] = None
+    """Extract the full text content from the document as strings, and fill the ``raw_text`` attribute."""
+    polygon: Optional[bool] = None
+    """Calculate bounding box polygons for all fields, and fill their ``locations`` attribute."""
+    confidence: Optional[bool] = None
     """
-    Calculate confidence scores for values, and fill the ``confidence`` attribute of fields.
-    Useful for automation.
+    Boost the precision and accuracy of all extractions.
+    Calculate confidence scores for all fields, and fill their ``confidence`` attribute.
     """
     alias: Optional[str] = None
     """Use an alias to link the file to your own DB. If empty, no alias will be used."""
diff --git a/mindee/mindee_http/mindee_api_v2.py b/mindee/mindee_http/mindee_api_v2.py
@@ -82,14 +82,14 @@ def req_post_inference_enqueue(
         data = {"model_id": params.model_id}
         url = f"{self.url_root}/inferences/enqueue"
 
-        if params.rag:
-            data["rag"] = "true"
-        if params.raw_text:
-            data["raw_text"] = "true"
-        if params.confidence:
-            data["confidence"] = "true"
-        if params.polygon:
-            data["polygon"] = "true"
+        if params.rag is not None:
+            data["rag"] = str(params.rag).lower()
+        if params.raw_text is not None:
+            data["raw_text"] = str(params.raw_text).lower()
+        if params.confidence is not None:
+            data["confidence"] = str(params.confidence).lower()
+        if params.polygon is not None:
+            data["polygon"] = str(params.polygon).lower()
         if params.webhook_ids and len(params.webhook_ids) > 0:
             data["webhook_ids"] = ",".join(params.webhook_ids)
         if params.alias and len(params.alias):
diff --git a/mindee/parsing/v2/raw_text.py b/mindee/parsing/v2/raw_text.py
@@ -8,10 +8,16 @@ class RawText:
     """Raw text extracted from the document."""
 
     pages: List[RawTextPage]
-    """Page the raw text was found on."""
+    """Pages of raw text content."""
 
     def __init__(self, raw_response: StringDict):
         self.pages = [RawTextPage(page) for page in raw_response.get("pages", [])]
 
     def __str__(self) -> str:
-        return "\n\n".join([page.content for page in self.pages])
+        """
+        Text content of all pages.
+
+        Pages are separated by 2 newlines; the output ends with 1 newline.
+        """
+        page_contents = "\n\n".join([page.content for page in self.pages])
+        return page_contents + "\n"
diff --git a/tests/data b/tests/data
@@ -1 +1 @@
-Subproject commit 11c2edc3d2778b121644317b0fc3efc0102ec83a
+Subproject commit bc8356c1ce52d60351ed3430d336f33366025012
diff --git a/tests/test_client_v2_integration.py b/tests/test_client_v2_integration.py
@@ -73,6 +73,38 @@ def test_parse_file_empty_multiple_pages_must_succeed(
     assert len(response.inference.result.raw_text.pages) == 2
 
 
+@pytest.mark.integration
+@pytest.mark.v2
+def test_parse_file_empty_single_page_options_must_succeed(
+    v2_client: ClientV2, findoc_model_id: str
+) -> None:
+    """
+    Upload a single-page blank PDF with every option enabled and make sure the
+    returned inference reports each of those options as active.
+    """
+    input_path: Path = FILE_TYPES_DIR / "pdf" / "blank_1.pdf"
+
+    input_source = PathInput(input_path)
+    params = InferenceParameters(
+        model_id=findoc_model_id,
+        rag=True,
+        raw_text=True,
+        polygon=True,
+        confidence=True,
+        webhook_ids=[],
+        alias="py_integration_empty_page_options",
+    )
+    response: InferenceResponse = v2_client.enqueue_and_get_inference(
+        input_source, params
+    )
+
+    assert response.inference.active_options is not None
+    assert response.inference.active_options.rag is True
+    assert response.inference.active_options.raw_text is True
+    assert response.inference.active_options.polygon is True
+    assert response.inference.active_options.confidence is True
+
+
 @pytest.mark.integration
 @pytest.mark.v2
 def test_parse_file_filled_single_page_must_succeed(
@@ -86,10 +118,6 @@ def test_parse_file_filled_single_page_must_succeed(
     input_source = PathInput(input_path)
     params = InferenceParameters(
         model_id=findoc_model_id,
-        rag=False,
-        raw_text=False,
-        polygon=False,
-        confidence=False,
         webhook_ids=[],
         alias="py_integration_filled_single",
     )
diff --git a/tests/v2/test_inference_response.py b/tests/v2/test_inference_response.py
@@ -186,7 +186,7 @@ def test_standard_field_simple_list():
 
 @pytest.mark.v2
 def test_raw_texts():
-    json_sample, rst_sample = _get_inference_samples("raw_texts")
+    json_sample, _ = _get_inference_samples("raw_texts")
     inference_result = InferenceResponse(json_sample)
     assert isinstance(inference_result.inference, Inference)