Skip to content

Commit c101b14

Browse files
committed
🐛 options should default to None
1 parent ef36b15 commit c101b14

File tree

6 files changed

+59
-25
lines changed

6 files changed

+59
-25
lines changed

mindee/input/inference_parameters.py

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -10,16 +10,16 @@ class InferenceParameters:
1010

1111
model_id: str
1212
"""ID of the model, required."""
13-
rag: bool = False
14-
"""Use Retrieval-Augmented Generation during inference."""
15-
raw_text: bool = False
16-
"""Extract the entire text from the document as strings, and fill the ``raw_text`` attribute."""
17-
polygon: bool = False
18-
"""Calculate bounding box polygons for values, and fill the ``locations`` attribute of fields"""
19-
confidence: bool = False
13+
rag: Optional[bool] = None
14+
"""Enhance extraction accuracy with Retrieval-Augmented Generation."""
15+
raw_text: Optional[bool] = None
16+
"""Extract the full text content from the document as strings, and fill the ``raw_text`` attribute."""
17+
polygon: Optional[bool] = None
18+
"""Calculate bounding box polygons for all fields, and fill their ``locations`` attribute."""
19+
confidence: Optional[bool] = None
2020
"""
21-
Calculate confidence scores for values, and fill the ``confidence`` attribute of fields.
22-
Useful for automation.
21+
Boost the precision and accuracy of all extractions.
22+
Calculate confidence scores for all fields, and fill their ``confidence`` attribute.
2323
"""
2424
alias: Optional[str] = None
2525
"""Use an alias to link the file to your own DB. If empty, no alias will be used."""

mindee/mindee_http/mindee_api_v2.py

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -82,14 +82,14 @@ def req_post_inference_enqueue(
8282
data = {"model_id": params.model_id}
8383
url = f"{self.url_root}/inferences/enqueue"
8484

85-
if params.rag:
86-
data["rag"] = "true"
87-
if params.raw_text:
88-
data["raw_text"] = "true"
89-
if params.confidence:
90-
data["confidence"] = "true"
91-
if params.polygon:
92-
data["polygon"] = "true"
85+
if params.rag is not None:
86+
data["rag"] = str(params.rag).lower()
87+
if params.raw_text is not None:
88+
data["raw_text"] = str(params.raw_text).lower()
89+
if params.confidence is not None:
90+
data["confidence"] = str(params.confidence).lower()
91+
if params.polygon is not None:
92+
data["polygon"] = str(params.polygon).lower()
9393
if params.webhook_ids and len(params.webhook_ids) > 0:
9494
data["webhook_ids"] = ",".join(params.webhook_ids)
9595
if params.alias and len(params.alias):

mindee/parsing/v2/raw_text.py

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,10 +8,16 @@ class RawText:
88
"""Raw text extracted from the document."""
99

1010
pages: List[RawTextPage]
11-
"""Page the raw text was found on."""
11+
"""Pages of raw text content."""
1212

1313
def __init__(self, raw_response: StringDict):
1414
self.pages = [RawTextPage(page) for page in raw_response.get("pages", [])]
1515

1616
def __str__(self) -> str:
17-
return "\n\n".join([page.content for page in self.pages])
17+
"""
18+
Text content of all pages.
19+
20+
Pages are separated by 2 newline characters, and the result ends with a single trailing newline.
21+
"""
22+
page_contents = "\n\n".join([page.content for page in self.pages])
23+
return page_contents + "\n"

tests/data

tests/test_client_v2_integration.py

Lines changed: 32 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,38 @@ def test_parse_file_empty_multiple_pages_must_succeed(
7373
assert len(response.inference.result.raw_text.pages) == 2
7474

7575

76+
@pytest.mark.integration
77+
@pytest.mark.v2
78+
def test_parse_file_empty_single_page_options_must_succeed(
79+
v2_client: ClientV2, findoc_model_id: str
80+
) -> None:
81+
"""
82+
Upload a single-page blank PDF with every option enabled and make sure the
83+
returned inference reports all of those options as active.
84+
"""
85+
input_path: Path = FILE_TYPES_DIR / "pdf" / "blank_1.pdf"
86+
87+
input_source = PathInput(input_path)
88+
params = InferenceParameters(
89+
model_id=findoc_model_id,
90+
rag=True,
91+
raw_text=True,
92+
polygon=True,
93+
confidence=True,
94+
webhook_ids=[],
95+
alias="py_integration_empty_page_options",
96+
)
97+
response: InferenceResponse = v2_client.enqueue_and_get_inference(
98+
input_source, params
99+
)
100+
101+
assert response.inference.active_options is not None
102+
assert response.inference.active_options.rag is True
103+
assert response.inference.active_options.raw_text is True
104+
assert response.inference.active_options.polygon is True
105+
assert response.inference.active_options.confidence is True
106+
107+
76108
@pytest.mark.integration
77109
@pytest.mark.v2
78110
def test_parse_file_filled_single_page_must_succeed(
@@ -86,10 +118,6 @@ def test_parse_file_filled_single_page_must_succeed(
86118
input_source = PathInput(input_path)
87119
params = InferenceParameters(
88120
model_id=findoc_model_id,
89-
rag=False,
90-
raw_text=False,
91-
polygon=False,
92-
confidence=False,
93121
webhook_ids=[],
94122
alias="py_integration_filled_single",
95123
)

tests/v2/test_inference_response.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -186,7 +186,7 @@ def test_standard_field_simple_list():
186186

187187
@pytest.mark.v2
188188
def test_raw_texts():
189-
json_sample, rst_sample = _get_inference_samples("raw_texts")
189+
json_sample, _ = _get_inference_samples("raw_texts")
190190
inference_result = InferenceResponse(json_sample)
191191
assert isinstance(inference_result.inference, Inference)
192192

0 commit comments

Comments
 (0)