From b85baf7e2cfe19c31eabc73440888b76c496a027 Mon Sep 17 00:00:00 2001
From: sebastianMindee
Date: Tue, 10 Dec 2024 17:28:26 +0100
Subject: [PATCH 1/3] :sparkles: add support for remote resource fetching

---
 mindee/input/sources/url_input_source.py      | 189 +++++++++++++++++-
 .../test_url_input_source_integration.py      |  68 +++++++
 2 files changed, 255 insertions(+), 2 deletions(-)
 create mode 100644 tests/input/test_url_input_source_integration.py

diff --git a/mindee/input/sources/url_input_source.py b/mindee/input/sources/url_input_source.py
index 4f26dc02..983343e5 100644
--- a/mindee/input/sources/url_input_source.py
+++ b/mindee/input/sources/url_input_source.py
@@ -1,4 +1,15 @@
+import os
+import random
+import string
+from datetime import datetime
+from pathlib import Path
+from typing import Optional, Union
+from urllib.parse import urlparse
+
+import requests
+
 from mindee.error.mindee_error import MindeeSourceError
+from mindee.input.sources.bytes_input import BytesInput
 from mindee.input.sources.local_input_source import InputType
 from mindee.logger import logger
 
@@ -13,7 +24,7 @@ def __init__(self, url: str) -> None:
         """
-        Input document from a base64 encoded string.
+        Input document from a URL.
 
-        :param url: URL to send, must be HTTPS
+        :param url: URL to send, must be HTTPS.
         """
         if not url.lower().startswith("https"):
             raise MindeeSourceError("URL must be HTTPS")
@@ -23,3 +34,177 @@ def __init__(self, url: str) -> None:
 
         logger.debug("URL input: %s", url)
         self.url = url
+
+    def __fetch_file_content(
+        self,
+        username: Optional[str] = None,
+        password: Optional[str] = None,
+        token: Optional[str] = None,
+        headers: Optional[dict] = None,
+        max_redirects: int = 3,
+    ) -> bytes:
+        """
+        Fetch the content of the file from the URL.
+
+        :param username: Optional username for authentication.
+        :param password: Optional password for authentication.
+        :param token: Optional token for authentication.
+        :param headers: Optional additional headers for the request.
+        :param max_redirects: Maximum number of redirects to follow.
+        :return: The content of the file as bytes.
+        """
+        # Work on a copy so the caller's dict never receives the Authorization header.
+        headers = dict(headers) if headers else {}
+        if token:
+            headers["Authorization"] = f"Bearer {token}"
+        auth = None if not username or not password else (username, password)
+
+        response = UrlInputSource.__make_request(
+            self.url, auth, headers, 0, max_redirects=max_redirects
+        )
+
+        return response
+
+    def save_to_file(
+        self,
+        filepath: Union[Path, str],
+        filename: Optional[str] = None,
+        username: Optional[str] = None,
+        password: Optional[str] = None,
+        token: Optional[str] = None,
+        headers: Optional[dict] = None,
+        max_redirects: int = 3,
+    ) -> Path:
+        """
+        Save the content of the URL to a file.
+
+        :param filepath: Path to save the content to.
+        :param filename: Optional filename to give to the file.
+        :param username: Optional username for authentication.
+        :param password: Optional password for authentication.
+        :param token: Optional token for authentication.
+        :param headers: Optional additional headers for the request.
+        :param max_redirects: Maximum number of redirects to follow.
+        :return: The path to the saved file.
+        """
+        response = self.__fetch_file_content(
+            username, password, token, headers, max_redirects
+        )
+        filename = self.__fill_filename(filename)
+        full_path = Path(filepath) / filename
+        with open(full_path, "wb") as binary_file:
+            binary_file.write(response)
+        return full_path
+
+    def as_local_input_source(
+        self,
+        filename: Optional[str] = None,
+        username: Optional[str] = None,
+        password: Optional[str] = None,
+        token: Optional[str] = None,
+        headers: Optional[dict] = None,
+        max_redirects: int = 3,
+    ) -> BytesInput:
+        """
+        Convert the URL content to a BytesInput object.
+
+        :param filename: Optional filename for the BytesInput.
+        :param username: Optional username for authentication.
+        :param password: Optional password for authentication.
+        :param token: Optional token for authentication.
+        :param headers: Optional additional headers for the request.
+        :param max_redirects: Maximum number of redirects to follow.
+        :return: A BytesInput object containing the file content.
+        """
+        response = self.__fetch_file_content(
+            username, password, token, headers, max_redirects
+        )
+        filename = self.__fill_filename(filename)
+
+        return BytesInput(response, filename)
+
+    @staticmethod
+    def __extract_filename_from_url(uri) -> str:
+        """
+        Extract the filename from a given URL.
+
+        :param uri: The URL to extract the filename from.
+        :return: The extracted filename or an empty string if not found.
+        """
+        filename = os.path.basename(urlparse(uri).path)
+        return filename if filename else ""
+
+    @staticmethod
+    def __generate_file_name(extension=".tmp") -> str:
+        """
+        Generate a unique filename with a timestamp and random string.
+
+        :param extension: The file extension to use (default is '.tmp').
+        :return: A generated filename.
+        """
+        random_string = "".join(
+            random.choices(string.ascii_lowercase + string.digits, k=8)
+        )
+        timestamp = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
+        return f"mindee_temp_{timestamp}_{random_string}{extension}"
+
+    @staticmethod
+    def __get_file_extension(filename) -> Optional[str]:
+        """
+        Get the extension from a filename.
+
+        :param filename: The filename to extract the extension from.
+        :return: The lowercase file extension or None if not found.
+        """
+        ext = os.path.splitext(filename)[1]
+        return ext.lower() if ext else None
+
+    def __fill_filename(self, filename=None) -> str:
+        """
+        Fill in a filename if not provided or incomplete.
+
+        :param filename: Optional filename to use.
+        :return: A complete filename.
+        """
+        if filename is None:
+            filename = UrlInputSource.__extract_filename_from_url(self.url)
+
+        if not filename or not os.path.splitext(filename)[1]:
+            # This branch only runs when no extension is available, so use the
+            # generator's ".tmp" default instead of passing None ("...None").
+            filename = self.__generate_file_name()
+
+        return filename
+
+    @staticmethod
+    def __make_request(url, auth, headers, redirects, max_redirects) -> bytes:
+        """
+        Makes an HTTP request to the given URL, while following redirections.
+
+        :param url: The URL to request.
+        :param auth: Authentication tuple (username, password).
+        :param headers: Headers for the request.
+        :param redirects: Number of redirects already followed.
+        :param max_redirects: Maximum number of redirects to follow.
+        :return: The content of the response.
+        :raises MindeeSourceError: If max redirects are exceeded or the server
+            does not answer with a 2xx status code.
+        """
+        with requests.Session() as session:
+            # Redirects are followed by requests itself; the session caps how
+            # many may still be followed before TooManyRedirects is raised.
+            session.max_redirects = max(max_redirects - redirects, 0)
+            try:
+                result = session.get(url, headers=headers, timeout=120, auth=auth)
+            except requests.TooManyRedirects as exc:
+                raise MindeeSourceError(
+                    f"Can't reach URL after {max_redirects} out of {max_redirects} "
+                    f"redirects, aborting operation."
+                ) from exc
+
+        if not 200 <= result.status_code < 300:
+            raise MindeeSourceError(
+                f"Couldn't retrieve file from server, error code {result.status_code}."
+            )
+
+        return result.content
diff --git a/tests/input/test_url_input_source_integration.py b/tests/input/test_url_input_source_integration.py
new file mode 100644
index 00000000..26edbd41
--- /dev/null
+++ b/tests/input/test_url_input_source_integration.py
@@ -0,0 +1,68 @@
+import os
+from pathlib import Path
+
+import pytest
+
+from mindee import Client
+from mindee.product.invoice import InvoiceV4
+
+
+@pytest.fixture
+def client():
+    return Client()
+
+
+@pytest.fixture
+def output_file_path():
+    return Path("tests/data/output/")
+
+
+@pytest.fixture
+def reference_file_path():
+    return "https://github.com/mindee/client-lib-test-data/blob/main/products/invoice_splitter/invoice_5p.pdf?raw=true"
+
+
+@pytest.mark.integration
+def test_load_local_file(client, reference_file_path):
+    url_source = client.source_from_url(reference_file_path)
+    local_source = url_source.as_local_input_source()
+    result = client.parse(InvoiceV4, local_source)
+    assert result.document.n_pages == 5
+    assert result.document.filename == "invoice_5p.pdf"
+
+
+@pytest.mark.integration
+def test_custom_file_name(client, reference_file_path):
+    url_source = client.source_from_url(reference_file_path)
+    local_source = url_source.as_local_input_source("customName.pdf")
+    result = client.parse(InvoiceV4, local_source)
+    assert result.document.n_pages == 5
+    assert result.document.filename == "customName.pdf"
+
+
+@pytest.mark.integration
+def test_save_file(client, reference_file_path, output_file_path):
+    url_source = client.source_from_url(reference_file_path)
+    url_source.save_to_file(output_file_path)
+    assert os.path.exists(os.path.join(output_file_path, "invoice_5p.pdf"))
+
+
+@pytest.mark.integration
+def test_save_file_with_filename(client, reference_file_path, output_file_path):
+    url_source = client.source_from_url(reference_file_path)
+    url_source.save_to_file(output_file_path, "customFileName.pdf")
+    assert os.path.exists(os.path.join(output_file_path, "customFileName.pdf"))
+
+
+@pytest.fixture(autouse=True) +def cleanup(request, output_file_path: Path): + def remove_test_files(): + generated_files = [ + Path.resolve(output_file_path / "invoice_5p.pdf"), + Path.resolve(output_file_path / "customFileName.pdf"), + ] + for filepath in generated_files: + if os.path.exists(filepath): + os.remove(filepath) + + request.addfinalizer(remove_test_files) From 40021bd375a6eff6f6542d37004ef5d83d609209 Mon Sep 17 00:00:00 2001 From: sebastianMindee Date: Tue, 10 Dec 2024 17:31:58 +0100 Subject: [PATCH 2/3] :coffin: remove support for old carte vitale --- README.md | 2 +- docs/extras/code_samples/carte_vitale_v1.txt | 17 -- docs/extras/guide/idcard_fr_v2.md | 263 ------------------ docs/product/fr/carte_vitale_v1.rst | 15 - mindee/product/fr/__init__.py | 4 - mindee/product/fr/carte_vitale/__init__.py | 4 - .../fr/carte_vitale/carte_vitale_v1.py | 39 --- .../carte_vitale/carte_vitale_v1_document.py | 59 ---- tests/product/fr/carte_vitale/__init__.py | 0 .../fr/carte_vitale/test_carte_vitale_v1.py | 49 ---- .../test_carte_vitale_v1_regression.py | 24 -- 11 files changed, 1 insertion(+), 475 deletions(-) delete mode 100644 docs/extras/code_samples/carte_vitale_v1.txt delete mode 100644 docs/extras/guide/idcard_fr_v2.md delete mode 100644 docs/product/fr/carte_vitale_v1.rst delete mode 100644 mindee/product/fr/carte_vitale/__init__.py delete mode 100644 mindee/product/fr/carte_vitale/carte_vitale_v1.py delete mode 100644 mindee/product/fr/carte_vitale/carte_vitale_v1_document.py delete mode 100644 tests/product/fr/carte_vitale/__init__.py delete mode 100644 tests/product/fr/carte_vitale/test_carte_vitale_v1.py delete mode 100644 tests/product/fr/carte_vitale/test_carte_vitale_v1_regression.py diff --git a/README.md b/README.md index 0bf10846..f9faf4ae 100644 --- a/README.md +++ b/README.md @@ -146,7 +146,7 @@ Complete details on the working of the library are available in the following gu * [Python US Driver License 
OCR](https://developers.mindee.com/docs/python-eu-driver-license-ocr) * [Python FR Bank Account Detail OCR](https://developers.mindee.com/docs/python-fr-bank-account-details-ocr) * [Python FR Carte Grise OCR](https://developers.mindee.com/docs/python-fr-carte-grise-ocr) -* [Python FR Carte Vitale OCR](https://developers.mindee.com/docs/python-fr-carte-vitale-ocr) +* [Python FR Health Card OCR](https://developers.mindee.com/docs/python-fr-health-card-ocr) * [Python FR ID Card OCR](https://developers.mindee.com/docs/python-fr-carte-nationale-didentite-ocr) * [Python FR Petrol Receipts OCR](https://developers.mindee.com/docs/python-fr-petrol-receipts-ocr) * [Python US Bank Check OCR](https://developers.mindee.com/docs/python-us-bank-check-ocr) diff --git a/docs/extras/code_samples/carte_vitale_v1.txt b/docs/extras/code_samples/carte_vitale_v1.txt deleted file mode 100644 index d9f4c6a8..00000000 --- a/docs/extras/code_samples/carte_vitale_v1.txt +++ /dev/null @@ -1,17 +0,0 @@ -from mindee import Client, PredictResponse, product - -# Init a new client -mindee_client = Client(api_key="my-api-key") - -# Load a file from disk -input_doc = mindee_client.source_from_path("/path/to/the/file.ext") - -# Load a file from disk and parse it. -# The endpoint name must be specified since it cannot be determined from the class. 
-result: PredictResponse = mindee_client.parse(product.fr.CarteVitaleV1, input_doc) - -# Print a summary of the API result -print(result.document) - -# Print the document-level summary -# print(result.document.inference.prediction) diff --git a/docs/extras/guide/idcard_fr_v2.md b/docs/extras/guide/idcard_fr_v2.md deleted file mode 100644 index 1ef7eed1..00000000 --- a/docs/extras/guide/idcard_fr_v2.md +++ /dev/null @@ -1,263 +0,0 @@ ---- -title: FR Carte Nationale d'Identité OCR Python -category: 622b805aaec68102ea7fcbc2 -slug: python-fr-carte-nationale-didentite-ocr -parentDoc: 609808f773b0b90051d839de ---- -The Python OCR SDK supports the [Carte Nationale d'Identité API](https://platform.mindee.com/mindee/idcard_fr). - -Using the [sample below](https://github.com/mindee/client-lib-test-data/blob/main/products/idcard_fr/default_sample.jpg), we are going to illustrate how to extract the data that we want using the OCR SDK. -![Carte Nationale d'Identité sample](https://github.com/mindee/client-lib-test-data/blob/main/products/idcard_fr/default_sample.jpg?raw=true) - -# Quick-Start -```py -from mindee import Client, PredictResponse, product - -# Init a new client -mindee_client = Client(api_key="my-api-key") - -# Load a file from disk -input_doc = mindee_client.source_from_path("/path/to/the/file.ext") - -# Load a file from disk and parse it. -# The endpoint name must be specified since it cannot be determined from the class. 
-result: PredictResponse = mindee_client.parse(product.fr.IdCardV2, input_doc) - -# Print a summary of the API result -print(result.document) - -# Print the document-level summary -# print(result.document.inference.prediction) - -``` - -**Output (RST):** -```rst -######## -Document -######## -:Mindee ID: d33828f1-ef7e-4984-b9df-a2bfaa38a78d -:Filename: default_sample.jpg - -Inference -######### -:Product: mindee/idcard_fr v2.0 -:Rotation applied: Yes - -Prediction -========== -:Nationality: -:Card Access Number: 175775H55790 -:Document Number: -:Given Name(s): Victor - Marie -:Surname: DAMBARD -:Alternate Name: -:Date of Birth: 1994-04-24 -:Place of Birth: LYON 4E ARRONDISSEM -:Gender: M -:Expiry Date: 2030-04-02 -:Mrz Line 1: IDFRADAMBARD<<<<<<<<<<<<<<<<<<075025 -:Mrz Line 2: 170775H557903VICTOR< **Note:** A `Point` simply refers to a List of two numbers (`[float, float]`). - - -Aside from the previous attributes, all basic fields have access to a custom `__str__` method that can be used to print their value as a string. - - -### ClassificationField -The classification field `ClassificationField` does not implement all the basic `BaseField` attributes. It only implements **value**, **confidence** and **page_id**. - -> Note: a classification field's `value is always a `str`. - -### DateField -Aside from the basic `BaseField` attributes, the date field `DateField` also implements the following: - -* **date_object** (`Date`): an accessible representation of the value as a python object. Can be `None`. - -### StringField -The text field `StringField` only has one constraint: its **value** is an `Optional[str]`. - -## Page-Level Fields -Some fields are constrained to the page level, and so will not be retrievable at document level. - -# Attributes -The following fields are extracted for Carte Nationale d'Identité V2: - -## Alternate Name -**alternate_name** ([StringField](#stringfield)): The alternate name of the card holder. 
- -```py -print(result.document.inference.prediction.alternate_name.value) -``` - -## Issuing Authority -**authority** ([StringField](#stringfield)): The name of the issuing authority. - -```py -print(result.document.inference.prediction.authority.value) -``` - -## Date of Birth -**birth_date** ([DateField](#datefield)): The date of birth of the card holder. - -```py -print(result.document.inference.prediction.birth_date.value) -``` - -## Place of Birth -**birth_place** ([StringField](#stringfield)): The place of birth of the card holder. - -```py -print(result.document.inference.prediction.birth_place.value) -``` - -## Card Access Number -**card_access_number** ([StringField](#stringfield)): The card access number (CAN). - -```py -print(result.document.inference.prediction.card_access_number.value) -``` - -## Document Number -**document_number** ([StringField](#stringfield)): The document number. - -```py -print(result.document.inference.prediction.document_number.value) -``` - -## Document Sides -[📄](#page-level-fields "This field is only present on individual pages.")**document_side** ([ClassificationField](#classificationfield)): The sides of the document which are visible. - -#### Possible values include: - - RECTO - - VERSO - - RECTO & VERSO - -```py -for document_side_elem in result.document.document_side: - print(document_side_elem.value) -``` - -## Document Type -[📄](#page-level-fields "This field is only present on individual pages.")**document_type** ([ClassificationField](#classificationfield)): The document type or format. - -#### Possible values include: - - NEW - - OLD - -```py -for document_type_elem in result.document.document_type: - print(document_type_elem.value) -``` - -## Expiry Date -**expiry_date** ([DateField](#datefield)): The expiry date of the identification card. - -```py -print(result.document.inference.prediction.expiry_date.value) -``` - -## Gender -**gender** ([StringField](#stringfield)): The gender of the card holder. 
- -```py -print(result.document.inference.prediction.gender.value) -``` - -## Given Name(s) -**given_names** (List[[StringField](#stringfield)]): The given name(s) of the card holder. - -```py -for given_names_elem in result.document.inference.prediction.given_names: - print(given_names_elem.value) -``` - -## Date of Issue -**issue_date** ([DateField](#datefield)): The date of issue of the identification card. - -```py -print(result.document.inference.prediction.issue_date.value) -``` - -## Mrz Line 1 -**mrz1** ([StringField](#stringfield)): The Machine Readable Zone, first line. - -```py -print(result.document.inference.prediction.mrz1.value) -``` - -## Mrz Line 2 -**mrz2** ([StringField](#stringfield)): The Machine Readable Zone, second line. - -```py -print(result.document.inference.prediction.mrz2.value) -``` - -## Mrz Line 3 -**mrz3** ([StringField](#stringfield)): The Machine Readable Zone, third line. - -```py -print(result.document.inference.prediction.mrz3.value) -``` - -## Nationality -**nationality** ([StringField](#stringfield)): The nationality of the card holder. - -```py -print(result.document.inference.prediction.nationality.value) -``` - -## Surname -**surname** ([StringField](#stringfield)): The surname of the card holder. - -```py -print(result.document.inference.prediction.surname.value) -``` - -# Questions? -[Join our Slack](https://join.slack.com/t/mindee-community/shared_invite/zt-2d0ds7dtz-DPAF81ZqTy20chsYpQBW5g) diff --git a/docs/product/fr/carte_vitale_v1.rst b/docs/product/fr/carte_vitale_v1.rst deleted file mode 100644 index b9ef47a4..00000000 --- a/docs/product/fr/carte_vitale_v1.rst +++ /dev/null @@ -1,15 +0,0 @@ -Carte Vitale V1 ---------------- - -**Sample Code:** - -.. literalinclude:: /extras/code_samples/carte_vitale_v1.txt - :language: Python - -.. autoclass:: mindee.product.fr.carte_vitale.carte_vitale_v1.CarteVitaleV1 - :members: - :inherited-members: - -.. 
autoclass:: mindee.product.fr.carte_vitale.carte_vitale_v1_document.CarteVitaleV1Document - :members: - :inherited-members: diff --git a/mindee/product/fr/__init__.py b/mindee/product/fr/__init__.py index 020c7657..87039785 100644 --- a/mindee/product/fr/__init__.py +++ b/mindee/product/fr/__init__.py @@ -15,10 +15,6 @@ ) from mindee.product.fr.carte_grise.carte_grise_v1 import CarteGriseV1 from mindee.product.fr.carte_grise.carte_grise_v1_document import CarteGriseV1Document -from mindee.product.fr.carte_vitale.carte_vitale_v1 import CarteVitaleV1 -from mindee.product.fr.carte_vitale.carte_vitale_v1_document import ( - CarteVitaleV1Document, -) from mindee.product.fr.energy_bill.energy_bill_v1 import EnergyBillV1 from mindee.product.fr.energy_bill.energy_bill_v1_document import EnergyBillV1Document from mindee.product.fr.energy_bill.energy_bill_v1_energy_consumer import ( diff --git a/mindee/product/fr/carte_vitale/__init__.py b/mindee/product/fr/carte_vitale/__init__.py deleted file mode 100644 index 2c20894f..00000000 --- a/mindee/product/fr/carte_vitale/__init__.py +++ /dev/null @@ -1,4 +0,0 @@ -from mindee.product.fr.carte_vitale.carte_vitale_v1 import CarteVitaleV1 -from mindee.product.fr.carte_vitale.carte_vitale_v1_document import ( - CarteVitaleV1Document, -) diff --git a/mindee/product/fr/carte_vitale/carte_vitale_v1.py b/mindee/product/fr/carte_vitale/carte_vitale_v1.py deleted file mode 100644 index a70184b1..00000000 --- a/mindee/product/fr/carte_vitale/carte_vitale_v1.py +++ /dev/null @@ -1,39 +0,0 @@ -from typing import List - -from mindee.parsing.common.inference import Inference -from mindee.parsing.common.page import Page -from mindee.parsing.common.string_dict import StringDict -from mindee.product.fr.carte_vitale.carte_vitale_v1_document import ( - CarteVitaleV1Document, -) - - -class CarteVitaleV1(Inference): - """Carte Vitale API version 1 inference prediction.""" - - prediction: CarteVitaleV1Document - """Document-level prediction.""" - 
pages: List[Page[CarteVitaleV1Document]] - """Page-level prediction(s).""" - endpoint_name = "carte_vitale" - """Name of the endpoint.""" - endpoint_version = "1" - """Version of the endpoint.""" - - def __init__(self, raw_prediction: StringDict): - """ - Carte Vitale v1 inference. - - :param raw_prediction: Raw prediction from the HTTP response. - """ - super().__init__(raw_prediction) - - self.prediction = CarteVitaleV1Document(raw_prediction["prediction"]) - self.pages = [] - for page in raw_prediction["pages"]: - try: - page_prediction = page["prediction"] - except KeyError: - continue - if page_prediction: - self.pages.append(Page(CarteVitaleV1Document, page)) diff --git a/mindee/product/fr/carte_vitale/carte_vitale_v1_document.py b/mindee/product/fr/carte_vitale/carte_vitale_v1_document.py deleted file mode 100644 index 0120f23d..00000000 --- a/mindee/product/fr/carte_vitale/carte_vitale_v1_document.py +++ /dev/null @@ -1,59 +0,0 @@ -from typing import List, Optional - -from mindee.parsing.common.prediction import Prediction -from mindee.parsing.common.string_dict import StringDict -from mindee.parsing.common.summary_helper import clean_out_string -from mindee.parsing.standard.date import DateField -from mindee.parsing.standard.text import StringField - - -class CarteVitaleV1Document(Prediction): - """Carte Vitale API version 1.1 document data.""" - - given_names: List[StringField] - """The given name(s) of the card holder.""" - issuance_date: DateField - """The date the card was issued.""" - social_security: StringField - """The Social Security Number (Numéro de Sécurité Sociale) of the card holder""" - surname: StringField - """The surname of the card holder.""" - - def __init__( - self, - raw_prediction: StringDict, - page_id: Optional[int] = None, - ): - """ - Carte Vitale document. 
- - :param raw_prediction: Raw prediction from HTTP response - :param page_id: Page number for multi pages pdf input - """ - super().__init__(raw_prediction, page_id) - self.given_names = [ - StringField(prediction, page_id=page_id) - for prediction in raw_prediction["given_names"] - ] - self.issuance_date = DateField( - raw_prediction["issuance_date"], - page_id=page_id, - ) - self.social_security = StringField( - raw_prediction["social_security"], - page_id=page_id, - ) - self.surname = StringField( - raw_prediction["surname"], - page_id=page_id, - ) - - def __str__(self) -> str: - given_names = f"\n { ' ' * 15 }".join( - [str(item) for item in self.given_names], - ) - out_str: str = f":Given Name(s): {given_names}\n" - out_str += f":Surname: {self.surname}\n" - out_str += f":Social Security Number: {self.social_security}\n" - out_str += f":Issuance Date: {self.issuance_date}\n" - return clean_out_string(out_str) diff --git a/tests/product/fr/carte_vitale/__init__.py b/tests/product/fr/carte_vitale/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/tests/product/fr/carte_vitale/test_carte_vitale_v1.py b/tests/product/fr/carte_vitale/test_carte_vitale_v1.py deleted file mode 100644 index 514d1c49..00000000 --- a/tests/product/fr/carte_vitale/test_carte_vitale_v1.py +++ /dev/null @@ -1,49 +0,0 @@ -import json - -import pytest - -from mindee.parsing.common.document import Document -from mindee.parsing.common.page import Page -from mindee.product.fr.carte_vitale.carte_vitale_v1 import CarteVitaleV1 -from mindee.product.fr.carte_vitale.carte_vitale_v1_document import ( - CarteVitaleV1Document, -) -from tests.product import PRODUCT_DATA_DIR - -RESPONSE_DIR = PRODUCT_DATA_DIR / "carte_vitale" / "response_v1" - -CarteVitaleV1DocumentType = Document[ - CarteVitaleV1Document, - Page[CarteVitaleV1Document], -] - - -@pytest.fixture -def complete_doc() -> CarteVitaleV1DocumentType: - file_path = RESPONSE_DIR / "complete.json" - with open(file_path, 
"r", encoding="utf-8") as open_file: - json_data = json.load(open_file) - return Document(CarteVitaleV1, json_data["document"]) - - -@pytest.fixture -def empty_doc() -> CarteVitaleV1DocumentType: - file_path = RESPONSE_DIR / "empty.json" - with open(file_path, "r", encoding="utf-8") as open_file: - json_data = json.load(open_file) - return Document(CarteVitaleV1, json_data["document"]) - - -def test_complete_doc(complete_doc: CarteVitaleV1DocumentType): - file_path = RESPONSE_DIR / "summary_full.rst" - with open(file_path, "r", encoding="utf-8") as open_file: - reference_str = open_file.read() - assert str(complete_doc) == reference_str - - -def test_empty_doc(empty_doc: CarteVitaleV1DocumentType): - prediction = empty_doc.inference.prediction - assert len(prediction.given_names) == 0 - assert prediction.surname.value is None - assert prediction.social_security.value is None - assert prediction.issuance_date.value is None diff --git a/tests/product/fr/carte_vitale/test_carte_vitale_v1_regression.py b/tests/product/fr/carte_vitale/test_carte_vitale_v1_regression.py deleted file mode 100644 index 8e9e3294..00000000 --- a/tests/product/fr/carte_vitale/test_carte_vitale_v1_regression.py +++ /dev/null @@ -1,24 +0,0 @@ -import pytest - -from mindee.client import Client -from mindee.product.fr.carte_vitale.carte_vitale_v1 import CarteVitaleV1 -from tests.product import PRODUCT_DATA_DIR, get_id, get_version - - -@pytest.mark.regression -def test_default_sample(): - client = Client() - with open( - PRODUCT_DATA_DIR / "carte_vitale" / "response_v1" / "default_sample.rst", - encoding="utf-8", - ) as rst_file: - rst_ref = rst_file.read() - - sample = client.source_from_path( - PRODUCT_DATA_DIR / "carte_vitale" / "default_sample.jpg", - ) - response = client.parse(CarteVitaleV1, sample) - doc_response = response.document - doc_response.id = get_id(rst_ref) - doc_response.inference.product.version = get_version(rst_ref) - assert str(doc_response) == rst_ref From 
c6a0c3ce44c034343273c58d097367a14e07d029 Mon Sep 17 00:00:00 2001 From: sebastianMindee Date: Tue, 10 Dec 2024 17:41:57 +0100 Subject: [PATCH 3/3] restore idcard --- docs/extras/guide/idcard_fr_v2.md | 263 ++++++++++++++++++++++++++++++ 1 file changed, 263 insertions(+) create mode 100644 docs/extras/guide/idcard_fr_v2.md diff --git a/docs/extras/guide/idcard_fr_v2.md b/docs/extras/guide/idcard_fr_v2.md new file mode 100644 index 00000000..1ef7eed1 --- /dev/null +++ b/docs/extras/guide/idcard_fr_v2.md @@ -0,0 +1,263 @@ +--- +title: FR Carte Nationale d'Identité OCR Python +category: 622b805aaec68102ea7fcbc2 +slug: python-fr-carte-nationale-didentite-ocr +parentDoc: 609808f773b0b90051d839de +--- +The Python OCR SDK supports the [Carte Nationale d'Identité API](https://platform.mindee.com/mindee/idcard_fr). + +Using the [sample below](https://github.com/mindee/client-lib-test-data/blob/main/products/idcard_fr/default_sample.jpg), we are going to illustrate how to extract the data that we want using the OCR SDK. +![Carte Nationale d'Identité sample](https://github.com/mindee/client-lib-test-data/blob/main/products/idcard_fr/default_sample.jpg?raw=true) + +# Quick-Start +```py +from mindee import Client, PredictResponse, product + +# Init a new client +mindee_client = Client(api_key="my-api-key") + +# Load a file from disk +input_doc = mindee_client.source_from_path("/path/to/the/file.ext") + +# Load a file from disk and parse it. +# The endpoint name must be specified since it cannot be determined from the class. 
+result: PredictResponse = mindee_client.parse(product.fr.IdCardV2, input_doc) + +# Print a summary of the API result +print(result.document) + +# Print the document-level summary +# print(result.document.inference.prediction) + +``` + +**Output (RST):** +```rst +######## +Document +######## +:Mindee ID: d33828f1-ef7e-4984-b9df-a2bfaa38a78d +:Filename: default_sample.jpg + +Inference +######### +:Product: mindee/idcard_fr v2.0 +:Rotation applied: Yes + +Prediction +========== +:Nationality: +:Card Access Number: 175775H55790 +:Document Number: +:Given Name(s): Victor + Marie +:Surname: DAMBARD +:Alternate Name: +:Date of Birth: 1994-04-24 +:Place of Birth: LYON 4E ARRONDISSEM +:Gender: M +:Expiry Date: 2030-04-02 +:Mrz Line 1: IDFRADAMBARD<<<<<<<<<<<<<<<<<<075025 +:Mrz Line 2: 170775H557903VICTOR< **Note:** A `Point` simply refers to a List of two numbers (`[float, float]`). + + +Aside from the previous attributes, all basic fields have access to a custom `__str__` method that can be used to print their value as a string. + + +### ClassificationField +The classification field `ClassificationField` does not implement all the basic `BaseField` attributes. It only implements **value**, **confidence** and **page_id**. + +> Note: a classification field's `value is always a `str`. + +### DateField +Aside from the basic `BaseField` attributes, the date field `DateField` also implements the following: + +* **date_object** (`Date`): an accessible representation of the value as a python object. Can be `None`. + +### StringField +The text field `StringField` only has one constraint: its **value** is an `Optional[str]`. + +## Page-Level Fields +Some fields are constrained to the page level, and so will not be retrievable at document level. + +# Attributes +The following fields are extracted for Carte Nationale d'Identité V2: + +## Alternate Name +**alternate_name** ([StringField](#stringfield)): The alternate name of the card holder. 
+ +```py +print(result.document.inference.prediction.alternate_name.value) +``` + +## Issuing Authority +**authority** ([StringField](#stringfield)): The name of the issuing authority. + +```py +print(result.document.inference.prediction.authority.value) +``` + +## Date of Birth +**birth_date** ([DateField](#datefield)): The date of birth of the card holder. + +```py +print(result.document.inference.prediction.birth_date.value) +``` + +## Place of Birth +**birth_place** ([StringField](#stringfield)): The place of birth of the card holder. + +```py +print(result.document.inference.prediction.birth_place.value) +``` + +## Card Access Number +**card_access_number** ([StringField](#stringfield)): The card access number (CAN). + +```py +print(result.document.inference.prediction.card_access_number.value) +``` + +## Document Number +**document_number** ([StringField](#stringfield)): The document number. + +```py +print(result.document.inference.prediction.document_number.value) +``` + +## Document Sides +[📄](#page-level-fields "This field is only present on individual pages.")**document_side** ([ClassificationField](#classificationfield)): The sides of the document which are visible. + +#### Possible values include: + - RECTO + - VERSO + - RECTO & VERSO + +```py +for document_side_elem in result.document.document_side: + print(document_side_elem.value) +``` + +## Document Type +[📄](#page-level-fields "This field is only present on individual pages.")**document_type** ([ClassificationField](#classificationfield)): The document type or format. + +#### Possible values include: + - NEW + - OLD + +```py +for document_type_elem in result.document.document_type: + print(document_type_elem.value) +``` + +## Expiry Date +**expiry_date** ([DateField](#datefield)): The expiry date of the identification card. + +```py +print(result.document.inference.prediction.expiry_date.value) +``` + +## Gender +**gender** ([StringField](#stringfield)): The gender of the card holder. 
+ +```py +print(result.document.inference.prediction.gender.value) +``` + +## Given Name(s) +**given_names** (List[[StringField](#stringfield)]): The given name(s) of the card holder. + +```py +for given_names_elem in result.document.inference.prediction.given_names: + print(given_names_elem.value) +``` + +## Date of Issue +**issue_date** ([DateField](#datefield)): The date of issue of the identification card. + +```py +print(result.document.inference.prediction.issue_date.value) +``` + +## Mrz Line 1 +**mrz1** ([StringField](#stringfield)): The Machine Readable Zone, first line. + +```py +print(result.document.inference.prediction.mrz1.value) +``` + +## Mrz Line 2 +**mrz2** ([StringField](#stringfield)): The Machine Readable Zone, second line. + +```py +print(result.document.inference.prediction.mrz2.value) +``` + +## Mrz Line 3 +**mrz3** ([StringField](#stringfield)): The Machine Readable Zone, third line. + +```py +print(result.document.inference.prediction.mrz3.value) +``` + +## Nationality +**nationality** ([StringField](#stringfield)): The nationality of the card holder. + +```py +print(result.document.inference.prediction.nationality.value) +``` + +## Surname +**surname** ([StringField](#stringfield)): The surname of the card holder. + +```py +print(result.document.inference.prediction.surname.value) +``` + +# Questions? +[Join our Slack](https://join.slack.com/t/mindee-community/shared_invite/zt-2d0ds7dtz-DPAF81ZqTy20chsYpQBW5g)