From a3c4acf3c429830586fd748267c2cefb4a0e412a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ianar=C3=A9=20S=C3=A9vi?= Date: Tue, 26 Nov 2024 17:20:36 +0100 Subject: [PATCH] :sparkles: add french_healthcard, driver_license, payslip_fra v3 --- .../code_samples/driver_license_v1_async.txt | 16 ++ .../french_healthcard_v1_async.txt | 16 ++ .../code_samples/payslip_fra_v3_async.txt | 16 ++ docs/extras/guide/driver_license_v1.md | 186 ++++++++++++ docs/extras/guide/eu_driver_license_v1.md | 225 --------------- ...e_vitale_v1.md => french_healthcard_v1.md} | 49 ++-- .../{payslip_fra_v2.md => payslip_fra_v3.md} | 146 ++++++---- docs/extras/guide/proof_of_address_v1.md | 208 -------------- docs/extras/guide/us_driver_license_v1.md | 270 ------------------ docs/product/driver_license_v1.rst | 15 + docs/product/fr/health_card_v1.rst | 15 + docs/product/fr/payslip_v3.rst | 47 +++ docs/product/international_id_v1.rst | 15 - mindee/cli.py | 44 +-- mindee/product/__init__.py | 1 + mindee/product/driver_license/__init__.py | 4 + .../driver_license/driver_license_v1.py | 39 +++ .../driver_license_v1_document.py | 119 ++++++++ mindee/product/fr/__init__.py | 14 + mindee/product/fr/health_card/__init__.py | 2 + .../product/fr/health_card/health_card_v1.py | 39 +++ .../fr/health_card/health_card_v1_document.py | 59 ++++ mindee/product/fr/payslip/__init__.py | 12 + mindee/product/fr/payslip/payslip_v3.py | 39 +++ .../payslip/payslip_v3_bank_account_detail.py | 62 ++++ .../product/fr/payslip/payslip_v3_document.py | 152 ++++++++++ .../product/fr/payslip/payslip_v3_employee.py | 88 ++++++ .../product/fr/payslip/payslip_v3_employer.py | 86 ++++++ .../fr/payslip/payslip_v3_employment.py | 86 ++++++ .../fr/payslip/payslip_v3_paid_time_off.py | 89 ++++++ .../fr/payslip/payslip_v3_pay_detail.py | 115 ++++++++ .../fr/payslip/payslip_v3_pay_period.py | 74 +++++ .../fr/payslip/payslip_v3_salary_detail.py | 89 ++++++ .../indian_passport_v1_document.py | 2 +- mindee/product/resume/resume_v1_document.py 
| 2 +- tests/data | 2 +- tests/product/driver_license/__init__.py | 0 .../driver_license/test_driver_license_v1.py | 58 ++++ tests/product/fr/health_card/__init__.py | 0 .../fr/health_card/test_health_card_v1.py | 49 ++++ tests/product/fr/payslip/test_payslip_v3.py | 86 ++++++ 41 files changed, 1802 insertions(+), 834 deletions(-) create mode 100644 docs/extras/code_samples/driver_license_v1_async.txt create mode 100644 docs/extras/code_samples/french_healthcard_v1_async.txt create mode 100644 docs/extras/code_samples/payslip_fra_v3_async.txt create mode 100644 docs/extras/guide/driver_license_v1.md delete mode 100644 docs/extras/guide/eu_driver_license_v1.md rename docs/extras/guide/{carte_vitale_v1.md => french_healthcard_v1.md} (66%) rename docs/extras/guide/{payslip_fra_v2.md => payslip_fra_v3.md} (73%) delete mode 100644 docs/extras/guide/proof_of_address_v1.md delete mode 100644 docs/extras/guide/us_driver_license_v1.md create mode 100644 docs/product/driver_license_v1.rst create mode 100644 docs/product/fr/health_card_v1.rst create mode 100644 docs/product/fr/payslip_v3.rst delete mode 100644 docs/product/international_id_v1.rst create mode 100644 mindee/product/driver_license/__init__.py create mode 100644 mindee/product/driver_license/driver_license_v1.py create mode 100644 mindee/product/driver_license/driver_license_v1_document.py create mode 100644 mindee/product/fr/health_card/__init__.py create mode 100644 mindee/product/fr/health_card/health_card_v1.py create mode 100644 mindee/product/fr/health_card/health_card_v1_document.py create mode 100644 mindee/product/fr/payslip/payslip_v3.py create mode 100644 mindee/product/fr/payslip/payslip_v3_bank_account_detail.py create mode 100644 mindee/product/fr/payslip/payslip_v3_document.py create mode 100644 mindee/product/fr/payslip/payslip_v3_employee.py create mode 100644 mindee/product/fr/payslip/payslip_v3_employer.py create mode 100644 mindee/product/fr/payslip/payslip_v3_employment.py create mode 100644 
mindee/product/fr/payslip/payslip_v3_paid_time_off.py create mode 100644 mindee/product/fr/payslip/payslip_v3_pay_detail.py create mode 100644 mindee/product/fr/payslip/payslip_v3_pay_period.py create mode 100644 mindee/product/fr/payslip/payslip_v3_salary_detail.py create mode 100644 tests/product/driver_license/__init__.py create mode 100644 tests/product/driver_license/test_driver_license_v1.py create mode 100644 tests/product/fr/health_card/__init__.py create mode 100644 tests/product/fr/health_card/test_health_card_v1.py create mode 100644 tests/product/fr/payslip/test_payslip_v3.py diff --git a/docs/extras/code_samples/driver_license_v1_async.txt b/docs/extras/code_samples/driver_license_v1_async.txt new file mode 100644 index 00000000..5a34e4af --- /dev/null +++ b/docs/extras/code_samples/driver_license_v1_async.txt @@ -0,0 +1,16 @@ +from mindee import Client, product, AsyncPredictResponse + +# Init a new client +mindee_client = Client(api_key="my-api-key") + +# Load a file from disk +input_doc = mindee_client.source_from_path("/path/to/the/file.ext") + +# Load a file from disk and enqueue it. +result: AsyncPredictResponse = mindee_client.enqueue_and_parse( + product.DriverLicenseV1, + input_doc, +) + +# Print a brief summary of the parsed data +print(result.document) diff --git a/docs/extras/code_samples/french_healthcard_v1_async.txt b/docs/extras/code_samples/french_healthcard_v1_async.txt new file mode 100644 index 00000000..12ff2b1f --- /dev/null +++ b/docs/extras/code_samples/french_healthcard_v1_async.txt @@ -0,0 +1,16 @@ +from mindee import Client, product, AsyncPredictResponse + +# Init a new client +mindee_client = Client(api_key="my-api-key") + +# Load a file from disk +input_doc = mindee_client.source_from_path("/path/to/the/file.ext") + +# Load a file from disk and enqueue it. 
+result: AsyncPredictResponse = mindee_client.enqueue_and_parse( + product.fr.HealthCardV1, + input_doc, +) + +# Print a brief summary of the parsed data +print(result.document) diff --git a/docs/extras/code_samples/payslip_fra_v3_async.txt b/docs/extras/code_samples/payslip_fra_v3_async.txt new file mode 100644 index 00000000..b226f5e3 --- /dev/null +++ b/docs/extras/code_samples/payslip_fra_v3_async.txt @@ -0,0 +1,16 @@ +from mindee import Client, product, AsyncPredictResponse + +# Init a new client +mindee_client = Client(api_key="my-api-key") + +# Load a file from disk +input_doc = mindee_client.source_from_path("/path/to/the/file.ext") + +# Load a file from disk and enqueue it. +result: AsyncPredictResponse = mindee_client.enqueue_and_parse( + product.fr.PayslipV3, + input_doc, +) + +# Print a brief summary of the parsed data +print(result.document) diff --git a/docs/extras/guide/driver_license_v1.md b/docs/extras/guide/driver_license_v1.md new file mode 100644 index 00000000..321e776b --- /dev/null +++ b/docs/extras/guide/driver_license_v1.md @@ -0,0 +1,186 @@ +--- +title: Driver License OCR Python +category: 622b805aaec68102ea7fcbc2 +slug: python-driver-license-ocr +parentDoc: 609808f773b0b90051d839de +--- +The Python OCR SDK supports the [Driver License API](https://platform.mindee.com/mindee/driver_license). + +Using the [sample below](https://github.com/mindee/client-lib-test-data/blob/main/products/driver_license/default_sample.jpg), we are going to illustrate how to extract the data that we want using the OCR SDK. +![Driver License sample](https://github.com/mindee/client-lib-test-data/blob/main/products/driver_license/default_sample.jpg?raw=true) + +# Quick-Start +```py +from mindee import Client, product, AsyncPredictResponse + +# Init a new client +mindee_client = Client(api_key="my-api-key") + +# Load a file from disk +input_doc = mindee_client.source_from_path("/path/to/the/file.ext") + +# Load a file from disk and enqueue it. 
+result: AsyncPredictResponse = mindee_client.enqueue_and_parse( + product.DriverLicenseV1, + input_doc, +) + +# Print a brief summary of the parsed data +print(result.document) + +``` + +**Output (RST):** +```rst +######## +Document +######## +:Mindee ID: fbdeae38-ada3-43ac-aa58-e01a3d47e474 +:Filename: default_sample.jpg + +Inference +######### +:Product: mindee/driver_license v1.0 +:Rotation applied: Yes + +Prediction +========== +:Country Code: USA +:State: AZ +:ID: D12345678 +:Category: D +:Last Name: Sample +:First Name: Jelani +:Date of Birth: 1957-02-01 +:Place of Birth: +:Expiry Date: 2018-02-01 +:Issued Date: 2013-01-10 +:Issuing Authority: +:MRZ: +:DD Number: DD1234567890123456 +``` + +# Field Types +## Standard Fields +These fields are generic and used in several products. + +### BaseField +Each prediction object contains a set of fields that inherit from the generic `BaseField` class. +A typical `BaseField` object will have the following attributes: + +* **value** (`Union[float, str]`): corresponds to the field value. Can be `None` if no value was extracted. +* **confidence** (`float`): the confidence score of the field prediction. +* **bounding_box** (`[Point, Point, Point, Point]`): contains the relative coordinates of exactly 4 vertices (points) of a right rectangle containing the field in the document. +* **polygon** (`List[Point]`): contains the relative coordinates of the vertices (`Point`) of a polygon containing the field in the image. +* **page_id** (`int`): the ID of the page, always `None` when at document-level. +* **reconstructed** (`bool`): indicates whether an object was reconstructed (not extracted as the API gave it). + +> **Note:** A `Point` simply refers to a List of two numbers (`[float, float]`). + + +Aside from the previous attributes, all basic fields have access to a custom `__str__` method that can be used to print their value as a string.
+ +### DateField +Aside from the basic `BaseField` attributes, the date field `DateField` also implements the following: + +* **date_object** (`Date`): an accessible representation of the value as a Python object. Can be `None`. + +### StringField +The text field `StringField` only has one constraint: its **value** is an `Optional[str]`. + +# Attributes +The following fields are extracted for Driver License V1: + +## Category +**category** ([StringField](#stringfield)): The category or class of the driver license. + +```py +print(result.document.inference.prediction.category.value) +``` + +## Country Code +**country_code** ([StringField](#stringfield)): The alpha-3 ISO 3166 code of the country where the driver license was issued. + +```py +print(result.document.inference.prediction.country_code.value) +``` + +## Date of Birth +**date_of_birth** ([DateField](#datefield)): The date of birth of the driver license holder. + +```py +print(result.document.inference.prediction.date_of_birth.value) +``` + +## DD Number +**dd_number** ([StringField](#stringfield)): The DD number of the driver license. + +```py +print(result.document.inference.prediction.dd_number.value) +``` + +## Expiry Date +**expiry_date** ([DateField](#datefield)): The expiry date of the driver license. + +```py +print(result.document.inference.prediction.expiry_date.value) +``` + +## First Name +**first_name** ([StringField](#stringfield)): The first name of the driver license holder. + +```py +print(result.document.inference.prediction.first_name.value) +``` + +## ID +**id** ([StringField](#stringfield)): The unique identifier of the driver license. + +```py +print(result.document.inference.prediction.id.value) +``` + +## Issued Date +**issued_date** ([DateField](#datefield)): The date when the driver license was issued.
+ +```py +print(result.document.inference.prediction.issued_date.value) +``` + +## Issuing Authority +**issuing_authority** ([StringField](#stringfield)): The authority that issued the driver license. + +```py +print(result.document.inference.prediction.issuing_authority.value) +``` + +## Last Name +**last_name** ([StringField](#stringfield)): The last name of the driver license holder. + +```py +print(result.document.inference.prediction.last_name.value) +``` + +## MRZ +**mrz** ([StringField](#stringfield)): The Machine Readable Zone (MRZ) of the driver license. + +```py +print(result.document.inference.prediction.mrz.value) +``` + +## Place of Birth +**place_of_birth** ([StringField](#stringfield)): The place of birth of the driver license holder. + +```py +print(result.document.inference.prediction.place_of_birth.value) +``` + +## State +**state** ([StringField](#stringfield)): Second part of the ISO 3166-2 code, consisting of two letters indicating the US State. + +```py +print(result.document.inference.prediction.state.value) +``` + +# Questions? +[Join our Slack](https://join.slack.com/t/mindee-community/shared_invite/zt-2d0ds7dtz-DPAF81ZqTy20chsYpQBW5g) diff --git a/docs/extras/guide/eu_driver_license_v1.md b/docs/extras/guide/eu_driver_license_v1.md deleted file mode 100644 index a7533519..00000000 --- a/docs/extras/guide/eu_driver_license_v1.md +++ /dev/null @@ -1,225 +0,0 @@ ---- -title: EU Driver License OCR Python -category: 622b805aaec68102ea7fcbc2 -slug: python-eu-driver-license-ocr -parentDoc: 609808f773b0b90051d839de ---- -The Python OCR SDK supports the [Driver License API](https://platform.mindee.com/mindee/eu_driver_license). - -Using the [sample below](https://github.com/mindee/client-lib-test-data/blob/main/products/eu_driver_license/default_sample.jpg), we are going to illustrate how to extract the data that we want using the OCR SDK. 
-![Driver License sample](https://github.com/mindee/client-lib-test-data/blob/main/products/eu_driver_license/default_sample.jpg?raw=true) - -# Quick-Start -```py -from mindee import Client, PredictResponse, product - -# Init a new client -mindee_client = Client(api_key="my-api-key") - -# Load a file from disk -input_doc = mindee_client.source_from_path("/path/to/the/file.ext") - -# Load a file from disk and parse it. -# The endpoint name must be specified since it cannot be determined from the class. -result: PredictResponse = mindee_client.parse(product.eu.DriverLicenseV1, input_doc) - -# Print a summary of the API result -print(result.document) - -# Print the document-level summary -# print(result.document.inference.prediction) - -``` - -**Output (RST):** -```rst -######## -Document -######## -:Mindee ID: b19cc32e-b3e6-4ff9-bdc7-619199355d54 -:Filename: default_sample.jpg - -Inference -######### -:Product: mindee/eu_driver_license v1.0 -:Rotation applied: Yes - -Prediction -========== -:Country Code: FR -:Document ID: 13AA00002 -:Driver License Category: AM A1 B1 B D BE DE -:Last Name: MARTIN -:First Name: PAUL -:Date Of Birth: 1981-07-14 -:Place Of Birth: Utopiacity -:Expiry Date: 2018-12-31 -:Issue Date: 2013-01-01 -:Issue Authority: 99999UpiaCity -:MRZ: D1FRA13AA000026181231MARTIN<<9 -:Address: - -Page Predictions -================ - -Page 0 ------- -:Photo: Polygon with 4 points. -:Signature: Polygon with 4 points. -:Country Code: FR -:Document ID: 13AA00002 -:Driver License Category: AM A1 B1 B D BE DE -:Last Name: MARTIN -:First Name: PAUL -:Date Of Birth: 1981-07-14 -:Place Of Birth: Utopiacity -:Expiry Date: 2018-12-31 -:Issue Date: 2013-01-01 -:Issue Authority: 99999UpiaCity -:MRZ: D1FRA13AA000026181231MARTIN<<9 -:Address: -``` - -# Field Types -## Standard Fields -These fields are generic and used in several products. - -### BaseField -Each prediction object contains a set of fields that inherit from the generic `BaseField` class. 
-A typical `BaseField` object will have the following attributes: - -* **value** (`Union[float, str]`): corresponds to the field value. Can be `None` if no value was extracted. -* **confidence** (`float`): the confidence score of the field prediction. -* **bounding_box** (`[Point, Point, Point, Point]`): contains exactly 4 relative vertices (points) coordinates of a right rectangle containing the field in the document. -* **polygon** (`List[Point]`): contains the relative vertices coordinates (`Point`) of a polygon containing the field in the image. -* **page_id** (`int`): the ID of the page, always `None` when at document-level. -* **reconstructed** (`bool`): indicates whether an object was reconstructed (not extracted as the API gave it). - -> **Note:** A `Point` simply refers to a List of two numbers (`[float, float]`). - - -Aside from the previous attributes, all basic fields have access to a custom `__str__` method that can be used to print their value as a string. - -### DateField -Aside from the basic `BaseField` attributes, the date field `DateField` also implements the following: - -* **date_object** (`Date`): an accessible representation of the value as a python object. Can be `None`. - - -### PositionField -The position field `PositionField` does not implement all the basic `BaseField` attributes, only **bounding_box**, **polygon** and **page_id**. On top of these, it has access to: - -* **rectangle** (`[Point, Point, Point, Point]`): a Polygon with four points that may be oriented (even beyond canvas). -* **quadrangle** (`[Point, Point, Point, Point]`): a free polygon made up of four points. - -### StringField -The text field `StringField` only has one constraint: its **value** is an `Optional[str]`. - -## Page-Level Fields -Some fields are constrained to the page level, and so will not be retrievable at document level. 
- -# Attributes -The following fields are extracted for Driver License V1: - -## Address -**address** ([StringField](#stringfield)): EU driver license holders address - -```py -print(result.document.inference.prediction.address.value) -``` - -## Driver License Category -**category** ([StringField](#stringfield)): EU driver license holders categories - -```py -print(result.document.inference.prediction.category.value) -``` - -## Country Code -**country_code** ([StringField](#stringfield)): Country code extracted as a string. - -```py -print(result.document.inference.prediction.country_code.value) -``` - -## Date Of Birth -**date_of_birth** ([DateField](#datefield)): The date of birth of the document holder - -```py -print(result.document.inference.prediction.date_of_birth.value) -``` - -## Document ID -**document_id** ([StringField](#stringfield)): ID number of the Document. - -```py -print(result.document.inference.prediction.document_id.value) -``` - -## Expiry Date -**expiry_date** ([DateField](#datefield)): Date the document expires - -```py -print(result.document.inference.prediction.expiry_date.value) -``` - -## First Name -**first_name** ([StringField](#stringfield)): First name(s) of the driver license holder - -```py -print(result.document.inference.prediction.first_name.value) -``` - -## Issue Authority -**issue_authority** ([StringField](#stringfield)): Authority that issued the document - -```py -print(result.document.inference.prediction.issue_authority.value) -``` - -## Issue Date -**issue_date** ([DateField](#datefield)): Date the document was issued - -```py -print(result.document.inference.prediction.issue_date.value) -``` - -## Last Name -**last_name** ([StringField](#stringfield)): Last name of the driver license holder. 
- -```py -print(result.document.inference.prediction.last_name.value) -``` - -## MRZ -**mrz** ([StringField](#stringfield)): Machine-readable license number - -```py -print(result.document.inference.prediction.mrz.value) -``` - -## Photo -[📄](#page-level-fields "This field is only present on individual pages.")**photo** ([PositionField](#positionfield)): Has a photo of the EU driver license holder - -```py -for photo_elem in result.document.photo: - print(photo_elem.polygon) -``` - -## Place Of Birth -**place_of_birth** ([StringField](#stringfield)): Place where the driver license holder was born - -```py -print(result.document.inference.prediction.place_of_birth.value) -``` - -## Signature -[📄](#page-level-fields "This field is only present on individual pages.")**signature** ([PositionField](#positionfield)): Has a signature of the EU driver license holder - -```py -for signature_elem in result.document.signature: - print(signature_elem.polygon) -``` - -# Questions? -[Join our Slack](https://join.slack.com/t/mindee-community/shared_invite/zt-2d0ds7dtz-DPAF81ZqTy20chsYpQBW5g) diff --git a/docs/extras/guide/carte_vitale_v1.md b/docs/extras/guide/french_healthcard_v1.md similarity index 66% rename from docs/extras/guide/carte_vitale_v1.md rename to docs/extras/guide/french_healthcard_v1.md index 75375af2..62725e69 100644 --- a/docs/extras/guide/carte_vitale_v1.md +++ b/docs/extras/guide/french_healthcard_v1.md @@ -1,17 +1,17 @@ --- -title: FR Carte Vitale OCR Python +title: FR Health Card OCR Python category: 622b805aaec68102ea7fcbc2 -slug: python-fr-carte-vitale-ocr +slug: python-fr-health-card-ocr parentDoc: 609808f773b0b90051d839de --- -The Python OCR SDK supports the [Carte Vitale API](https://platform.mindee.com/mindee/carte_vitale). +The Python OCR SDK supports the [Health Card API](https://platform.mindee.com/mindee/french_healthcard). 
-Using the [sample below](https://github.com/mindee/client-lib-test-data/blob/main/products/carte_vitale/default_sample.jpg), we are going to illustrate how to extract the data that we want using the OCR SDK. -![Carte Vitale sample](https://github.com/mindee/client-lib-test-data/blob/main/products/carte_vitale/default_sample.jpg?raw=true) +Using the [sample below](https://github.com/mindee/client-lib-test-data/blob/main/products/french_healthcard/default_sample.jpg), we are going to illustrate how to extract the data that we want using the OCR SDK. +![Health Card sample](https://github.com/mindee/client-lib-test-data/blob/main/products/french_healthcard/default_sample.jpg?raw=true) # Quick-Start ```py -from mindee import Client, PredictResponse, product +from mindee import Client, product, AsyncPredictResponse # Init a new client mindee_client = Client(api_key="my-api-key") @@ -19,16 +19,15 @@ mindee_client = Client(api_key="my-api-key") # Load a file from disk input_doc = mindee_client.source_from_path("/path/to/the/file.ext") -# Load a file from disk and parse it. -# The endpoint name must be specified since it cannot be determined from the class. -result: PredictResponse = mindee_client.parse(product.fr.CarteVitaleV1, input_doc) +# Load a file from disk and enqueue it. 
+result: AsyncPredictResponse = mindee_client.enqueue_and_parse( + product.fr.HealthCardV1, + input_doc, +) -# Print a summary of the API result +# Print a brief summary of the parsed data print(result.document) -# Print the document-level summary -# print(result.document.inference.prediction) - ``` **Output (RST):** @@ -36,29 +35,19 @@ print(result.document) ######## Document ######## -:Mindee ID: 8c25cc63-212b-4537-9c9b-3fbd3bd0ee20 +:Mindee ID: 9ee2733d-933a-4dcd-a73a-a31395e3b288 :Filename: default_sample.jpg Inference ######### -:Product: mindee/carte_vitale v1.0 +:Product: mindee/french_healthcard v1.0 :Rotation applied: Yes Prediction ========== :Given Name(s): NATHALIE :Surname: DURAND -:Social Security Number: 269054958815780 -:Issuance Date: 2007-01-01 - -Page Predictions -================ - -Page 0 ------- -:Given Name(s): NATHALIE -:Surname: DURAND -:Social Security Number: 269054958815780 +:Social Security Number: 2 69 05 49 588 157 80 :Issuance Date: 2007-01-01 ``` @@ -91,10 +80,10 @@ Aside from the basic `BaseField` attributes, the date field `DateField` also imp The text field `StringField` only has one constraint: its **value** is an `Optional[str]`. # Attributes -The following fields are extracted for Carte Vitale V1: +The following fields are extracted for Health Card V1: ## Given Name(s) -**given_names** (List[[StringField](#stringfield)]): The given name(s) of the card holder. +**given_names** (List[[StringField](#stringfield)]): The given names of the card holder. ```py for given_names_elem in result.document.inference.prediction.given_names: @@ -102,14 +91,14 @@ for given_names_elem in result.document.inference.prediction.given_names: ``` ## Issuance Date -**issuance_date** ([DateField](#datefield)): The date the card was issued. +**issuance_date** ([DateField](#datefield)): The date when the carte vitale document was issued. 
```py print(result.document.inference.prediction.issuance_date.value) ``` ## Social Security Number -**social_security** ([StringField](#stringfield)): The Social Security Number (Numéro de Sécurité Sociale) of the card holder +**social_security** ([StringField](#stringfield)): The social security number of the card holder. ```py print(result.document.inference.prediction.social_security.value) diff --git a/docs/extras/guide/payslip_fra_v2.md b/docs/extras/guide/payslip_fra_v3.md similarity index 73% rename from docs/extras/guide/payslip_fra_v2.md rename to docs/extras/guide/payslip_fra_v3.md index 670dcb95..b1c59ad5 100644 --- a/docs/extras/guide/payslip_fra_v2.md +++ b/docs/extras/guide/payslip_fra_v3.md @@ -21,7 +21,7 @@ input_doc = mindee_client.source_from_path("/path/to/the/file.ext") # Load a file from disk and enqueue it. result: AsyncPredictResponse = mindee_client.enqueue_and_parse( - product.fr.PayslipV2, + product.fr.PayslipV3, input_doc, ) @@ -35,16 +35,22 @@ print(result.document) ######## Document ######## -:Mindee ID: 972edba5-25aa-49d0-8431-e2557ddd788e +:Mindee ID: a479e3e7-6838-4e82-9a7d-99289f34ec7f :Filename: default_sample.jpg Inference ######### -:Product: mindee/payslip_fra v2.0 -:Rotation applied: No +:Product: mindee/payslip_fra v3.0 +:Rotation applied: Yes Prediction ========== +:Pay Period: + :End Date: 2023-03-31 + :Month: 03 + :Payment Date: 2023-03-29 + :Start Date: 2023-03-01 + :Year: 2023 :Employee: :Address: 52 RUE DES FLEURS 33500 LIBOURNE FRANCE :Date of Birth: @@ -67,21 +73,22 @@ Prediction :SWIFT: :Employment: :Category: Cadre - :Coefficient: 600.00 + :Coefficient: 600,000 :Collective Agreement: Construction -- Promotion :Job Title: Directeur Régional du Développement - :Position Level: + :Position Level: Niveau 5 Echelon 3 + :Seniority Date: :Start Date: 2022-05-01 :Salary Details: - +--------------+-----------+--------------------------------------+-----------+ - | Amount | Base | Description | Rate | - 
+==============+===========+======================================+===========+ - | 6666.67 | | Salaire de base | | - +--------------+-----------+--------------------------------------+-----------+ - | 9.30 | | Part patronale Mutuelle NR | | - +--------------+-----------+--------------------------------------+-----------+ - | 508.30 | | Avantages en nature voiture | | - +--------------+-----------+--------------------------------------+-----------+ + +--------------+-----------+--------------------------------------+--------+-----------+ + | Amount | Base | Description | Number | Rate | + +==============+===========+======================================+========+===========+ + | 6666.67 | | Salaire de base | | | + +--------------+-----------+--------------------------------------+--------+-----------+ + | 9.30 | | Part patronale Mutuelle NR | | | + +--------------+-----------+--------------------------------------+--------+-----------+ + | 508.30 | | Avantages en nature voiture | | | + +--------------+-----------+--------------------------------------+--------+-----------+ :Pay Detail: :Gross Salary: 7184.27 :Gross Salary YTD: 18074.81 @@ -93,16 +100,16 @@ Prediction :Net Taxable YTD: 14752.73 :Total Cost Employer: 10486.94 :Total Taxes and Deductions: 1650.36 -:PTO: - :Accrued This Period: 6.17 - :Balance End of Period: 6.17 - :Used This Period: -:Pay Period: - :End Date: 2023-03-31 - :Month: 03 - :Payment Date: 2023-03-29 - :Start Date: 2023-03-01 - :Year: 2023 +:Paid Time Off: + +-----------+--------+-------------+-----------+-----------+ + | Accrued | Period | Type | Remaining | Used | + +===========+========+=============+===========+===========+ + | | N-1 | VACATION | | | + +-----------+--------+-------------+-----------+-----------+ + | 6.17 | N | VACATION | 6.17 | | + +-----------+--------+-------------+-----------+-----------+ + | 2.01 | N | RTT | 2.01 | | + +-----------+--------+-------------+-----------+-----------+ ``` # Field Types @@ -131,7 +138,7 @@ 
Fields which are specific to this product; they are not used in any other produc ### Bank Account Details Field Information about the employee's bank account. -A `PayslipV2BankAccountDetail` implements the following attributes: +A `PayslipV3BankAccountDetail` implements the following attributes: * **bank_name** (`str`): The name of the bank. * **iban** (`str`): The IBAN of the bank account. @@ -141,7 +148,7 @@ Fields which are specific to this product; they are not used in any other produc ### Employee Field Information about the employee. -A `PayslipV2Employee` implements the following attributes: +A `PayslipV3Employee` implements the following attributes: * **address** (`str`): The address of the employee. * **date_of_birth** (`str`): The date of birth of the employee. @@ -155,7 +162,7 @@ Fields which are specific to this product; they are not used in any other produc ### Employer Field Information about the employer. -A `PayslipV2Employer` implements the following attributes: +A `PayslipV3Employer` implements the following attributes: * **address** (`str`): The address of the employer. * **company_id** (`str`): The company ID of the employer. @@ -169,20 +176,45 @@ Fields which are specific to this product; they are not used in any other produc ### Employment Field Information about the employment. -A `PayslipV2Employment` implements the following attributes: +A `PayslipV3Employment` implements the following attributes: * **category** (`str`): The category of the employment. -* **coefficient** (`float`): The coefficient of the employment. +* **coefficient** (`str`): The coefficient of the employment. * **collective_agreement** (`str`): The collective agreement of the employment. * **job_title** (`str`): The job title of the employee. * **position_level** (`str`): The position level of the employment. +* **seniority_date** (`str`): The seniority date of the employment. * **start_date** (`str`): The start date of the employment. 
Fields which are specific to this product; they are not used in any other product. +### Paid Time Off Field +Information about paid time off. + +A `PayslipV3PaidTimeOff` implements the following attributes: + +* **accrued** (`float`): The amount of paid time off accrued in the period. +* **period** (`str`): The paid time off period. + +#### Possible values include: + - N + - N-1 + - N-2 + +* **pto_type** (`str`): The type of paid time off. + +#### Possible values include: + - VACATION + - RTT + - COMPENSATORY + +* **remaining** (`float`): The remaining amount of paid time off at the end of the period. +* **used** (`float`): The amount of paid time off used in the period. +Fields which are specific to this product; they are not used in any other product. + ### Pay Detail Field Detailed information about the pay. -A `PayslipV2PayDetail` implements the following attributes: +A `PayslipV3PayDetail` implements the following attributes: * **gross_salary** (`float`): The gross salary of the employee. * **gross_salary_ytd** (`float`): The year-to-date gross salary of the employee. @@ -199,7 +231,7 @@ Fields which are specific to this product; they are not used in any other produc ### Pay Period Field Information about the pay period. -A `PayslipV2PayPeriod` implements the following attributes: +A `PayslipV3PayPeriod` implements the following attributes: * **end_date** (`str`): The end date of the pay period. * **month** (`str`): The month of the pay period. @@ -208,80 +240,72 @@ A `PayslipV2PayPeriod` implements the following attributes: * **year** (`str`): The year of the pay period. Fields which are specific to this product; they are not used in any other product. -### PTO Field -Information about paid time off. - -A `PayslipV2Pto` implements the following attributes: - -* **accrued_this_period** (`float`): The amount of paid time off accrued in this period. -* **balance_end_of_period** (`float`): The balance of paid time off at the end of the period. 
-* **used_this_period** (`float`): The amount of paid time off used in this period. -Fields which are specific to this product; they are not used in any other product. - ### Salary Details Field Detailed information about the earnings. -A `PayslipV2SalaryDetail` implements the following attributes: +A `PayslipV3SalaryDetail` implements the following attributes: -* **amount** (`float`): The amount of the earnings. -* **base** (`float`): The base value of the earnings. +* **amount** (`float`): The amount of the earning. +* **base** (`float`): The base rate value of the earning. * **description** (`str`): The description of the earnings. -* **rate** (`float`): The rate of the earnings. +* **number** (`float`): The number of units in the earning. +* **rate** (`float`): The rate of the earning. # Attributes -The following fields are extracted for Payslip V2: +The following fields are extracted for Payslip V3: ## Bank Account Details -**bank_account_details** ([PayslipV2BankAccountDetail](#bank-account-details-field)): Information about the employee's bank account. +**bank_account_details** ([PayslipV3BankAccountDetail](#bank-account-details-field)): Information about the employee's bank account. ```py print(result.document.inference.prediction.bank_account_details.value) ``` ## Employee -**employee** ([PayslipV2Employee](#employee-field)): Information about the employee. +**employee** ([PayslipV3Employee](#employee-field)): Information about the employee. ```py print(result.document.inference.prediction.employee.value) ``` ## Employer -**employer** ([PayslipV2Employer](#employer-field)): Information about the employer. +**employer** ([PayslipV3Employer](#employer-field)): Information about the employer. ```py print(result.document.inference.prediction.employer.value) ``` ## Employment -**employment** ([PayslipV2Employment](#employment-field)): Information about the employment. +**employment** ([PayslipV3Employment](#employment-field)): Information about the employment. 
```py print(result.document.inference.prediction.employment.value) ``` -## Pay Detail -**pay_detail** ([PayslipV2PayDetail](#pay-detail-field)): Detailed information about the pay. +## Paid Time Off +**paid_time_off** (List[[PayslipV3PaidTimeOff](#paid-time-off-field)]): Information about paid time off. ```py -print(result.document.inference.prediction.pay_detail.value) +for paid_time_off_elem in result.document.inference.prediction.paid_time_off: + print(paid_time_off_elem.value) ``` -## Pay Period -**pay_period** ([PayslipV2PayPeriod](#pay-period-field)): Information about the pay period. +## Pay Detail +**pay_detail** ([PayslipV3PayDetail](#pay-detail-field)): Detailed information about the pay. ```py -print(result.document.inference.prediction.pay_period.value) +print(result.document.inference.prediction.pay_detail.value) ``` -## PTO -**pto** ([PayslipV2Pto](#pto-field)): Information about paid time off. +## Pay Period +**pay_period** ([PayslipV3PayPeriod](#pay-period-field)): Information about the pay period. ```py -print(result.document.inference.prediction.pto.value) +print(result.document.inference.prediction.pay_period.value) ``` ## Salary Details -**salary_details** (List[[PayslipV2SalaryDetail](#salary-details-field)]): Detailed information about the earnings. +**salary_details** (List[[PayslipV3SalaryDetail](#salary-details-field)]): Detailed information about the earnings. ```py for salary_details_elem in result.document.inference.prediction.salary_details: diff --git a/docs/extras/guide/proof_of_address_v1.md b/docs/extras/guide/proof_of_address_v1.md deleted file mode 100644 index 8099d5b2..00000000 --- a/docs/extras/guide/proof_of_address_v1.md +++ /dev/null @@ -1,208 +0,0 @@ ---- -title: Proof of Address OCR Python -category: 622b805aaec68102ea7fcbc2 -slug: python-proof-of-address-ocr -parentDoc: 609808f773b0b90051d839de ---- -The Python OCR SDK supports the [Proof of Address API](https://platform.mindee.com/mindee/proof_of_address). 
- -Using the [sample below](https://github.com/mindee/client-lib-test-data/blob/main/products/proof_of_address/default_sample.jpg), we are going to illustrate how to extract the data that we want using the OCR SDK. -![Proof of Address sample](https://github.com/mindee/client-lib-test-data/blob/main/products/proof_of_address/default_sample.jpg?raw=true) - -# Quick-Start -```py -from mindee import Client, PredictResponse, product - -# Init a new client -mindee_client = Client(api_key="my-api-key") - -# Load a file from disk -input_doc = mindee_client.source_from_path("/path/to/the/file.ext") - -# Load a file from disk and parse it. -# The endpoint name must be specified since it cannot be determined from the class. -result: PredictResponse = mindee_client.parse(product.ProofOfAddressV1, input_doc) - -# Print a summary of the API result -print(result.document) - -# Print the document-level summary -# print(result.document.inference.prediction) - -``` - -**Output (RST):** -```rst -######## -Document -######## -:Mindee ID: 5d2361e9-405e-4fc1-8531-f92a3aef0c38 -:Filename: default_sample.jpg - -Inference -######### -:Product: mindee/proof_of_address v1.1 -:Rotation applied: Yes - -Prediction -========== -:Locale: en; en; USD; -:Issuer Name: PPL ELECTRIC UTILITIES -:Issuer Company Registrations: -:Issuer Address: 2 NORTH 9TH STREET CPC-GENN1 ALLENTOWN.PA 18101-1175 -:Recipient Name: -:Recipient Company Registrations: -:Recipient Address: 123 MAIN ST ANYTOWN,PA 18062 -:Dates: 2011-07-27 - 2011-07-06 - 2011-08-03 - 2011-07-27 - 2011-06-01 - 2011-07-01 - 2010-07-01 - 2010-08-01 - 2011-07-01 - 2009-08-01 - 2010-07-01 - 2011-07-27 -:Date of Issue: 2011-07-27 - -Page Predictions -================ - -Page 0 ------- -:Locale: en; en; USD; -:Issuer Name: PPL ELECTRIC UTILITIES -:Issuer Company Registrations: -:Issuer Address: 2 NORTH 9TH STREET CPC-GENN1 ALLENTOWN.PA 18101-1175 -:Recipient Name: -:Recipient Company Registrations: -:Recipient Address: 123 MAIN ST ANYTOWN,PA 18062 
-:Dates: 2011-07-27 - 2011-07-06 - 2011-08-03 - 2011-07-27 - 2011-06-01 - 2011-07-01 - 2010-07-01 - 2010-08-01 - 2011-07-01 - 2009-08-01 - 2010-07-01 - 2011-07-27 -:Date of Issue: 2011-07-27 -``` - -# Field Types -## Standard Fields -These fields are generic and used in several products. - -### BaseField -Each prediction object contains a set of fields that inherit from the generic `BaseField` class. -A typical `BaseField` object will have the following attributes: - -* **value** (`Union[float, str]`): corresponds to the field value. Can be `None` if no value was extracted. -* **confidence** (`float`): the confidence score of the field prediction. -* **bounding_box** (`[Point, Point, Point, Point]`): contains exactly 4 relative vertices (points) coordinates of a right rectangle containing the field in the document. -* **polygon** (`List[Point]`): contains the relative vertices coordinates (`Point`) of a polygon containing the field in the image. -* **page_id** (`int`): the ID of the page, always `None` when at document-level. -* **reconstructed** (`bool`): indicates whether an object was reconstructed (not extracted as the API gave it). - -> **Note:** A `Point` simply refers to a List of two numbers (`[float, float]`). - - -Aside from the previous attributes, all basic fields have access to a custom `__str__` method that can be used to print their value as a string. - - -### CompanyRegistrationField -Aside from the basic `BaseField` attributes, the company registration field `CompanyRegistrationField` also implements the following: - -* **type** (`str`): the type of company. - -### DateField -Aside from the basic `BaseField` attributes, the date field `DateField` also implements the following: - -* **date_object** (`Date`): an accessible representation of the value as a python object. Can be `None`. 
- -### LocaleField -The locale field `LocaleField` only implements the **value**, **confidence** and **page_id** base `BaseField` attributes, but it comes with its own: - -* **language** (`str`): ISO 639-1 language code (e.g.: `en` for English). Can be `None`. -* **country** (`str`): ISO 3166-1 alpha-2 or ISO 3166-1 alpha-3 code for countries (e.g.: `GRB` or `GB` for "Great Britain"). Can be `None`. -* **currency** (`str`): ISO 4217 code for currencies (e.g.: `USD` for "US Dollars"). Can be `None`. - -### StringField -The text field `StringField` only has one constraint: its **value** is an `Optional[str]`. - -# Attributes -The following fields are extracted for Proof of Address V1: - -## Date of Issue -**date** ([DateField](#datefield)): The date the document was issued. - -```py -print(result.document.inference.prediction.date.value) -``` - -## Dates -**dates** (List[[DateField](#datefield)]): List of dates found on the document. - -```py -for dates_elem in result.document.inference.prediction.dates: - print(dates_elem.value) -``` - -## Issuer Address -**issuer_address** ([StringField](#stringfield)): The address of the document's issuer. - -```py -print(result.document.inference.prediction.issuer_address.value) -``` - -## Issuer Company Registrations -**issuer_company_registration** (List[[CompanyRegistrationField](#companyregistrationfield)]): List of company registrations found for the issuer. - -```py -for issuer_company_registration_elem in result.document.inference.prediction.issuer_company_registration: - print(issuer_company_registration_elem.value) -``` - -## Issuer Name -**issuer_name** ([StringField](#stringfield)): The name of the person or company issuing the document. - -```py -print(result.document.inference.prediction.issuer_name.value) -``` - -## Locale -**locale** ([LocaleField](#localefield)): The locale detected on the document. 
- -```py -print(result.document.inference.prediction.locale.value) -``` - -## Recipient Address -**recipient_address** ([StringField](#stringfield)): The address of the recipient. - -```py -print(result.document.inference.prediction.recipient_address.value) -``` - -## Recipient Company Registrations -**recipient_company_registration** (List[[CompanyRegistrationField](#companyregistrationfield)]): List of company registrations found for the recipient. - -```py -for recipient_company_registration_elem in result.document.inference.prediction.recipient_company_registration: - print(recipient_company_registration_elem.value) -``` - -## Recipient Name -**recipient_name** ([StringField](#stringfield)): The name of the person or company receiving the document. - -```py -print(result.document.inference.prediction.recipient_name.value) -``` - -# Questions? -[Join our Slack](https://join.slack.com/t/mindee-community/shared_invite/zt-2d0ds7dtz-DPAF81ZqTy20chsYpQBW5g) diff --git a/docs/extras/guide/us_driver_license_v1.md b/docs/extras/guide/us_driver_license_v1.md deleted file mode 100644 index fb1f7d5b..00000000 --- a/docs/extras/guide/us_driver_license_v1.md +++ /dev/null @@ -1,270 +0,0 @@ ---- -title: US Driver License OCR Python -category: 622b805aaec68102ea7fcbc2 -slug: python-us-driver-license-ocr -parentDoc: 609808f773b0b90051d839de ---- -The Python OCR SDK supports the [Driver License API](https://platform.mindee.com/mindee/us_driver_license). - -Using the [sample below](https://github.com/mindee/client-lib-test-data/blob/main/products/us_driver_license/default_sample.jpg), we are going to illustrate how to extract the data that we want using the OCR SDK. 
-![Driver License sample](https://github.com/mindee/client-lib-test-data/blob/main/products/us_driver_license/default_sample.jpg?raw=true) - -# Quick-Start -```py -from mindee import Client, PredictResponse, product - -# Init a new client -mindee_client = Client(api_key="my-api-key") - -# Load a file from disk -input_doc = mindee_client.source_from_path("/path/to/the/file.ext") - -# Load a file from disk and parse it. -# The endpoint name must be specified since it cannot be determined from the class. -result: PredictResponse = mindee_client.parse(product.us.DriverLicenseV1, input_doc) - -# Print a summary of the API result -print(result.document) - -# Print the document-level summary -# print(result.document.inference.prediction) - -``` - -**Output (RST):** -```rst -######## -Document -######## -:Mindee ID: bf70068d-d3d6-49dc-b93a-b4b7d156fc3d -:Filename: default_sample.jpg - -Inference -######### -:Product: mindee/us_driver_license v1.0 -:Rotation applied: Yes - -Prediction -========== -:State: AZ -:Driver License ID: D12345678 -:Expiry Date: 2018-02-01 -:Date Of Issue: 2013-01-10 -:Last Name: SAMPLE -:First Name: JELANI -:Address: 123 MAIN STREET PHOENIX AZ 85007 -:Date Of Birth: 1957-02-01 -:Restrictions: NONE -:Endorsements: NONE -:Driver License Class: D -:Sex: M -:Height: 5-08 -:Weight: 185 -:Hair Color: BRO -:Eye Color: BRO -:Document Discriminator: 1234567890123456 - -Page Predictions -================ - -Page 0 ------- -:Photo: Polygon with 4 points. -:Signature: Polygon with 4 points. 
-:State: AZ -:Driver License ID: D12345678 -:Expiry Date: 2018-02-01 -:Date Of Issue: 2013-01-10 -:Last Name: SAMPLE -:First Name: JELANI -:Address: 123 MAIN STREET PHOENIX AZ 85007 -:Date Of Birth: 1957-02-01 -:Restrictions: NONE -:Endorsements: NONE -:Driver License Class: D -:Sex: M -:Height: 5-08 -:Weight: 185 -:Hair Color: BRO -:Eye Color: BRO -:Document Discriminator: 1234567890123456 -``` - -# Field Types -## Standard Fields -These fields are generic and used in several products. - -### BaseField -Each prediction object contains a set of fields that inherit from the generic `BaseField` class. -A typical `BaseField` object will have the following attributes: - -* **value** (`Union[float, str]`): corresponds to the field value. Can be `None` if no value was extracted. -* **confidence** (`float`): the confidence score of the field prediction. -* **bounding_box** (`[Point, Point, Point, Point]`): contains exactly 4 relative vertices (points) coordinates of a right rectangle containing the field in the document. -* **polygon** (`List[Point]`): contains the relative vertices coordinates (`Point`) of a polygon containing the field in the image. -* **page_id** (`int`): the ID of the page, always `None` when at document-level. -* **reconstructed** (`bool`): indicates whether an object was reconstructed (not extracted as the API gave it). - -> **Note:** A `Point` simply refers to a List of two numbers (`[float, float]`). - - -Aside from the previous attributes, all basic fields have access to a custom `__str__` method that can be used to print their value as a string. - -### DateField -Aside from the basic `BaseField` attributes, the date field `DateField` also implements the following: - -* **date_object** (`Date`): an accessible representation of the value as a python object. Can be `None`. - - -### PositionField -The position field `PositionField` does not implement all the basic `BaseField` attributes, only **bounding_box**, **polygon** and **page_id**. 
On top of these, it has access to: - -* **rectangle** (`[Point, Point, Point, Point]`): a Polygon with four points that may be oriented (even beyond canvas). -* **quadrangle** (`[Point, Point, Point, Point]`): a free polygon made up of four points. - -### StringField -The text field `StringField` only has one constraint: its **value** is an `Optional[str]`. - -## Page-Level Fields -Some fields are constrained to the page level, and so will not be retrievable at document level. - -# Attributes -The following fields are extracted for Driver License V1: - -## Address -**address** ([StringField](#stringfield)): US driver license holders address - -```py -print(result.document.inference.prediction.address.value) -``` - -## Date Of Birth -**date_of_birth** ([DateField](#datefield)): US driver license holders date of birth - -```py -print(result.document.inference.prediction.date_of_birth.value) -``` - -## Document Discriminator -**dd_number** ([StringField](#stringfield)): Document Discriminator Number of the US Driver License - -```py -print(result.document.inference.prediction.dd_number.value) -``` - -## Driver License Class -**dl_class** ([StringField](#stringfield)): US driver license holders class - -```py -print(result.document.inference.prediction.dl_class.value) -``` - -## Driver License ID -**driver_license_id** ([StringField](#stringfield)): ID number of the US Driver License. - -```py -print(result.document.inference.prediction.driver_license_id.value) -``` - -## Endorsements -**endorsements** ([StringField](#stringfield)): US driver license holders endorsements - -```py -print(result.document.inference.prediction.endorsements.value) -``` - -## Expiry Date -**expiry_date** ([DateField](#datefield)): Date on which the documents expires. 
- -```py -print(result.document.inference.prediction.expiry_date.value) -``` - -## Eye Color -**eye_color** ([StringField](#stringfield)): US driver license holders eye colour - -```py -print(result.document.inference.prediction.eye_color.value) -``` - -## First Name -**first_name** ([StringField](#stringfield)): US driver license holders first name(s) - -```py -print(result.document.inference.prediction.first_name.value) -``` - -## Hair Color -**hair_color** ([StringField](#stringfield)): US driver license holders hair colour - -```py -print(result.document.inference.prediction.hair_color.value) -``` - -## Height -**height** ([StringField](#stringfield)): US driver license holders hight - -```py -print(result.document.inference.prediction.height.value) -``` - -## Date Of Issue -**issued_date** ([DateField](#datefield)): Date on which the documents was issued. - -```py -print(result.document.inference.prediction.issued_date.value) -``` - -## Last Name -**last_name** ([StringField](#stringfield)): US driver license holders last name - -```py -print(result.document.inference.prediction.last_name.value) -``` - -## Photo -[📄](#page-level-fields "This field is only present on individual pages.")**photo** ([PositionField](#positionfield)): Has a photo of the US driver license holder - -```py -for photo_elem in result.document.photo: - print(photo_elem.polygon) -``` - -## Restrictions -**restrictions** ([StringField](#stringfield)): US driver license holders restrictions - -```py -print(result.document.inference.prediction.restrictions.value) -``` - -## Sex -**sex** ([StringField](#stringfield)): US driver license holders gender - -```py -print(result.document.inference.prediction.sex.value) -``` - -## Signature -[📄](#page-level-fields "This field is only present on individual pages.")**signature** ([PositionField](#positionfield)): Has a signature of the US driver license holder - -```py -for signature_elem in result.document.signature: - print(signature_elem.polygon) 
-``` - -## State -**state** ([StringField](#stringfield)): US State - -```py -print(result.document.inference.prediction.state.value) -``` - -## Weight -**weight** ([StringField](#stringfield)): US driver license holders weight - -```py -print(result.document.inference.prediction.weight.value) -``` - -# Questions? -[Join our Slack](https://join.slack.com/t/mindee-community/shared_invite/zt-2d0ds7dtz-DPAF81ZqTy20chsYpQBW5g) diff --git a/docs/product/driver_license_v1.rst b/docs/product/driver_license_v1.rst new file mode 100644 index 00000000..d3eed17f --- /dev/null +++ b/docs/product/driver_license_v1.rst @@ -0,0 +1,15 @@ +Driver License V1 +----------------- + +**Sample Code:** + +.. literalinclude:: /extras/code_samples/driver_license_v1_async.txt + :language: Python + +.. autoclass:: mindee.product.driver_license.driver_license_v1.DriverLicenseV1 + :members: + :inherited-members: + +.. autoclass:: mindee.product.driver_license.driver_license_v1_document.DriverLicenseV1Document + :members: + :inherited-members: diff --git a/docs/product/fr/health_card_v1.rst b/docs/product/fr/health_card_v1.rst new file mode 100644 index 00000000..dceb3d4f --- /dev/null +++ b/docs/product/fr/health_card_v1.rst @@ -0,0 +1,15 @@ +Health Card V1 +-------------- + +**Sample Code:** + +.. literalinclude:: /extras/code_samples/french_healthcard_v1_async.txt + :language: Python + +.. autoclass:: mindee.product.fr.health_card.health_card_v1.HealthCardV1 + :members: + :inherited-members: + +.. autoclass:: mindee.product.fr.health_card.health_card_v1_document.HealthCardV1Document + :members: + :inherited-members: diff --git a/docs/product/fr/payslip_v3.rst b/docs/product/fr/payslip_v3.rst new file mode 100644 index 00000000..fa27c4f1 --- /dev/null +++ b/docs/product/fr/payslip_v3.rst @@ -0,0 +1,47 @@ +Payslip V3 +---------- + +**Sample Code:** + +.. literalinclude:: /extras/code_samples/payslip_fra_v3_async.txt + :language: Python + +.. 
autoclass:: mindee.product.fr.payslip.payslip_v3.PayslipV3 + :members: + :inherited-members: + +.. autoclass:: mindee.product.fr.payslip.payslip_v3_document.PayslipV3Document + :members: + :inherited-members: + +.. autoclass:: mindee.product.fr.payslip.payslip_v3_pay_period.PayslipV3PayPeriod + :members: + :inherited-members: + +.. autoclass:: mindee.product.fr.payslip.payslip_v3_employee.PayslipV3Employee + :members: + :inherited-members: + +.. autoclass:: mindee.product.fr.payslip.payslip_v3_employer.PayslipV3Employer + :members: + :inherited-members: + +.. autoclass:: mindee.product.fr.payslip.payslip_v3_bank_account_detail.PayslipV3BankAccountDetail + :members: + :inherited-members: + +.. autoclass:: mindee.product.fr.payslip.payslip_v3_employment.PayslipV3Employment + :members: + :inherited-members: + +.. autoclass:: mindee.product.fr.payslip.payslip_v3_salary_detail.PayslipV3SalaryDetail + :members: + :inherited-members: + +.. autoclass:: mindee.product.fr.payslip.payslip_v3_pay_detail.PayslipV3PayDetail + :members: + :inherited-members: + +.. autoclass:: mindee.product.fr.payslip.payslip_v3_paid_time_off.PayslipV3PaidTimeOff + :members: + :inherited-members: diff --git a/docs/product/international_id_v1.rst b/docs/product/international_id_v1.rst deleted file mode 100644 index 2d37bb8f..00000000 --- a/docs/product/international_id_v1.rst +++ /dev/null @@ -1,15 +0,0 @@ -International ID V1 -------------------- - -**Sample Code:** - -.. literalinclude:: /extras/code_samples/international_id_v1.txt - :language: Python - -.. autoclass:: mindee.product.international_id.international_id_v1.InternationalIdV1 - :members: - :inherited-members: - -.. 
autoclass:: mindee.product.international_id.international_id_v1_document.InternationalIdV1Document - :members: - :inherited-members: diff --git a/mindee/cli.py b/mindee/cli.py index 45d2d848..500a8839 100644 --- a/mindee/cli.py +++ b/mindee/cli.py @@ -27,6 +27,12 @@ class CommandConfig(Generic[TypeInference]): DOCUMENTS: Dict[str, CommandConfig] = { + # "address-proof": CommandConfig( + # help="Address Proof", + # doc_class=product.AddressProofV1, + # is_sync=False, + # is_async=True, + # ), "barcode-reader": CommandConfig( help="Barcode-reader tool", doc_class=product.BarcodeReaderV1, @@ -51,11 +57,11 @@ class CommandConfig(Generic[TypeInference]): is_sync=True, is_async=False, ), - "eu-driver-license": CommandConfig( - help="EU Driver License", - doc_class=product.eu.DriverLicenseV1, - is_sync=True, - is_async=False, + "driver-license": CommandConfig( + help="Driver License", + doc_class=product.DriverLicenseV1, + is_sync=False, + is_async=True, ), "financial-document": CommandConfig( help="Financial Document (receipt or invoice)", @@ -75,11 +81,11 @@ class CommandConfig(Generic[TypeInference]): is_sync=True, is_async=False, ), - "fr-carte-vitale": CommandConfig( - help="FR Carte Vitale", - doc_class=product.fr.CarteVitaleV1, - is_sync=True, - is_async=False, + "fr-health-card": CommandConfig( + help="FR Health Card", + doc_class=product.fr.HealthCardV1, + is_sync=False, + is_async=True, ), "fr-id-card": CommandConfig( help="FR ID Card", @@ -87,6 +93,12 @@ class CommandConfig(Generic[TypeInference]): is_sync=True, is_async=False, ), + "fr-payslip": CommandConfig( + help="FR Payslip", + doc_class=product.fr.PayslipV3, + is_sync=False, + is_async=True, + ), "fr-petrol-receipt": CommandConfig( help="FR Petrol Receipt", doc_class=product.fr.PetrolReceiptV1, @@ -135,12 +147,6 @@ class CommandConfig(Generic[TypeInference]): is_sync=True, is_async=False, ), - "proof-of-address": CommandConfig( - help="Proof of Address", - doc_class=product.ProofOfAddressV1, - 
is_sync=True, - is_async=False, - ), "receipt": CommandConfig( help="Expense Receipt", doc_class=product.ReceiptV5, @@ -159,12 +165,6 @@ class CommandConfig(Generic[TypeInference]): is_sync=True, is_async=False, ), - "us-driver-license": CommandConfig( - help="US Driver License", - doc_class=product.us.DriverLicenseV1, - is_sync=True, - is_async=False, - ), "us-healthcare-card": CommandConfig( help="US Healthcare Card", doc_class=product.us.HealthcareCardV1, diff --git a/mindee/product/__init__.py b/mindee/product/__init__.py index 2ffcd5f7..84b61c23 100644 --- a/mindee/product/__init__.py +++ b/mindee/product/__init__.py @@ -13,6 +13,7 @@ from mindee.product.cropper import CropperV1, CropperV1Document from mindee.product.custom import CustomV1, CustomV1Document, CustomV1Page from mindee.product.delivery_note import DeliveryNoteV1, DeliveryNoteV1Document +from mindee.product.driver_license import DriverLicenseV1, DriverLicenseV1Document from mindee.product.financial_document import ( FinancialDocumentV1, FinancialDocumentV1Document, diff --git a/mindee/product/driver_license/__init__.py b/mindee/product/driver_license/__init__.py new file mode 100644 index 00000000..027a0e0a --- /dev/null +++ b/mindee/product/driver_license/__init__.py @@ -0,0 +1,4 @@ +from mindee.product.driver_license.driver_license_v1 import DriverLicenseV1 +from mindee.product.driver_license.driver_license_v1_document import ( + DriverLicenseV1Document, +) diff --git a/mindee/product/driver_license/driver_license_v1.py b/mindee/product/driver_license/driver_license_v1.py new file mode 100644 index 00000000..2e75992b --- /dev/null +++ b/mindee/product/driver_license/driver_license_v1.py @@ -0,0 +1,39 @@ +from typing import List + +from mindee.parsing.common.inference import Inference +from mindee.parsing.common.page import Page +from mindee.parsing.common.string_dict import StringDict +from mindee.product.driver_license.driver_license_v1_document import ( + DriverLicenseV1Document, +) + + +class 
DriverLicenseV1(Inference): + """Driver License API version 1 inference prediction.""" + + prediction: DriverLicenseV1Document + """Document-level prediction.""" + pages: List[Page[DriverLicenseV1Document]] + """Page-level prediction(s).""" + endpoint_name = "driver_license" + """Name of the endpoint.""" + endpoint_version = "1" + """Version of the endpoint.""" + + def __init__(self, raw_prediction: StringDict): + """ + Driver License v1 inference. + + :param raw_prediction: Raw prediction from the HTTP response. + """ + super().__init__(raw_prediction) + + self.prediction = DriverLicenseV1Document(raw_prediction["prediction"]) + self.pages = [] + for page in raw_prediction["pages"]: + try: + page_prediction = page["prediction"] + except KeyError: + continue + if page_prediction: + self.pages.append(Page(DriverLicenseV1Document, page)) diff --git a/mindee/product/driver_license/driver_license_v1_document.py b/mindee/product/driver_license/driver_license_v1_document.py new file mode 100644 index 00000000..dda62a9a --- /dev/null +++ b/mindee/product/driver_license/driver_license_v1_document.py @@ -0,0 +1,119 @@ +from typing import Optional + +from mindee.parsing.common.prediction import Prediction +from mindee.parsing.common.string_dict import StringDict +from mindee.parsing.common.summary_helper import clean_out_string +from mindee.parsing.standard.date import DateField +from mindee.parsing.standard.text import StringField + + +class DriverLicenseV1Document(Prediction): + """Driver License API version 1.0 document data.""" + + category: StringField + """The category or class of the driver license.""" + country_code: StringField + """The alpha-3 ISO 3166 code of the country where the driver license was issued.""" + date_of_birth: DateField + """The date of birth of the driver license holder.""" + dd_number: StringField + """The DD number of the driver license.""" + expiry_date: DateField + """The expiry date of the driver license.""" + first_name: StringField + 
"""The first name of the driver license holder.""" + id: StringField + """The unique identifier of the driver license.""" + issued_date: DateField + """The date when the driver license was issued.""" + issuing_authority: StringField + """The authority that issued the driver license.""" + last_name: StringField + """The last name of the driver license holder.""" + mrz: StringField + """The Machine Readable Zone (MRZ) of the driver license.""" + place_of_birth: StringField + """The place of birth of the driver license holder.""" + state: StringField + """Second part of the ISO 3166-2 code, consisting of two letters indicating the US State.""" + + def __init__( + self, + raw_prediction: StringDict, + page_id: Optional[int] = None, + ): + """ + Driver License document. + + :param raw_prediction: Raw prediction from HTTP response + :param page_id: Page number for multi pages pdf input + """ + super().__init__(raw_prediction, page_id) + self.category = StringField( + raw_prediction["category"], + page_id=page_id, + ) + self.country_code = StringField( + raw_prediction["country_code"], + page_id=page_id, + ) + self.date_of_birth = DateField( + raw_prediction["date_of_birth"], + page_id=page_id, + ) + self.dd_number = StringField( + raw_prediction["dd_number"], + page_id=page_id, + ) + self.expiry_date = DateField( + raw_prediction["expiry_date"], + page_id=page_id, + ) + self.first_name = StringField( + raw_prediction["first_name"], + page_id=page_id, + ) + self.id = StringField( + raw_prediction["id"], + page_id=page_id, + ) + self.issued_date = DateField( + raw_prediction["issued_date"], + page_id=page_id, + ) + self.issuing_authority = StringField( + raw_prediction["issuing_authority"], + page_id=page_id, + ) + self.last_name = StringField( + raw_prediction["last_name"], + page_id=page_id, + ) + self.mrz = StringField( + raw_prediction["mrz"], + page_id=page_id, + ) + self.place_of_birth = StringField( + raw_prediction["place_of_birth"], + page_id=page_id, + ) + 
self.state = StringField( + raw_prediction["state"], + page_id=page_id, + ) + + def __str__(self) -> str: + out_str: str = f":Country Code: {self.country_code}\n" + out_str += f":State: {self.state}\n" + out_str += f":ID: {self.id}\n" + out_str += f":Category: {self.category}\n" + out_str += f":Last Name: {self.last_name}\n" + out_str += f":First Name: {self.first_name}\n" + out_str += f":Date of Birth: {self.date_of_birth}\n" + out_str += f":Place of Birth: {self.place_of_birth}\n" + out_str += f":Expiry Date: {self.expiry_date}\n" + out_str += f":Issued Date: {self.issued_date}\n" + out_str += f":Issuing Authority: {self.issuing_authority}\n" + out_str += f":MRZ: {self.mrz}\n" + out_str += f":DD Number: {self.dd_number}\n" + return clean_out_string(out_str) diff --git a/mindee/product/fr/__init__.py b/mindee/product/fr/__init__.py index 45f04df9..020c7657 100644 --- a/mindee/product/fr/__init__.py +++ b/mindee/product/fr/__init__.py @@ -39,6 +39,8 @@ from mindee.product.fr.energy_bill.energy_bill_v1_taxes_and_contribution import ( EnergyBillV1TaxesAndContribution, ) +from mindee.product.fr.health_card.health_card_v1 import HealthCardV1 +from mindee.product.fr.health_card.health_card_v1_document import HealthCardV1Document from mindee.product.fr.id_card.id_card_v1 import IdCardV1 from mindee.product.fr.id_card.id_card_v1_document import IdCardV1Document from mindee.product.fr.id_card.id_card_v1_page import IdCardV1Page @@ -57,6 +59,18 @@ from mindee.product.fr.payslip.payslip_v2_pay_period import PayslipV2PayPeriod from mindee.product.fr.payslip.payslip_v2_pto import PayslipV2Pto from mindee.product.fr.payslip.payslip_v2_salary_detail import PayslipV2SalaryDetail +from mindee.product.fr.payslip.payslip_v3 import PayslipV3 +from mindee.product.fr.payslip.payslip_v3_bank_account_detail import ( + PayslipV3BankAccountDetail, +) +from mindee.product.fr.payslip.payslip_v3_document import PayslipV3Document +from mindee.product.fr.payslip.payslip_v3_employee import 
PayslipV3Employee +from mindee.product.fr.payslip.payslip_v3_employer import PayslipV3Employer +from mindee.product.fr.payslip.payslip_v3_employment import PayslipV3Employment +from mindee.product.fr.payslip.payslip_v3_paid_time_off import PayslipV3PaidTimeOff +from mindee.product.fr.payslip.payslip_v3_pay_detail import PayslipV3PayDetail +from mindee.product.fr.payslip.payslip_v3_pay_period import PayslipV3PayPeriod +from mindee.product.fr.payslip.payslip_v3_salary_detail import PayslipV3SalaryDetail from mindee.product.fr.petrol_receipt.petrol_receipt_v1 import PetrolReceiptV1 from mindee.product.fr.petrol_receipt.petrol_receipt_v1_document import ( PetrolReceiptV1Document, diff --git a/mindee/product/fr/health_card/__init__.py b/mindee/product/fr/health_card/__init__.py new file mode 100644 index 00000000..e8be9b7b --- /dev/null +++ b/mindee/product/fr/health_card/__init__.py @@ -0,0 +1,2 @@ +from mindee.product.fr.health_card.health_card_v1 import HealthCardV1 +from mindee.product.fr.health_card.health_card_v1_document import HealthCardV1Document diff --git a/mindee/product/fr/health_card/health_card_v1.py b/mindee/product/fr/health_card/health_card_v1.py new file mode 100644 index 00000000..d3772ccf --- /dev/null +++ b/mindee/product/fr/health_card/health_card_v1.py @@ -0,0 +1,39 @@ +from typing import List + +from mindee.parsing.common.inference import Inference +from mindee.parsing.common.page import Page +from mindee.parsing.common.string_dict import StringDict +from mindee.product.fr.health_card.health_card_v1_document import ( + HealthCardV1Document, +) + + +class HealthCardV1(Inference): + """Health Card API version 1 inference prediction.""" + + prediction: HealthCardV1Document + """Document-level prediction.""" + pages: List[Page[HealthCardV1Document]] + """Page-level prediction(s).""" + endpoint_name = "french_healthcard" + """Name of the endpoint.""" + endpoint_version = "1" + """Version of the endpoint.""" + + def __init__(self, raw_prediction: 
StringDict): + """ + Health Card v1 inference. + + :param raw_prediction: Raw prediction from the HTTP response. + """ + super().__init__(raw_prediction) + + self.prediction = HealthCardV1Document(raw_prediction["prediction"]) + self.pages = [] + for page in raw_prediction["pages"]: + try: + page_prediction = page["prediction"] + except KeyError: + continue + if page_prediction: + self.pages.append(Page(HealthCardV1Document, page)) diff --git a/mindee/product/fr/health_card/health_card_v1_document.py b/mindee/product/fr/health_card/health_card_v1_document.py new file mode 100644 index 00000000..1c9e2eab --- /dev/null +++ b/mindee/product/fr/health_card/health_card_v1_document.py @@ -0,0 +1,59 @@ +from typing import List, Optional + +from mindee.parsing.common.prediction import Prediction +from mindee.parsing.common.string_dict import StringDict +from mindee.parsing.common.summary_helper import clean_out_string +from mindee.parsing.standard.date import DateField +from mindee.parsing.standard.text import StringField + + +class HealthCardV1Document(Prediction): + """Health Card API version 1.0 document data.""" + + given_names: List[StringField] + """The given names of the card holder.""" + issuance_date: DateField + """The date when the carte vitale document was issued.""" + social_security: StringField + """The social security number of the card holder.""" + surname: StringField + """The surname of the card holder.""" + + def __init__( + self, + raw_prediction: StringDict, + page_id: Optional[int] = None, + ): + """ + Health Card document. 
+ + :param raw_prediction: Raw prediction from HTTP response + :param page_id: Page number for multi pages pdf input + """ + super().__init__(raw_prediction, page_id) + self.given_names = [ + StringField(prediction, page_id=page_id) + for prediction in raw_prediction["given_names"] + ] + self.issuance_date = DateField( + raw_prediction["issuance_date"], + page_id=page_id, + ) + self.social_security = StringField( + raw_prediction["social_security"], + page_id=page_id, + ) + self.surname = StringField( + raw_prediction["surname"], + page_id=page_id, + ) + + def __str__(self) -> str: + given_names = f"\n { ' ' * 15 }".join( + [str(item) for item in self.given_names], + ) + out_str: str = f":Given Name(s): {given_names}\n" + out_str += f":Surname: {self.surname}\n" + out_str += f":Social Security Number: {self.social_security}\n" + out_str += f":Issuance Date: {self.issuance_date}\n" + return clean_out_string(out_str) diff --git a/mindee/product/fr/payslip/__init__.py b/mindee/product/fr/payslip/__init__.py index 81cde194..4a193d78 100644 --- a/mindee/product/fr/payslip/__init__.py +++ b/mindee/product/fr/payslip/__init__.py @@ -10,3 +10,15 @@ from mindee.product.fr.payslip.payslip_v2_pay_period import PayslipV2PayPeriod from mindee.product.fr.payslip.payslip_v2_pto import PayslipV2Pto from mindee.product.fr.payslip.payslip_v2_salary_detail import PayslipV2SalaryDetail +from mindee.product.fr.payslip.payslip_v3 import PayslipV3 +from mindee.product.fr.payslip.payslip_v3_bank_account_detail import ( + PayslipV3BankAccountDetail, +) +from mindee.product.fr.payslip.payslip_v3_document import PayslipV3Document +from mindee.product.fr.payslip.payslip_v3_employee import PayslipV3Employee +from mindee.product.fr.payslip.payslip_v3_employer import PayslipV3Employer +from mindee.product.fr.payslip.payslip_v3_employment import PayslipV3Employment +from mindee.product.fr.payslip.payslip_v3_paid_time_off import PayslipV3PaidTimeOff +from 
mindee.product.fr.payslip.payslip_v3_pay_detail import PayslipV3PayDetail +from mindee.product.fr.payslip.payslip_v3_pay_period import PayslipV3PayPeriod +from mindee.product.fr.payslip.payslip_v3_salary_detail import PayslipV3SalaryDetail diff --git a/mindee/product/fr/payslip/payslip_v3.py b/mindee/product/fr/payslip/payslip_v3.py new file mode 100644 index 00000000..4b2eeec0 --- /dev/null +++ b/mindee/product/fr/payslip/payslip_v3.py @@ -0,0 +1,39 @@ +from typing import List + +from mindee.parsing.common.inference import Inference +from mindee.parsing.common.page import Page +from mindee.parsing.common.string_dict import StringDict +from mindee.product.fr.payslip.payslip_v3_document import ( + PayslipV3Document, +) + + +class PayslipV3(Inference): + """Payslip API version 3 inference prediction.""" + + prediction: PayslipV3Document + """Document-level prediction.""" + pages: List[Page[PayslipV3Document]] + """Page-level prediction(s).""" + endpoint_name = "payslip_fra" + """Name of the endpoint.""" + endpoint_version = "3" + """Version of the endpoint.""" + + def __init__(self, raw_prediction: StringDict): + """ + Payslip v3 inference. + + :param raw_prediction: Raw prediction from the HTTP response. 
+ """ + super().__init__(raw_prediction) + + self.prediction = PayslipV3Document(raw_prediction["prediction"]) + self.pages = [] + for page in raw_prediction["pages"]: + try: + page_prediction = page["prediction"] + except KeyError: + continue + if page_prediction: + self.pages.append(Page(PayslipV3Document, page)) diff --git a/mindee/product/fr/payslip/payslip_v3_bank_account_detail.py b/mindee/product/fr/payslip/payslip_v3_bank_account_detail.py new file mode 100644 index 00000000..7e7fc39b --- /dev/null +++ b/mindee/product/fr/payslip/payslip_v3_bank_account_detail.py @@ -0,0 +1,62 @@ +from typing import Dict, Optional + +from mindee.parsing.common.string_dict import StringDict +from mindee.parsing.common.summary_helper import clean_out_string, format_for_display +from mindee.parsing.standard.base import FieldConfidenceMixin, FieldPositionMixin + + +class PayslipV3BankAccountDetail(FieldPositionMixin, FieldConfidenceMixin): + """Information about the employee's bank account.""" + + bank_name: Optional[str] + """The name of the bank.""" + iban: Optional[str] + """The IBAN of the bank account.""" + swift: Optional[str] + """The SWIFT code of the bank.""" + page_n: int + """The document page on which the information was found.""" + + def __init__( + self, + raw_prediction: StringDict, + page_id: Optional[int] = None, + ): + self._set_confidence(raw_prediction) + self._set_position(raw_prediction) + + if page_id is None: + try: + self.page_n = raw_prediction["page_id"] + except KeyError: + pass + else: + self.page_n = page_id + + self.bank_name = raw_prediction["bank_name"] + self.iban = raw_prediction["iban"] + self.swift = raw_prediction["swift"] + + def _printable_values(self) -> Dict[str, str]: + """Return values for printing.""" + out_dict: Dict[str, str] = {} + out_dict["bank_name"] = format_for_display(self.bank_name) + out_dict["iban"] = format_for_display(self.iban) + out_dict["swift"] = format_for_display(self.swift) + return out_dict + + def 
to_field_list(self) -> str: + """Output the object in a format suitable for inclusion in an rST field list.""" + printable = self._printable_values() + out_str: str = f" :Bank Name: {printable['bank_name']}\n" + out_str += f" :IBAN: {printable['iban']}\n" + out_str += f" :SWIFT: {printable['swift']}\n" + return out_str.rstrip() + + def __str__(self) -> str: + """Default string representation.""" + printable = self._printable_values() + out_str: str = f"Bank Name: {printable['bank_name']}, \n" + out_str += f"IBAN: {printable['iban']}, \n" + out_str += f"SWIFT: {printable['swift']}, \n" + return clean_out_string(out_str) diff --git a/mindee/product/fr/payslip/payslip_v3_document.py b/mindee/product/fr/payslip/payslip_v3_document.py new file mode 100644 index 00000000..fad4b82b --- /dev/null +++ b/mindee/product/fr/payslip/payslip_v3_document.py @@ -0,0 +1,152 @@ +from typing import List, Optional + +from mindee.parsing.common.prediction import Prediction +from mindee.parsing.common.string_dict import StringDict +from mindee.parsing.common.summary_helper import clean_out_string +from mindee.product.fr.payslip.payslip_v3_bank_account_detail import ( + PayslipV3BankAccountDetail, +) +from mindee.product.fr.payslip.payslip_v3_employee import PayslipV3Employee +from mindee.product.fr.payslip.payslip_v3_employer import PayslipV3Employer +from mindee.product.fr.payslip.payslip_v3_employment import PayslipV3Employment +from mindee.product.fr.payslip.payslip_v3_paid_time_off import PayslipV3PaidTimeOff +from mindee.product.fr.payslip.payslip_v3_pay_detail import PayslipV3PayDetail +from mindee.product.fr.payslip.payslip_v3_pay_period import PayslipV3PayPeriod +from mindee.product.fr.payslip.payslip_v3_salary_detail import PayslipV3SalaryDetail + + +class PayslipV3Document(Prediction): + """Payslip API version 3.0 document data.""" + + bank_account_details: PayslipV3BankAccountDetail + """Information about the employee's bank account.""" + employee: PayslipV3Employee + 
"""Information about the employee.""" + employer: PayslipV3Employer + """Information about the employer.""" + employment: PayslipV3Employment + """Information about the employment.""" + paid_time_off: List[PayslipV3PaidTimeOff] + """Information about paid time off.""" + pay_detail: PayslipV3PayDetail + """Detailed information about the pay.""" + pay_period: PayslipV3PayPeriod + """Information about the pay period.""" + salary_details: List[PayslipV3SalaryDetail] + """Detailed information about the earnings.""" + + def __init__( + self, + raw_prediction: StringDict, + page_id: Optional[int] = None, + ): + """ + Payslip document. + + :param raw_prediction: Raw prediction from HTTP response + :param page_id: Page number for multi pages pdf input + """ + super().__init__(raw_prediction, page_id) + self.bank_account_details = PayslipV3BankAccountDetail( + raw_prediction["bank_account_details"], + page_id=page_id, + ) + self.employee = PayslipV3Employee( + raw_prediction["employee"], + page_id=page_id, + ) + self.employer = PayslipV3Employer( + raw_prediction["employer"], + page_id=page_id, + ) + self.employment = PayslipV3Employment( + raw_prediction["employment"], + page_id=page_id, + ) + self.paid_time_off = [ + PayslipV3PaidTimeOff(prediction, page_id=page_id) + for prediction in raw_prediction["paid_time_off"] + ] + self.pay_detail = PayslipV3PayDetail( + raw_prediction["pay_detail"], + page_id=page_id, + ) + self.pay_period = PayslipV3PayPeriod( + raw_prediction["pay_period"], + page_id=page_id, + ) + self.salary_details = [ + PayslipV3SalaryDetail(prediction, page_id=page_id) + for prediction in raw_prediction["salary_details"] + ] + + @staticmethod + def _salary_details_separator(char: str) -> str: + out_str = " " + out_str += f"+{char * 14}" + out_str += f"+{char * 11}" + out_str += f"+{char * 38}" + out_str += f"+{char * 8}" + out_str += f"+{char * 11}" + return out_str + "+" + + def _salary_details_to_str(self) -> str: + if not self.salary_details: + return "" 
+ + lines = f"\n{self._salary_details_separator('-')}\n ".join( + [item.to_table_line() for item in self.salary_details] + ) + out_str = "" + out_str += f"\n{self._salary_details_separator('-')}\n " + out_str += " | Amount " + out_str += " | Base " + out_str += " | Description " + out_str += " | Number" + out_str += " | Rate " + out_str += f" |\n{self._salary_details_separator('=')}" + out_str += f"\n {lines}" + out_str += f"\n{self._salary_details_separator('-')}" + return out_str + + @staticmethod + def _paid_time_off_separator(char: str) -> str: + out_str = " " + out_str += f"+{char * 11}" + out_str += f"+{char * 8}" + out_str += f"+{char * 13}" + out_str += f"+{char * 11}" + out_str += f"+{char * 11}" + return out_str + "+" + + def _paid_time_off_to_str(self) -> str: + if not self.paid_time_off: + return "" + + lines = f"\n{self._paid_time_off_separator('-')}\n ".join( + [item.to_table_line() for item in self.paid_time_off] + ) + out_str = "" + out_str += f"\n{self._paid_time_off_separator('-')}\n " + out_str += " | Accrued " + out_str += " | Period" + out_str += " | Type " + out_str += " | Remaining" + out_str += " | Used " + out_str += f" |\n{self._paid_time_off_separator('=')}" + out_str += f"\n {lines}" + out_str += f"\n{self._paid_time_off_separator('-')}" + return out_str + + def __str__(self) -> str: + out_str: str = f":Pay Period:\n{self.pay_period.to_field_list()}\n" + out_str += f":Employee:\n{self.employee.to_field_list()}\n" + out_str += f":Employer:\n{self.employer.to_field_list()}\n" + out_str += ( + f":Bank Account Details:\n{self.bank_account_details.to_field_list()}\n" + ) + out_str += f":Employment:\n{self.employment.to_field_list()}\n" + out_str += f":Salary Details: {self._salary_details_to_str()}\n" + out_str += f":Pay Detail:\n{self.pay_detail.to_field_list()}\n" + out_str += f":Paid Time Off: {self._paid_time_off_to_str()}\n" + return clean_out_string(out_str) diff --git a/mindee/product/fr/payslip/payslip_v3_employee.py 
b/mindee/product/fr/payslip/payslip_v3_employee.py new file mode 100644 index 00000000..d7f42251 --- /dev/null +++ b/mindee/product/fr/payslip/payslip_v3_employee.py @@ -0,0 +1,88 @@ +from typing import Dict, Optional + +from mindee.parsing.common.string_dict import StringDict +from mindee.parsing.common.summary_helper import clean_out_string, format_for_display +from mindee.parsing.standard.base import FieldConfidenceMixin, FieldPositionMixin + + +class PayslipV3Employee(FieldPositionMixin, FieldConfidenceMixin): + """Information about the employee.""" + + address: Optional[str] + """The address of the employee.""" + date_of_birth: Optional[str] + """The date of birth of the employee.""" + first_name: Optional[str] + """The first name of the employee.""" + last_name: Optional[str] + """The last name of the employee.""" + phone_number: Optional[str] + """The phone number of the employee.""" + registration_number: Optional[str] + """The registration number of the employee.""" + social_security_number: Optional[str] + """The social security number of the employee.""" + page_n: int + """The document page on which the information was found.""" + + def __init__( + self, + raw_prediction: StringDict, + page_id: Optional[int] = None, + ): + self._set_confidence(raw_prediction) + self._set_position(raw_prediction) + + if page_id is None: + try: + self.page_n = raw_prediction["page_id"] + except KeyError: + pass + else: + self.page_n = page_id + + self.address = raw_prediction["address"] + self.date_of_birth = raw_prediction["date_of_birth"] + self.first_name = raw_prediction["first_name"] + self.last_name = raw_prediction["last_name"] + self.phone_number = raw_prediction["phone_number"] + self.registration_number = raw_prediction["registration_number"] + self.social_security_number = raw_prediction["social_security_number"] + + def _printable_values(self) -> Dict[str, str]: + """Return values for printing.""" + out_dict: Dict[str, str] = {} + out_dict["address"] = 
format_for_display(self.address) + out_dict["date_of_birth"] = format_for_display(self.date_of_birth) + out_dict["first_name"] = format_for_display(self.first_name) + out_dict["last_name"] = format_for_display(self.last_name) + out_dict["phone_number"] = format_for_display(self.phone_number) + out_dict["registration_number"] = format_for_display(self.registration_number) + out_dict["social_security_number"] = format_for_display( + self.social_security_number + ) + return out_dict + + def to_field_list(self) -> str: + """Output the object in a format suitable for inclusion in an rST field list.""" + printable = self._printable_values() + out_str: str = f" :Address: {printable['address']}\n" + out_str += f" :Date of Birth: {printable['date_of_birth']}\n" + out_str += f" :First Name: {printable['first_name']}\n" + out_str += f" :Last Name: {printable['last_name']}\n" + out_str += f" :Phone Number: {printable['phone_number']}\n" + out_str += f" :Registration Number: {printable['registration_number']}\n" + out_str += f" :Social Security Number: {printable['social_security_number']}\n" + return out_str.rstrip() + + def __str__(self) -> str: + """Default string representation.""" + printable = self._printable_values() + out_str: str = f"Address: {printable['address']}, \n" + out_str += f"Date of Birth: {printable['date_of_birth']}, \n" + out_str += f"First Name: {printable['first_name']}, \n" + out_str += f"Last Name: {printable['last_name']}, \n" + out_str += f"Phone Number: {printable['phone_number']}, \n" + out_str += f"Registration Number: {printable['registration_number']}, \n" + out_str += f"Social Security Number: {printable['social_security_number']}, \n" + return clean_out_string(out_str) diff --git a/mindee/product/fr/payslip/payslip_v3_employer.py b/mindee/product/fr/payslip/payslip_v3_employer.py new file mode 100644 index 00000000..f53d1c61 --- /dev/null +++ b/mindee/product/fr/payslip/payslip_v3_employer.py @@ -0,0 +1,86 @@ +from typing import Dict, Optional 
+ +from mindee.parsing.common.string_dict import StringDict +from mindee.parsing.common.summary_helper import clean_out_string, format_for_display +from mindee.parsing.standard.base import FieldConfidenceMixin, FieldPositionMixin + + +class PayslipV3Employer(FieldPositionMixin, FieldConfidenceMixin): + """Information about the employer.""" + + address: Optional[str] + """The address of the employer.""" + company_id: Optional[str] + """The company ID of the employer.""" + company_site: Optional[str] + """The site of the company.""" + naf_code: Optional[str] + """The NAF code of the employer.""" + name: Optional[str] + """The name of the employer.""" + phone_number: Optional[str] + """The phone number of the employer.""" + urssaf_number: Optional[str] + """The URSSAF number of the employer.""" + page_n: int + """The document page on which the information was found.""" + + def __init__( + self, + raw_prediction: StringDict, + page_id: Optional[int] = None, + ): + self._set_confidence(raw_prediction) + self._set_position(raw_prediction) + + if page_id is None: + try: + self.page_n = raw_prediction["page_id"] + except KeyError: + pass + else: + self.page_n = page_id + + self.address = raw_prediction["address"] + self.company_id = raw_prediction["company_id"] + self.company_site = raw_prediction["company_site"] + self.naf_code = raw_prediction["naf_code"] + self.name = raw_prediction["name"] + self.phone_number = raw_prediction["phone_number"] + self.urssaf_number = raw_prediction["urssaf_number"] + + def _printable_values(self) -> Dict[str, str]: + """Return values for printing.""" + out_dict: Dict[str, str] = {} + out_dict["address"] = format_for_display(self.address) + out_dict["company_id"] = format_for_display(self.company_id) + out_dict["company_site"] = format_for_display(self.company_site) + out_dict["naf_code"] = format_for_display(self.naf_code) + out_dict["name"] = format_for_display(self.name) + out_dict["phone_number"] = format_for_display(self.phone_number) 
+ out_dict["urssaf_number"] = format_for_display(self.urssaf_number) + return out_dict + + def to_field_list(self) -> str: + """Output the object in a format suitable for inclusion in an rST field list.""" + printable = self._printable_values() + out_str: str = f" :Address: {printable['address']}\n" + out_str += f" :Company ID: {printable['company_id']}\n" + out_str += f" :Company Site: {printable['company_site']}\n" + out_str += f" :NAF Code: {printable['naf_code']}\n" + out_str += f" :Name: {printable['name']}\n" + out_str += f" :Phone Number: {printable['phone_number']}\n" + out_str += f" :URSSAF Number: {printable['urssaf_number']}\n" + return out_str.rstrip() + + def __str__(self) -> str: + """Default string representation.""" + printable = self._printable_values() + out_str: str = f"Address: {printable['address']}, \n" + out_str += f"Company ID: {printable['company_id']}, \n" + out_str += f"Company Site: {printable['company_site']}, \n" + out_str += f"NAF Code: {printable['naf_code']}, \n" + out_str += f"Name: {printable['name']}, \n" + out_str += f"Phone Number: {printable['phone_number']}, \n" + out_str += f"URSSAF Number: {printable['urssaf_number']}, \n" + return clean_out_string(out_str) diff --git a/mindee/product/fr/payslip/payslip_v3_employment.py b/mindee/product/fr/payslip/payslip_v3_employment.py new file mode 100644 index 00000000..92c67e32 --- /dev/null +++ b/mindee/product/fr/payslip/payslip_v3_employment.py @@ -0,0 +1,86 @@ +from typing import Dict, Optional + +from mindee.parsing.common.string_dict import StringDict +from mindee.parsing.common.summary_helper import clean_out_string, format_for_display +from mindee.parsing.standard.base import FieldConfidenceMixin, FieldPositionMixin + + +class PayslipV3Employment(FieldPositionMixin, FieldConfidenceMixin): + """Information about the employment.""" + + category: Optional[str] + """The category of the employment.""" + coefficient: Optional[str] + """The coefficient of the employment.""" + 
collective_agreement: Optional[str] + """The collective agreement of the employment.""" + job_title: Optional[str] + """The job title of the employee.""" + position_level: Optional[str] + """The position level of the employment.""" + seniority_date: Optional[str] + """The seniority date of the employment.""" + start_date: Optional[str] + """The start date of the employment.""" + page_n: int + """The document page on which the information was found.""" + + def __init__( + self, + raw_prediction: StringDict, + page_id: Optional[int] = None, + ): + self._set_confidence(raw_prediction) + self._set_position(raw_prediction) + + if page_id is None: + try: + self.page_n = raw_prediction["page_id"] + except KeyError: + pass + else: + self.page_n = page_id + + self.category = raw_prediction["category"] + self.coefficient = raw_prediction["coefficient"] + self.collective_agreement = raw_prediction["collective_agreement"] + self.job_title = raw_prediction["job_title"] + self.position_level = raw_prediction["position_level"] + self.seniority_date = raw_prediction["seniority_date"] + self.start_date = raw_prediction["start_date"] + + def _printable_values(self) -> Dict[str, str]: + """Return values for printing.""" + out_dict: Dict[str, str] = {} + out_dict["category"] = format_for_display(self.category) + out_dict["coefficient"] = format_for_display(self.coefficient) + out_dict["collective_agreement"] = format_for_display(self.collective_agreement) + out_dict["job_title"] = format_for_display(self.job_title) + out_dict["position_level"] = format_for_display(self.position_level) + out_dict["seniority_date"] = format_for_display(self.seniority_date) + out_dict["start_date"] = format_for_display(self.start_date) + return out_dict + + def to_field_list(self) -> str: + """Output the object in a format suitable for inclusion in an rST field list.""" + printable = self._printable_values() + out_str: str = f" :Category: {printable['category']}\n" + out_str += f" :Coefficient: 
{printable['coefficient']}\n" + out_str += f" :Collective Agreement: {printable['collective_agreement']}\n" + out_str += f" :Job Title: {printable['job_title']}\n" + out_str += f" :Position Level: {printable['position_level']}\n" + out_str += f" :Seniority Date: {printable['seniority_date']}\n" + out_str += f" :Start Date: {printable['start_date']}\n" + return out_str.rstrip() + + def __str__(self) -> str: + """Default string representation.""" + printable = self._printable_values() + out_str: str = f"Category: {printable['category']}, \n" + out_str += f"Coefficient: {printable['coefficient']}, \n" + out_str += f"Collective Agreement: {printable['collective_agreement']}, \n" + out_str += f"Job Title: {printable['job_title']}, \n" + out_str += f"Position Level: {printable['position_level']}, \n" + out_str += f"Seniority Date: {printable['seniority_date']}, \n" + out_str += f"Start Date: {printable['start_date']}, \n" + return clean_out_string(out_str) diff --git a/mindee/product/fr/payslip/payslip_v3_paid_time_off.py b/mindee/product/fr/payslip/payslip_v3_paid_time_off.py new file mode 100644 index 00000000..fe4e6f7f --- /dev/null +++ b/mindee/product/fr/payslip/payslip_v3_paid_time_off.py @@ -0,0 +1,89 @@ +from typing import Dict, Optional + +from mindee.parsing.common.string_dict import StringDict +from mindee.parsing.common.summary_helper import clean_out_string, format_for_display +from mindee.parsing.standard.base import ( + FieldConfidenceMixin, + FieldPositionMixin, + float_to_string, + to_opt_float, +) + + +class PayslipV3PaidTimeOff(FieldPositionMixin, FieldConfidenceMixin): + """Information about paid time off.""" + + accrued: Optional[float] + """The amount of paid time off accrued in the period.""" + period: Optional[str] + """The paid time off period.""" + pto_type: Optional[str] + """The type of paid time off.""" + remaining: Optional[float] + """The remaining amount of paid time off at the end of the period.""" + used: Optional[float] + """The amount 
of paid time off used in the period.""" + page_n: int + """The document page on which the information was found.""" + + def __init__( + self, + raw_prediction: StringDict, + page_id: Optional[int] = None, + ): + self._set_confidence(raw_prediction) + self._set_position(raw_prediction) + + if page_id is None: + try: + self.page_n = raw_prediction["page_id"] + except KeyError: + pass + else: + self.page_n = page_id + + self.accrued = to_opt_float(raw_prediction, "accrued") + self.period = raw_prediction["period"] + self.pto_type = raw_prediction["pto_type"] + self.remaining = to_opt_float(raw_prediction, "remaining") + self.used = to_opt_float(raw_prediction, "used") + + def _printable_values(self) -> Dict[str, str]: + """Return values for printing.""" + out_dict: Dict[str, str] = {} + out_dict["accrued"] = float_to_string(self.accrued) + out_dict["period"] = format_for_display(self.period) + out_dict["pto_type"] = format_for_display(self.pto_type) + out_dict["remaining"] = float_to_string(self.remaining) + out_dict["used"] = float_to_string(self.used) + return out_dict + + def _table_printable_values(self) -> Dict[str, str]: + """Return values for printing inside an RST table.""" + out_dict: Dict[str, str] = {} + out_dict["accrued"] = float_to_string(self.accrued) + out_dict["period"] = format_for_display(self.period, 6) + out_dict["pto_type"] = format_for_display(self.pto_type, 11) + out_dict["remaining"] = float_to_string(self.remaining) + out_dict["used"] = float_to_string(self.used) + return out_dict + + def to_table_line(self) -> str: + """Output in a format suitable for inclusion in an rST table.""" + printable = self._table_printable_values() + out_str: str = f"| {printable['accrued']:<9} | " + out_str += f"{printable['period']:<6} | " + out_str += f"{printable['pto_type']:<11} | " + out_str += f"{printable['remaining']:<9} | " + out_str += f"{printable['used']:<9} | " + return clean_out_string(out_str) + + def __str__(self) -> str: + """Default string 
representation.""" + printable = self._printable_values() + out_str: str = f"Accrued: {printable['accrued']}, \n" + out_str += f"Period: {printable['period']}, \n" + out_str += f"Type: {printable['pto_type']}, \n" + out_str += f"Remaining: {printable['remaining']}, \n" + out_str += f"Used: {printable['used']}, \n" + return clean_out_string(out_str) diff --git a/mindee/product/fr/payslip/payslip_v3_pay_detail.py b/mindee/product/fr/payslip/payslip_v3_pay_detail.py new file mode 100644 index 00000000..a9bd183f --- /dev/null +++ b/mindee/product/fr/payslip/payslip_v3_pay_detail.py @@ -0,0 +1,115 @@ +from typing import Dict, Optional + +from mindee.parsing.common.string_dict import StringDict +from mindee.parsing.common.summary_helper import clean_out_string +from mindee.parsing.standard.base import ( + FieldConfidenceMixin, + FieldPositionMixin, + float_to_string, + to_opt_float, +) + + +class PayslipV3PayDetail(FieldPositionMixin, FieldConfidenceMixin): + """Detailed information about the pay.""" + + gross_salary: Optional[float] + """The gross salary of the employee.""" + gross_salary_ytd: Optional[float] + """The year-to-date gross salary of the employee.""" + income_tax_rate: Optional[float] + """The income tax rate of the employee.""" + income_tax_withheld: Optional[float] + """The income tax withheld from the employee's pay.""" + net_paid: Optional[float] + """The net paid amount of the employee.""" + net_paid_before_tax: Optional[float] + """The net paid amount before tax of the employee.""" + net_taxable: Optional[float] + """The net taxable amount of the employee.""" + net_taxable_ytd: Optional[float] + """The year-to-date net taxable amount of the employee.""" + total_cost_employer: Optional[float] + """The total cost to the employer.""" + total_taxes_and_deductions: Optional[float] + """The total taxes and deductions of the employee.""" + page_n: int + """The document page on which the information was found.""" + + def __init__( + self, + raw_prediction: 
StringDict,
+        page_id: Optional[int] = None,
+    ):
+        self._set_confidence(raw_prediction)
+        self._set_position(raw_prediction)
+
+        if page_id is None:
+            try:
+                self.page_n = raw_prediction["page_id"]
+            except KeyError:
+                pass
+        else:
+            self.page_n = page_id
+
+        self.gross_salary = to_opt_float(raw_prediction, "gross_salary")
+        self.gross_salary_ytd = to_opt_float(raw_prediction, "gross_salary_ytd")
+        self.income_tax_rate = to_opt_float(raw_prediction, "income_tax_rate")
+        self.income_tax_withheld = to_opt_float(raw_prediction, "income_tax_withheld")
+        self.net_paid = to_opt_float(raw_prediction, "net_paid")
+        self.net_paid_before_tax = to_opt_float(raw_prediction, "net_paid_before_tax")
+        self.net_taxable = to_opt_float(raw_prediction, "net_taxable")
+        self.net_taxable_ytd = to_opt_float(raw_prediction, "net_taxable_ytd")
+        self.total_cost_employer = to_opt_float(raw_prediction, "total_cost_employer")
+        self.total_taxes_and_deductions = to_opt_float(
+            raw_prediction, "total_taxes_and_deductions"
+        )
+
+    def _printable_values(self) -> Dict[str, str]:
+        """Return every field as a printable string, keyed by field name."""
+        return {
+            "gross_salary": float_to_string(self.gross_salary),
+            "gross_salary_ytd": float_to_string(self.gross_salary_ytd),
+            "income_tax_rate": float_to_string(self.income_tax_rate),
+            "income_tax_withheld": float_to_string(self.income_tax_withheld),
+            "net_paid": float_to_string(self.net_paid),
+            "net_paid_before_tax": float_to_string(self.net_paid_before_tax),
+            "net_taxable": float_to_string(self.net_taxable),
+            "net_taxable_ytd": float_to_string(self.net_taxable_ytd),
+            "total_cost_employer": float_to_string(self.total_cost_employer),
+            "total_taxes_and_deductions": float_to_string(
+                self.total_taxes_and_deductions
+            ),
+        }
+
+    def to_field_list(self) -> str:
+        """Render the fields as rST field-list lines, right-stripped."""
+        values = self._printable_values()
+        return (
+            f" :Gross Salary: {values['gross_salary']}\n"
+            f" :Gross Salary YTD: {values['gross_salary_ytd']}\n"
+            f" :Income Tax Rate: {values['income_tax_rate']}\n"
+            f" :Income Tax Withheld: {values['income_tax_withheld']}\n"
+            f" :Net Paid: {values['net_paid']}\n"
+            f" :Net Paid Before Tax: {values['net_paid_before_tax']}\n"
+            f" :Net Taxable: {values['net_taxable']}\n"
+            f" :Net Taxable YTD: {values['net_taxable_ytd']}\n"
+            f" :Total Cost Employer: {values['total_cost_employer']}\n"
+            f" :Total Taxes and Deductions: {values['total_taxes_and_deductions']}\n"
+        ).rstrip()
+
+    def __str__(self) -> str:
+        """Default string form: one ``Label: value`` entry per field."""
+        values = self._printable_values()
+        return clean_out_string(
+            f"Gross Salary: {values['gross_salary']}, \n"
+            f"Gross Salary YTD: {values['gross_salary_ytd']}, \n"
+            f"Income Tax Rate: {values['income_tax_rate']}, \n"
+            f"Income Tax Withheld: {values['income_tax_withheld']}, \n"
+            f"Net Paid: {values['net_paid']}, \n"
+            f"Net Paid Before Tax: {values['net_paid_before_tax']}, \n"
+            f"Net Taxable: {values['net_taxable']}, \n"
+            f"Net Taxable YTD: {values['net_taxable_ytd']}, \n"
+            f"Total Cost Employer: {values['total_cost_employer']}, \n"
+            f"Total Taxes and Deductions: {values['total_taxes_and_deductions']}, \n"
+        )
diff --git a/mindee/product/fr/payslip/payslip_v3_pay_period.py b/mindee/product/fr/payslip/payslip_v3_pay_period.py
new file mode 100644
index 00000000..c7ed38f9
--- /dev/null
+++ b/mindee/product/fr/payslip/payslip_v3_pay_period.py
@@ -0,0 +1,74 @@
+from typing import Dict, Optional
+
+from mindee.parsing.common.string_dict import StringDict
+from 
mindee.parsing.common.summary_helper import clean_out_string, format_for_display
+from mindee.parsing.standard.base import FieldConfidenceMixin, FieldPositionMixin
+
+
+class PayslipV3PayPeriod(FieldPositionMixin, FieldConfidenceMixin):
+    """Information about the pay period."""
+
+    end_date: Optional[str]
+    """The end date of the pay period."""
+    month: Optional[str]
+    """The month of the pay period."""
+    payment_date: Optional[str]
+    """The date of payment for the pay period."""
+    start_date: Optional[str]
+    """The start date of the pay period."""
+    year: Optional[str]
+    """The year of the pay period."""
+    page_n: int
+    """The document page on which the information was found."""
+
+    def __init__(
+        self,
+        raw_prediction: StringDict,
+        page_id: Optional[int] = None,
+    ):
+        self._set_confidence(raw_prediction)
+        self._set_position(raw_prediction)
+
+        # An explicit page_id wins; page_n stays unset when neither source has one.
+        if page_id is not None:
+            self.page_n = page_id
+        elif "page_id" in raw_prediction:
+            self.page_n = raw_prediction["page_id"]
+
+        self.end_date = raw_prediction["end_date"]
+        self.month = raw_prediction["month"]
+        self.payment_date = raw_prediction["payment_date"]
+        self.start_date = raw_prediction["start_date"]
+        self.year = raw_prediction["year"]
+
+    def _printable_values(self) -> Dict[str, str]:
+        """Return every field as a printable string, keyed by field name."""
+        return {
+            "end_date": format_for_display(self.end_date),
+            "month": format_for_display(self.month),
+            "payment_date": format_for_display(self.payment_date),
+            "start_date": format_for_display(self.start_date),
+            "year": format_for_display(self.year),
+        }
+
+    def to_field_list(self) -> str:
+        """Render the fields as rST field-list lines, right-stripped."""
+        values = self._printable_values()
+        return (
+            f" :End Date: {values['end_date']}\n"
+            f" :Month: {values['month']}\n"
+            f" :Payment Date: {values['payment_date']}\n"
+            f" :Start Date: {values['start_date']}\n"
+            f" :Year: {values['year']}\n"
+        ).rstrip()
+
+    def __str__(self) -> str:
+        """Default string form: one ``Label: value`` entry per field."""
+        values = self._printable_values()
+        return clean_out_string(
+            f"End Date: {values['end_date']}, \n"
+            f"Month: {values['month']}, \n"
+            f"Payment Date: {values['payment_date']}, \n"
+            f"Start Date: {values['start_date']}, \n"
+            f"Year: {values['year']}, \n"
+        )
diff --git a/mindee/product/fr/payslip/payslip_v3_salary_detail.py b/mindee/product/fr/payslip/payslip_v3_salary_detail.py
new file mode 100644
index 00000000..324ab105
--- /dev/null
+++ b/mindee/product/fr/payslip/payslip_v3_salary_detail.py
@@ -0,0 +1,89 @@
+from typing import Dict, Optional
+
+from mindee.parsing.common.string_dict import StringDict
+from mindee.parsing.common.summary_helper import clean_out_string, format_for_display
+from mindee.parsing.standard.base import (
+    FieldConfidenceMixin,
+    FieldPositionMixin,
+    float_to_string,
+    to_opt_float,
+)
+
+
+class PayslipV3SalaryDetail(FieldPositionMixin, FieldConfidenceMixin):
+    """Detailed information about the earnings."""
+
+    amount: Optional[float]
+    """The amount of the earning."""
+    base: Optional[float]
+    """The base rate value of the earning."""
+    description: Optional[str]
+    """The description of the earnings."""
+    number: Optional[float]
+    """The number of units in the earning."""
+    rate: Optional[float]
+    """The rate of the earning."""
+    page_n: int
+    """The document page on which the information was found."""
+
+    def __init__(
+        self,
+        raw_prediction: StringDict,
+        page_id: Optional[int] = None,
+    ):
+        self._set_confidence(raw_prediction)
+        self._set_position(raw_prediction)
+
+        # An explicit page_id wins; page_n stays unset when neither source has one.
+        if page_id is not None:
+            self.page_n = page_id
+        elif "page_id" in raw_prediction:
+            self.page_n = raw_prediction["page_id"]
+
+        self.amount = to_opt_float(raw_prediction, "amount")
+        self.base = to_opt_float(raw_prediction, "base")
+        self.description = raw_prediction["description"]
+        self.number = to_opt_float(raw_prediction, "number")
+        self.rate = to_opt_float(raw_prediction, "rate")
+
+    def _printable_values(self) -> Dict[str, str]:
+        """Return every field as a printable string, keyed by field name."""
+        return {
+            "amount": float_to_string(self.amount),
+            "base": float_to_string(self.base),
+            "description": format_for_display(self.description),
+            "number": float_to_string(self.number),
+            "rate": float_to_string(self.rate),
+        }
+
+    def _table_printable_values(self) -> Dict[str, str]:
+        """Return the printable values ``to_table_line`` puts in an rST table."""
+        return {
+            "amount": float_to_string(self.amount),
+            "base": float_to_string(self.base),
+            "description": format_for_display(self.description, 36),
+            "number": float_to_string(self.number),
+            "rate": float_to_string(self.rate),
+        }
+
+    def to_table_line(self) -> str:
+        """Render the fields as one pipe-delimited rST table row."""
+        cells = self._table_printable_values()
+        return clean_out_string(
+            f"| {cells['amount']:<12} | "
+            f"{cells['base']:<9} | "
+            f"{cells['description']:<36} | "
+            f"{cells['number']:<6} | "
+            f"{cells['rate']:<9} | "
+        )
+
+    def __str__(self) -> str:
+        """Default string form: one ``Label: value`` entry per field."""
+        values = self._printable_values()
+        return clean_out_string(
+            f"Amount: {values['amount']}, \n"
+            f"Base: {values['base']}, \n"
+            f"Description: {values['description']}, \n"
+            f"Number: {values['number']}, \n"
+            f"Rate: {values['rate']}, \n"
+        )
diff --git a/mindee/product/ind/indian_passport/indian_passport_v1_document.py b/mindee/product/ind/indian_passport/indian_passport_v1_document.py
index 
7758ac20..9eaeedbd 100644 --- a/mindee/product/ind/indian_passport/indian_passport_v1_document.py +++ b/mindee/product/ind/indian_passport/indian_passport_v1_document.py @@ -9,7 +9,7 @@ class IndianPassportV1Document(Prediction): - """Passport - India API version 1.0 document data.""" + """Passport - India API version 1.1 document data.""" address1: StringField """The first line of the address of the passport holder.""" diff --git a/mindee/product/resume/resume_v1_document.py b/mindee/product/resume/resume_v1_document.py index 902868e2..ff3fa7dc 100644 --- a/mindee/product/resume/resume_v1_document.py +++ b/mindee/product/resume/resume_v1_document.py @@ -17,7 +17,7 @@ class ResumeV1Document(Prediction): - """Resume API version 1.1 document data.""" + """Resume API version 1.2 document data.""" address: StringField """The location information of the candidate, including city, state, and country.""" diff --git a/tests/data b/tests/data index 96f73126..984162b6 160000 --- a/tests/data +++ b/tests/data @@ -1 +1 @@ -Subproject commit 96f73126330ffffe6911d03b8c7fc13b8d301dfe +Subproject commit 984162b6c231125583ab42899ac1e3d7b46825f3 diff --git a/tests/product/driver_license/__init__.py b/tests/product/driver_license/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/product/driver_license/test_driver_license_v1.py b/tests/product/driver_license/test_driver_license_v1.py new file mode 100644 index 00000000..94747360 --- /dev/null +++ b/tests/product/driver_license/test_driver_license_v1.py @@ -0,0 +1,58 @@ +import json + +import pytest + +from mindee.parsing.common.document import Document +from mindee.parsing.common.page import Page +from mindee.product.driver_license.driver_license_v1 import DriverLicenseV1 +from mindee.product.driver_license.driver_license_v1_document import ( + DriverLicenseV1Document, +) +from tests.product import PRODUCT_DATA_DIR + +RESPONSE_DIR = PRODUCT_DATA_DIR / "driver_license" / "response_v1" + 
+DriverLicenseV1DocumentType = Document[
+    DriverLicenseV1Document,
+    Page[DriverLicenseV1Document],
+]
+
+
+@pytest.fixture
+def complete_doc() -> DriverLicenseV1DocumentType:
+    """Parse the ``complete.json`` sample response into a document."""
+    with open(RESPONSE_DIR / "complete.json", "r", encoding="utf-8") as json_file:
+        response = json.load(json_file)
+    return Document(DriverLicenseV1, response["document"])
+
+
+@pytest.fixture
+def empty_doc() -> DriverLicenseV1DocumentType:
+    """Parse the ``empty.json`` sample response into a document."""
+    with open(RESPONSE_DIR / "empty.json", "r", encoding="utf-8") as json_file:
+        response = json.load(json_file)
+    return Document(DriverLicenseV1, response["document"])
+
+
+def test_complete_doc(complete_doc: DriverLicenseV1DocumentType):
+    """The rendered document must equal the stored ``summary_full.rst``."""
+    with open(RESPONSE_DIR / "summary_full.rst", "r", encoding="utf-8") as rst_file:
+        expected_summary = rst_file.read()
+    assert str(complete_doc) == expected_summary
+
+
+def test_empty_doc(empty_doc: DriverLicenseV1DocumentType):
+    fields = empty_doc.inference.prediction  # nothing should be extracted
+    assert fields.country_code.value is None
+    assert fields.state.value is None
+    assert fields.id.value is None
+    assert fields.category.value is None
+    assert fields.last_name.value is None
+    assert fields.first_name.value is None
+    assert fields.date_of_birth.value is None
+    assert fields.place_of_birth.value is None
+    assert fields.expiry_date.value is None
+    assert fields.issued_date.value is None
+    assert fields.issuing_authority.value is None
+    assert fields.mrz.value is None
+    assert fields.dd_number.value is None
diff --git a/tests/product/fr/health_card/__init__.py b/tests/product/fr/health_card/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/tests/product/fr/health_card/test_health_card_v1.py b/tests/product/fr/health_card/test_health_card_v1.py
new file mode 100644
index 00000000..e1dc3302
--- /dev/null
+++ b/tests/product/fr/health_card/test_health_card_v1.py
@@ -0,0 +1,49 @@
+import json
+
+import pytest
+
+from mindee.parsing.common.document import Document
+from mindee.parsing.common.page import Page
+from mindee.product.fr.health_card.health_card_v1 import HealthCardV1
+from mindee.product.fr.health_card.health_card_v1_document import (
+    HealthCardV1Document,
+)
+from tests.product import PRODUCT_DATA_DIR
+
+RESPONSE_DIR = PRODUCT_DATA_DIR / "french_healthcard" / "response_v1"
+
+HealthCardV1DocumentType = Document[
+    HealthCardV1Document,
+    Page[HealthCardV1Document],
+]
+
+
+@pytest.fixture
+def complete_doc() -> HealthCardV1DocumentType:
+    """Parse the ``complete.json`` sample response into a document."""
+    with open(RESPONSE_DIR / "complete.json", "r", encoding="utf-8") as json_file:
+        response = json.load(json_file)
+    return Document(HealthCardV1, response["document"])
+
+
+@pytest.fixture
+def empty_doc() -> HealthCardV1DocumentType:
+    """Parse the ``empty.json`` sample response into a document."""
+    with open(RESPONSE_DIR / "empty.json", "r", encoding="utf-8") as json_file:
+        response = json.load(json_file)
+    return Document(HealthCardV1, response["document"])
+
+
+def test_complete_doc(complete_doc: HealthCardV1DocumentType):
+    """The rendered document must equal the stored ``summary_full.rst``."""
+    with open(RESPONSE_DIR / "summary_full.rst", "r", encoding="utf-8") as rst_file:
+        expected_summary = rst_file.read()
+    assert str(complete_doc) == expected_summary
+
+
+def test_empty_doc(empty_doc: HealthCardV1DocumentType):
+    fields = empty_doc.inference.prediction  # nothing should be extracted
+    assert len(fields.given_names) == 0
+    assert fields.surname.value is None
+    assert fields.social_security.value is None
+    assert fields.issuance_date.value is None
diff --git a/tests/product/fr/payslip/test_payslip_v3.py b/tests/product/fr/payslip/test_payslip_v3.py
new file mode 100644
index 00000000..cdc426d8
--- /dev/null
+++ b/tests/product/fr/payslip/test_payslip_v3.py
@@ -0,0 +1,86 @@
+import json
+
+import pytest
+
+from mindee.parsing.common.document import Document
+from mindee.parsing.common.page import Page
+from mindee.product.fr.payslip.payslip_v3 import PayslipV3
+from 
mindee.product.fr.payslip.payslip_v3_document import (
+    PayslipV3Document,
+)
+from tests.product import PRODUCT_DATA_DIR
+
+RESPONSE_DIR = PRODUCT_DATA_DIR / "payslip_fra" / "response_v3"
+
+PayslipV3DocumentType = Document[
+    PayslipV3Document,
+    Page[PayslipV3Document],
+]
+
+
+@pytest.fixture
+def complete_doc() -> PayslipV3DocumentType:
+    """Parse the ``complete.json`` sample response into a document."""
+    with open(RESPONSE_DIR / "complete.json", "r", encoding="utf-8") as json_file:
+        response = json.load(json_file)
+    return Document(PayslipV3, response["document"])
+
+
+@pytest.fixture
+def empty_doc() -> PayslipV3DocumentType:
+    """Parse the ``empty.json`` sample response into a document."""
+    with open(RESPONSE_DIR / "empty.json", "r", encoding="utf-8") as json_file:
+        response = json.load(json_file)
+    return Document(PayslipV3, response["document"])
+
+
+def test_complete_doc(complete_doc: PayslipV3DocumentType):
+    """The rendered document must equal the stored ``summary_full.rst``."""
+    with open(RESPONSE_DIR / "summary_full.rst", "r", encoding="utf-8") as rst_file:
+        expected_summary = rst_file.read()
+    assert str(complete_doc) == expected_summary
+
+
+def test_empty_doc(empty_doc: PayslipV3DocumentType):
+    fields = empty_doc.inference.prediction  # nothing should be extracted
+    assert fields.pay_period.end_date is None
+    assert fields.pay_period.month is None
+    assert fields.pay_period.payment_date is None
+    assert fields.pay_period.start_date is None
+    assert fields.pay_period.year is None
+    assert fields.employee.address is None
+    assert fields.employee.date_of_birth is None
+    assert fields.employee.first_name is None
+    assert fields.employee.last_name is None
+    assert fields.employee.phone_number is None
+    assert fields.employee.registration_number is None
+    assert fields.employee.social_security_number is None
+    assert fields.employer.address is None
+    assert fields.employer.company_id is None
+    assert fields.employer.company_site is None
+    assert fields.employer.naf_code is None
+    assert fields.employer.name is None
+    assert fields.employer.phone_number is None
+    assert fields.employer.urssaf_number is None
+    assert fields.bank_account_details.bank_name is None
+    assert fields.bank_account_details.iban is None
+    assert fields.bank_account_details.swift is None
+    assert fields.employment.category is None
+    assert fields.employment.coefficient is None
+    assert fields.employment.collective_agreement is None
+    assert fields.employment.job_title is None
+    assert fields.employment.position_level is None
+    assert fields.employment.seniority_date is None
+    assert fields.employment.start_date is None
+    assert len(fields.salary_details) == 0
+    assert fields.pay_detail.gross_salary is None
+    assert fields.pay_detail.gross_salary_ytd is None
+    assert fields.pay_detail.income_tax_rate is None
+    assert fields.pay_detail.income_tax_withheld is None
+    assert fields.pay_detail.net_paid is None
+    assert fields.pay_detail.net_paid_before_tax is None
+    assert fields.pay_detail.net_taxable is None
+    assert fields.pay_detail.net_taxable_ytd is None
+    assert fields.pay_detail.total_cost_employer is None
+    assert fields.pay_detail.total_taxes_and_deductions is None
+    assert len(fields.paid_time_off) == 0