Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -146,7 +146,7 @@ Complete details on the working of the library are available in the following gu
* [Python EU Driver License OCR](https://developers.mindee.com/docs/python-eu-driver-license-ocr)
* [Python FR Bank Account Detail OCR](https://developers.mindee.com/docs/python-fr-bank-account-details-ocr)
* [Python FR Carte Grise OCR](https://developers.mindee.com/docs/python-fr-carte-grise-ocr)
* [Python FR Carte Vitale OCR](https://developers.mindee.com/docs/python-fr-carte-vitale-ocr)
* [Python FR Health Card OCR](https://developers.mindee.com/docs/python-fr-health-card-ocr)
* [Python FR ID Card OCR](https://developers.mindee.com/docs/python-fr-carte-nationale-didentite-ocr)
* [Python FR Petrol Receipts OCR](https://developers.mindee.com/docs/python-fr-petrol-receipts-ocr)
* [Python US Bank Check OCR](https://developers.mindee.com/docs/python-us-bank-check-ocr)
Expand Down
17 changes: 0 additions & 17 deletions docs/extras/code_samples/carte_vitale_v1.txt

This file was deleted.

15 changes: 0 additions & 15 deletions docs/product/fr/carte_vitale_v1.rst

This file was deleted.

185 changes: 184 additions & 1 deletion mindee/input/sources/url_input_source.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,15 @@
import os
import random
import string
from datetime import datetime
from pathlib import Path
from typing import Optional, Union
from urllib.parse import urlparse

import requests

from mindee.error.mindee_error import MindeeSourceError
from mindee.input.sources.bytes_input import BytesInput
from mindee.input.sources.local_input_source import InputType
from mindee.logger import logger

Expand All @@ -13,7 +24,7 @@ def __init__(self, url: str) -> None:
"""
Input document from a base64 encoded string.

:param url: URL to send, must be HTTPS
:param url: URL to send, must be HTTPS.
"""
if not url.lower().startswith("https"):
raise MindeeSourceError("URL must be HTTPS")
Expand All @@ -23,3 +34,175 @@ def __init__(self, url: str) -> None:
logger.debug("URL input: %s", url)

self.url = url

def __fetch_file_content(
    self,
    username: Optional[str] = None,
    password: Optional[str] = None,
    token: Optional[str] = None,
    headers: Optional[dict] = None,
    max_redirects: int = 3,
) -> bytes:
    """
    Fetch the content of the file from the URL.

    :param username: Optional username for authentication.
    :param password: Optional password for authentication.
    :param token: Optional token for authentication, sent as a Bearer token.
    :param headers: Optional additional headers for the request.
        The provided dict is never modified.
    :param max_redirects: Maximum number of redirects to follow.
    :return: The content of the file as bytes.
    """
    # Work on a copy: injecting the Authorization header below must not
    # leak into the dict owned by the caller.
    request_headers = dict(headers) if headers else {}
    if token:
        request_headers["Authorization"] = f"Bearer {token}"

    # Basic credentials are only used when both parts are provided.
    auth = (username, password) if username and password else None

    return UrlInputSource.__make_request(
        self.url, auth, request_headers, 0, max_redirects=max_redirects
    )

def save_to_file(
    self,
    filepath: Union[Path, str],
    filename: Optional[str] = None,
    username: Optional[str] = None,
    password: Optional[str] = None,
    token: Optional[str] = None,
    headers: Optional[dict] = None,
    max_redirects: int = 3,
) -> Path:
    """
    Download the remote file and write it to disk.

    :param filepath: Directory in which the file is written.
    :param filename: Optional name for the written file.
    :param username: Optional username for authentication.
    :param password: Optional password for authentication.
    :param token: Optional token for authentication.
    :param headers: Optional additional headers for the request.
    :param max_redirects: Maximum number of redirects to follow.
    :return: The path of the file that was written.
    """
    # Download first, so nothing is created on disk if the request fails.
    content = self.__fetch_file_content(
        username, password, token, headers, max_redirects
    )
    destination = Path(filepath) / self.__fill_filename(filename)
    destination.write_bytes(content)
    return destination

def as_local_input_source(
    self,
    filename: Optional[str] = None,
    username: Optional[str] = None,
    password: Optional[str] = None,
    token: Optional[str] = None,
    headers: Optional[dict] = None,
    max_redirects: int = 3,
) -> BytesInput:
    """
    Download the remote file and wrap its content in a BytesInput.

    :param filename: Optional filename to give to the BytesInput.
    :param username: Optional username for authentication.
    :param password: Optional password for authentication.
    :param token: Optional token for authentication.
    :param headers: Optional additional headers for the request.
    :param max_redirects: Maximum number of redirects to follow.
    :return: A BytesInput built from the downloaded bytes and the filename.
    """
    # The download happens before the filename is resolved.
    content = self.__fetch_file_content(
        username, password, token, headers, max_redirects
    )
    resolved_name = self.__fill_filename(filename)
    return BytesInput(content, resolved_name)

@staticmethod
def __extract_filename_from_url(uri) -> str:
    """
    Return the last segment of the path component of a URL.

    Only the path is considered, so the query string and fragment are ignored.

    :param uri: The URL to extract the filename from.
    :return: The extracted filename, or an empty string if the path has none.
    """
    url_path = urlparse(uri).path
    name = os.path.basename(url_path)
    return name or ""

@staticmethod
def __generate_file_name(extension=".tmp") -> str:
    """
    Build a filename from the current local time and a random suffix.

    :param extension: The file extension to append (default is '.tmp').
    :return: A name shaped like 'mindee_temp_<timestamp>_<8 random chars><extension>'.
    """
    alphabet = string.ascii_lowercase + string.digits
    suffix = "".join(random.choices(alphabet, k=8))
    stamp = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
    return f"mindee_temp_{stamp}_{suffix}{extension}"

@staticmethod
def __get_file_extension(filename) -> Optional[str]:
    """
    Return the lowercased extension of a filename.

    :param filename: The filename to extract the extension from.
    :return: The extension including its leading dot, in lowercase,
        or None when the filename has no extension.
    """
    _, extension = os.path.splitext(filename)
    if not extension:
        return None
    return extension.lower()

def __fill_filename(self, filename=None) -> str:
    """
    Fill in a filename if not provided or incomplete.

    When no filename is given, the one found in the URL path is used. If the
    resulting name is empty or has no extension, a generated name is returned
    instead.

    :param filename: Optional filename to use.
    :return: A complete filename, always carrying an extension.
    """
    if filename is None:
        filename = UrlInputSource.__extract_filename_from_url(self.url)

    if not filename or not os.path.splitext(filename)[1]:
        # On this path the name has no extension, so __get_file_extension
        # yields None; fall back to '.tmp' rather than letting None be
        # formatted into the generated name as the literal text "None".
        extension = UrlInputSource.__get_file_extension(filename) or ".tmp"
        filename = self.__generate_file_name(extension=extension)

    return filename

@staticmethod
def __make_request(url, auth, headers, redirects, max_redirects) -> bytes:
    """
    Makes an HTTP GET request to the given URL, following redirections manually.

    :param url: The URL to request.
    :param auth: Authentication tuple (username, password), or None.
    :param headers: Headers for the request.
    :param redirects: Number of redirects already followed.
    :param max_redirects: Maximum number of redirects to follow.
    :return: The content of the response.
    :raises MindeeSourceError: If max redirects are exceeded, a redirect has no
        target, or the server answers with a non-success status code.
    """
    # Local import: only `urlparse` is imported at file level.
    from urllib.parse import urljoin

    # Automatic redirect handling is disabled so that `max_redirects` is
    # actually enforced by the logic below.
    result = requests.get(
        url, headers=headers, timeout=120, auth=auth, allow_redirects=False
    )
    if 299 < result.status_code < 400:
        if redirects >= max_redirects:
            raise MindeeSourceError(
                f"Can't reach URL after {redirects} out of {max_redirects} redirects, "
                f"aborting operation."
            )
        # The redirect target lives in the response headers, not on the counter.
        location = result.headers.get("Location")
        if not location:
            # A 3xx answer without a target (e.g. 304) cannot be followed.
            raise MindeeSourceError(
                f"Couldn't retrieve file from server, error code {result.status_code}."
            )
        # Location may be relative; resolve it against the URL just requested.
        return UrlInputSource.__make_request(
            urljoin(url, location), auth, headers, redirects + 1, max_redirects
        )

    if result.status_code >= 400 or result.status_code < 200:
        raise MindeeSourceError(
            f"Couldn't retrieve file from server, error code {result.status_code}."
        )

    return result.content
4 changes: 0 additions & 4 deletions mindee/product/fr/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,6 @@
)
from mindee.product.fr.carte_grise.carte_grise_v1 import CarteGriseV1
from mindee.product.fr.carte_grise.carte_grise_v1_document import CarteGriseV1Document
from mindee.product.fr.carte_vitale.carte_vitale_v1 import CarteVitaleV1
from mindee.product.fr.carte_vitale.carte_vitale_v1_document import (
CarteVitaleV1Document,
)
from mindee.product.fr.energy_bill.energy_bill_v1 import EnergyBillV1
from mindee.product.fr.energy_bill.energy_bill_v1_document import EnergyBillV1Document
from mindee.product.fr.energy_bill.energy_bill_v1_energy_consumer import (
Expand Down
4 changes: 0 additions & 4 deletions mindee/product/fr/carte_vitale/__init__.py

This file was deleted.

39 changes: 0 additions & 39 deletions mindee/product/fr/carte_vitale/carte_vitale_v1.py

This file was deleted.

59 changes: 0 additions & 59 deletions mindee/product/fr/carte_vitale/carte_vitale_v1_document.py

This file was deleted.

Loading
Loading