|
1 | | -import base64 |
2 | 1 | import io |
3 | 2 | import mimetypes |
4 | | -import os |
5 | 3 | import tempfile |
6 | 4 | from enum import Enum |
7 | | -from pathlib import Path |
8 | | -from typing import BinaryIO, Optional, Sequence, Tuple, Union |
| 5 | +from typing import BinaryIO, Optional, Sequence, Tuple |
9 | 6 |
|
10 | 7 | import pypdfium2 as pdfium |
11 | 8 |
|
@@ -205,91 +202,3 @@ def read_contents(self, close_file: bool) -> Tuple[str, bytes]: |
205 | 202 | def close(self) -> None: |
206 | 203 | """Close the file object.""" |
207 | 204 | self.file_object.close() |
208 | | - |
209 | | - |
210 | | -class FileInput(LocalInputSource): |
211 | | - """A binary file input.""" |
212 | | - |
213 | | - def __init__(self, file: BinaryIO) -> None: |
214 | | - """ |
215 | | - Input document from a Python binary file object. |
216 | | -
|
217 | | - Note: the calling function is responsible for closing the file. |
218 | | -
|
219 | | - :param file: FileIO object |
220 | | - """ |
221 | | - assert file.name, "File name must be set" |
222 | | - |
223 | | - self.file_object = file |
224 | | - self.filename = os.path.basename(file.name) |
225 | | - self.filepath = file.name |
226 | | - super().__init__(input_type=InputType.FILE) |
227 | | - |
228 | | - |
229 | | -class PathInput(LocalInputSource): |
230 | | - """A local path input.""" |
231 | | - |
232 | | - def __init__(self, filepath: Union[Path, str]) -> None: |
233 | | - """ |
234 | | - Input document from a path. |
235 | | -
|
236 | | - :param filepath: Path to open |
237 | | - """ |
238 | | - self.file_object = open(filepath, "rb") # pylint: disable=consider-using-with |
239 | | - self.filename = os.path.basename(filepath) |
240 | | - self.filepath = str(filepath) |
241 | | - super().__init__(input_type=InputType.PATH) |
242 | | - |
243 | | - |
244 | | -class BytesInput(LocalInputSource): |
245 | | - """Raw bytes input.""" |
246 | | - |
247 | | - def __init__(self, raw_bytes: bytes, filename: str) -> None: |
248 | | - """ |
249 | | - Input document from raw bytes (no buffer). |
250 | | -
|
251 | | - :param raw_bytes: Raw data as bytes |
252 | | - :param filename: File name of the input |
253 | | - """ |
254 | | - self.file_object = io.BytesIO(raw_bytes) |
255 | | - self.filename = filename |
256 | | - self.filepath = None |
257 | | - super().__init__(input_type=InputType.BYTES) |
258 | | - |
259 | | - |
260 | | -class Base64Input(LocalInputSource): |
261 | | - """Base64-encoded text input.""" |
262 | | - |
263 | | - def __init__(self, base64_string: str, filename: str) -> None: |
264 | | - """ |
265 | | - Input document from a base64 encoded string. |
266 | | -
|
267 | | - :param base64_string: Raw data as a base64 encoded string |
268 | | - :param filename: File name of the input |
269 | | - """ |
270 | | - self.file_object = io.BytesIO(base64.standard_b64decode(base64_string)) |
271 | | - self.filename = filename |
272 | | - self.filepath = None |
273 | | - super().__init__(input_type=InputType.BASE64) |
274 | | - |
275 | | - |
276 | | -class UrlInputSource: |
277 | | - """A local or distant URL input.""" |
278 | | - |
279 | | - url: str |
280 | | - """The Uniform Resource Locator.""" |
281 | | - |
282 | | - def __init__(self, url: str) -> None: |
283 | | - """ |
284 | | - Input document from a base64 encoded string. |
285 | | -
|
286 | | - :param url: URL to send, must be HTTPS |
287 | | - """ |
288 | | - if not url.lower().startswith("https"): |
289 | | - raise MindeeSourceError("URL must be HTTPS") |
290 | | - |
291 | | - self.input_type = InputType.URL |
292 | | - |
293 | | - logger.debug("URL input: %s", url) |
294 | | - |
295 | | - self.url = url |
0 commit comments