Skip to content

Commit 7aa4fb5

Browse files
refactor image module
1 parent f31114d commit 7aa4fb5

25 files changed

+333
-214
lines changed

examples/auto_invoice_splitter_extraction.rb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ def parse_single_page(mindee_client, input_source)
2222
end
2323

2424
def parse_multi_page(mindee_client, input_source)
25-
pdf_extractor = Mindee::Extraction::PdfExtractor::PdfExtractor.new(input_source)
25+
pdf_extractor = Mindee::Image::PdfExtractor::PdfExtractor.new(input_source)
2626
invoice_splitter_response = mindee_client.enqueue_and_parse(
2727
input_source,
2828
Mindee::Product::InvoiceSplitter::InvoiceSplitterV1,

examples/auto_multi_receipts_detector_extraction.rb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ def multi_receipts_detection(file_path, mindee_client)
1313
close_file: false
1414
)
1515

16-
images = Mindee::Extraction::MultiReceiptsExtractor.extract_receipts(input_source, result_split.document.inference)
16+
images = Mindee::Image::MultiReceiptsExtractor.extract_receipts(input_source, result_split.document.inference)
1717
images.each do |sub_image|
1818
# Optional: Save the files locally
1919
# sub_image.write_to_file("/path/to/my/extracted/file/folder")

lib/mindee.rb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ module ImageCompressor
3434
end
3535

3636
# Custom extraction module
37-
module Extraction
37+
module Image
3838
end
3939

4040
# Parsing internals and fields.

lib/mindee/extraction.rb

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,5 +2,4 @@
22

33
require_relative 'extraction/tax_extractor'
44
require_relative 'extraction/multi_receipts_extractor'
5-
require_relative 'extraction/common'
65
require_relative 'extraction/pdf_extractor'

lib/mindee/extraction/common.rb

Lines changed: 0 additions & 4 deletions
This file was deleted.

lib/mindee/extraction/multi_receipts_extractor/multi_receipts_extractor.rb

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,10 @@
11
# frozen_string_literal: true
22

3-
require_relative '../common/image_extractor'
3+
require_relative '../../image/image_extractor'
44

55
module Mindee
66
# Image Extraction Module.
7-
module Extraction
7+
module Image
88
# Multi-receipts extraction class wrapper.
99
class MultiReceiptsExtractor
1010
def self.extract_receipts(input_source, inference)
@@ -23,8 +23,8 @@ def self.extract_receipts(input_source, inference)
2323
(0...input_source.count_pdf_pages).each do |page_id|
2424
receipt_positions = inference.pages[page_id].prediction.receipts.map(&:bounding_box)
2525
images.concat(
26-
Mindee::Extraction::ImageExtractor.extract_multiple_images_from_source(input_source, page_id + 1,
27-
receipt_positions)
26+
Mindee::Image::ImageExtractor.extract_multiple_images_from_source(input_source, page_id + 1,
27+
receipt_positions)
2828
)
2929
end
3030

lib/mindee/extraction/pdf_extractor/extracted_pdf.rb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
module Mindee
44
# Pdf Extraction Module.
5-
module Extraction
5+
module Image
66
module PdfExtractor
77
# An extracted sub-Pdf.
88
class ExtractedPdf

lib/mindee/extraction/pdf_extractor/pdf_extractor.rb

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
module Mindee
44
# Pdf Extraction Module.
5-
module Extraction
5+
module Image
66
# Pdf Extraction module.
77
module PdfExtractor
88
# Pdf extraction class.
@@ -13,7 +13,7 @@ def initialize(local_input)
1313
if local_input.pdf?
1414
@source_pdf = local_input.io_stream
1515
else
16-
pdf_image = Extraction::ImageExtractor.attach_image_as_new_file(local_input.io_stream)
16+
pdf_image = Image::ImageExtractor.attach_image_as_new_file(local_input.io_stream)
1717
io_buffer = StringIO.new
1818
pdf_image.save(io_buffer)
1919

@@ -40,7 +40,7 @@ def cut_pages(page_indexes)
4040

4141
# Extract the sub-documents from the main pdf, based on the given list of page indexes.
4242
# @param page_indexes [Array<Array<Integer>>] List of page numbers to use for merging in the original Pdf.
43-
# @return [Array<Mindee::Extraction::PdfExtractor::ExtractedPdf>] The buffer containing the new Pdf.
43+
# @return [Array<Mindee::Image::PdfExtractor::ExtractedPdf>] The buffer containing the new Pdf.
4444
def extract_sub_documents(page_indexes)
4545
extracted_pdfs = []
4646
extension = File.extname(@filename)
@@ -59,8 +59,8 @@ def extract_sub_documents(page_indexes)
5959
formatted_max_index = format('%03d', page_index_list[page_index_list.length - 1] + 1).to_s
6060
field_filename = "#{basename}_#{format('%03d',
6161
(page_index_list[0] + 1))}-#{formatted_max_index}#{extension}"
62-
extracted_pdf = Mindee::Extraction::PdfExtractor::ExtractedPdf.new(cut_pages(page_index_list),
63-
field_filename)
62+
extracted_pdf = Mindee::Image::PdfExtractor::ExtractedPdf.new(cut_pages(page_index_list),
63+
field_filename)
6464
extracted_pdfs << extracted_pdf
6565
end
6666
extracted_pdfs
@@ -72,7 +72,7 @@ def extract_sub_documents(page_indexes)
7272
# Extracts invoices as complete PDFs from the document.
7373
# @param page_indexes [Array<Array<Integer>, InvoiceSplitterV1PageGroup>]
7474
# @param strict [Boolean]
75-
# @return [Array<Mindee::Extraction::PdfExtractor::ExtractedPdf>]
75+
# @return [Array<Mindee::Image::PdfExtractor::ExtractedPdf>]
7676
def extract_invoices(page_indexes, strict: false)
7777
raise Errors::MindeePDFError, 'No indexes provided.' if page_indexes.empty?
7878
unless page_indexes[0].is_a?(Mindee::Product::InvoiceSplitter::InvoiceSplitterV1PageGroup)

lib/mindee/extraction/tax_extractor/ocr_extractor.rb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
# frozen_string_literal: true
22

33
module Mindee
4-
module Extraction
4+
module Image
55
# Generic extractor class
66
class OcrExtractor
77
# Checks for a list of possible matches in a string & returns the index of the first found candidate.

lib/mindee/extraction/tax_extractor/tax_extractor.rb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
# rubocop:disable Metrics/ClassLength
66

77
module Mindee
8-
module Extraction
8+
module Image
99
# Tax extractor class
1010
class TaxExtractor < OcrExtractor
1111
# Extracts the most relevant candidate.

0 commit comments

Comments
 (0)