Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
The table of contents is too big for display.
Diff view
Diff view
  •  
  •  
  •  
2 changes: 1 addition & 1 deletion .github/workflows/_test-code-samples.yml
Original file line number Diff line number Diff line change
Expand Up @@ -32,4 +32,4 @@ jobs:
env:
MINDEE_LOG_LEVEL: DEBUG
run: |
./spec/test_code_samples.sh ${{ secrets.MINDEE_ACCOUNT_SE_TESTS }} ${{ secrets.MINDEE_ENDPOINT_SE_TESTS }} ${{ secrets.MINDEE_API_KEY_SE_TESTS }}
./spec/test_code_samples.sh ${{ secrets.MINDEE_ACCOUNT_SE_TESTS }} ${{ secrets.MINDEE_ENDPOINT_SE_TESTS }} ${{ secrets.MINDEE_API_KEY_SE_TESTS }} ${{ secrets.MINDEE_V2_SE_TESTS_API_KEY }} ${{ secrets.MINDEE_V2_SE_TESTS_FINDOC_MODEL_ID }}
3 changes: 3 additions & 0 deletions .github/workflows/_test-integrations.yml
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,9 @@ jobs:
env:
MINDEE_API_KEY: ${{ secrets.MINDEE_API_KEY_SE_TESTS }}
WORKFLOW_ID: ${{ secrets.WORKFLOW_ID_SE_TESTS }}
MINDEE_V2_API_KEY: ${{ secrets.MINDEE_V2_SE_TESTS_API_KEY }}
MINDEE_V2_FINDOC_MODEL_ID: ${{ secrets.MINDEE_V2_SE_TESTS_FINDOC_MODEL_ID }}
MINDEE_V2_SE_TESTS_BLANK_PDF_URL: ${{ secrets.MINDEE_V2_SE_TESTS_BLANK_PDF_URL }}
MINDEE_LOG_LEVEL: DEBUG
run: |
bundle exec rake integration
Expand Down
12 changes: 8 additions & 4 deletions .github/workflows/pull-request.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,18 +3,22 @@ name: Pull Request
on:
pull_request:

permissions:
contents: read
pull-requests: read

jobs:
static_analysis:
uses: mindee/mindee-api-ruby/.github/workflows/_static-analysis.yml@main
uses: ./.github/workflows/_static-analysis.yml
test_units:
uses: mindee/mindee-api-ruby/.github/workflows/_test-units.yml@main
uses: ./.github/workflows/_test-units.yml
needs: static_analysis
secrets: inherit
test_integrations:
uses: mindee/mindee-api-ruby/.github/workflows/_test-integrations.yml@main
uses: ./.github/workflows/_test-integrations.yml
needs: test_units
secrets: inherit
test_code_samples:
uses: mindee/mindee-api-ruby/.github/workflows/_test-code-samples.yml@main
uses: ./.github/workflows/_test-code-samples.yml
needs: test_units
secrets: inherit
28 changes: 28 additions & 0 deletions docs/code_samples/default_v2.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
# Minimal end-to-end example for the Mindee V2 API:
# build a client, describe the inference, send a local file and print the result.
require 'mindee'

# Placeholders: replace with a real file path, API key and model ID.
input_path = '/path/to/the/file.ext'
api_key = 'MY_API_KEY'
model_id = 'MY_MODEL_ID'

# Init a new V2 client
mindee_client = Mindee::ClientV2.new(api_key: api_key)

# Set inference parameters
params = Mindee::Input::InferenceParameters.new(
# ID of the model, required.
model_id,
# If set to `true`, will enable Retrieval-Augmented Generation.
rag: false,
)

# Load a file from disk
input_source = Mindee::Input::Source::PathInputSource.new(input_path)

# Enqueue the file, then poll until the inference is available
response = mindee_client.enqueue_and_get_inference(
input_source,
params # Note: this parameter can also be provided as a Hash.
)

# Print a brief summary of the parsed data
puts response.inference
6 changes: 6 additions & 0 deletions lib/mindee.rb
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
# frozen_string_literal: true

require 'mindee/client'
require 'mindee/client_v2'
require 'mindee/page_options'
require 'mindee/logging'

module Mindee
Expand Down Expand Up @@ -54,6 +56,10 @@ module Standard
# Universal fields and functions.
module Universal
end

# V2-specific module.
module V2
end
end

# Document input-related internals.
Expand Down
41 changes: 11 additions & 30 deletions lib/mindee/client.rb
Original file line number Diff line number Diff line change
Expand Up @@ -2,38 +2,17 @@

require_relative 'input'
require_relative 'http'
require_relative 'product'
require_relative 'logging'
require_relative 'page_options'
require_relative 'parsing/common/api_response'
require_relative 'parsing/common/job'
require_relative 'parsing/common/workflow_response'
require_relative 'logging'
require_relative 'product'

# Default owner for products.
OTS_OWNER = 'mindee'

module Mindee
# Page-selection options applied to a document in parse calls.
#
# @!attribute page_indexes [Array[Integer]] Zero-based list of page indexes.
# @!attribute operation [:KEEP_ONLY, :REMOVE] What to do with the pages listed in `page_indexes`:
#   * `:KEEP_ONLY` - keep only the specified pages, and remove all others.
#   * `:REMOVE` - remove the specified pages, and keep all others.
# @!attribute on_min_pages [Integer, nil] Apply the operation only if the document has at least this many pages.
class PageOptions
  attr_accessor :page_indexes, :operation, :on_min_pages

  # @param params [Hash, nil] Options keyed by String or Symbol; `nil` is treated as an empty Hash.
  def initialize(params: {})
    # Keys are symbolized so that string-keyed hashes are accepted too.
    options = (params || {}).transform_keys(&:to_sym)
    no_pages = [] # : Array[Integer]

    @page_indexes = options.fetch(:page_indexes, no_pages)
    @operation = options.fetch(:operation, :KEEP_ONLY)
    @on_min_pages = options.fetch(:on_min_pages, nil)
  end
end

# Class for configuration options in parse calls.
#
# @!attribute all_words [bool] Whether to include the full text for each page.
Expand Down Expand Up @@ -89,8 +68,9 @@ def initialize(params: {})
# * `:KEEP_ONLY` - keep only the specified pages, and remove all others.
# * `:REMOVE` - remove the specified pages, and keep all others.
# * `:on_min_pages` Apply the operation only if the document has at least this many pages.
# @!attribute close_file [bool, nil] Whether to close the file after sending it. Defaults to true.
class WorkflowOptions
attr_accessor :document_alias, :priority, :full_text, :public_url, :page_options, :rag
attr_accessor :document_alias, :priority, :full_text, :public_url, :page_options, :rag, :close_file

def initialize(params: {})
params = params.transform_keys(&:to_sym)
Expand All @@ -102,6 +82,7 @@ def initialize(params: {})
raw_page_options = params.fetch(:page_options, nil)
raw_page_options = PageOptions.new(params: raw_page_options) unless raw_page_options.is_a?(PageOptions)
@page_options = raw_page_options
@close_file = params.fetch(:close_file, true)
end
end

Expand Down Expand Up @@ -326,7 +307,7 @@ def execute_workflow(input_source, workflow_id, options: {})
process_pdf_if_required(input_source, opts)
end

workflow_endpoint = Mindee::HTTP::WorkflowEndpoint.new(workflow_id, api_key: @api_key)
workflow_endpoint = Mindee::HTTP::WorkflowEndpoint.new(workflow_id, api_key: @api_key.to_s)
logger.debug("Sending document to workflow '#{workflow_id}'")

prediction, raw_http = workflow_endpoint.execute_workflow(
Expand Down Expand Up @@ -455,11 +436,11 @@ def initialize_endpoint(product_class, endpoint_name: '', account_name: '', vers
account_name = fix_account_name(account_name)
version = fix_version(product_class, version)

HTTP::Endpoint.new(account_name, endpoint_name, version, api_key: @api_key)
HTTP::Endpoint.new(account_name, endpoint_name, version, api_key: @api_key.to_s)
end

def fix_endpoint_name(product_class, endpoint_name)
endpoint_name.nil? || endpoint_name.empty? ? product_class.endpoint_name : endpoint_name
endpoint_name.nil? || endpoint_name.empty? ? product_class.endpoint_name.to_s : endpoint_name.to_s
end

def fix_account_name(account_name)
Expand All @@ -474,11 +455,11 @@ def fix_account_name(account_name)
def fix_version(product_class, version)
return version unless version.nil? || version.empty?

if product_class.endpoint_version.nil? || product_class.endpoint_version.empty?
if product_class.endpoint_version.nil? || product_class.endpoint_version.to_s.empty?
logger.debug('No version provided for a custom build, will attempt to poll version 1 by default.')
return '1'
end
product_class.endpoint_version
product_class.endpoint_version || ''
end

# If needed, converts the parsing options provided as a hash into a proper ParseOptions object.
Expand Down
109 changes: 109 additions & 0 deletions lib/mindee/client_v2.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
# frozen_string_literal: true

require_relative 'input'
require_relative 'http'
require_relative 'product'
require_relative 'parsing/common/api_response'
require_relative 'parsing/common/job'
require_relative 'parsing/common/workflow_response'
require_relative 'logging'

module Mindee
  # Mindee V2 API Client.
  #
  # Delegates every HTTP call to an {HTTP::MindeeApiV2} instance and adds the
  # enqueue-then-poll workflow on top of it.
  class ClientV2
    # @return [HTTP::MindeeApiV2]
    private attr_reader :mindee_api

    # @param api_key [String] API key handed to the underlying HTTP layer.
    def initialize(api_key: '')
      @mindee_api = Mindee::HTTP::MindeeApiV2.new(api_key: api_key)
    end

    # Retrieves an inference.
    # @param inference_id [String]
    # @return [Mindee::Parsing::V2::InferenceResponse]
    def get_inference(inference_id)
      @mindee_api.req_get_inference(inference_id)
    end

    # Retrieves a job.
    # @param job_id [String]
    # @return [Mindee::Parsing::V2::JobResponse]
    def get_job(job_id)
      @mindee_api.req_get_job(job_id)
    end

    # Enqueue a document for async parsing.
    # @param input_source [Mindee::Input::Source::LocalInputSource, Mindee::Input::Source::URLInputSource]
    #   The source of the input document (local file or URL).
    # @param params [Hash, InferenceParameters]
    # @return [Mindee::Parsing::V2::JobResponse]
    def enqueue_inference(input_source, params)
      normalized_params = normalize_inference_parameters(params)
      logger.debug("Enqueueing document to model '#{normalized_params.model_id}'.")

      @mindee_api.req_post_inference_enqueue(input_source, normalized_params)
    end

    # Enqueue a document for async parsing and automatically try to retrieve it.
    #
    # The job is polled at most `max_retries` times; the status returned by every
    # poll, including the last one, is inspected before giving up.
    #
    # @param input_source [Mindee::Input::Source::LocalInputSource, Mindee::Input::Source::URLInputSource]
    #   The source of the input document (local file or URL).
    # @param params [Hash, InferenceParameters] Parameters for the inference.
    # @return [Mindee::Parsing::V2::InferenceResponse]
    # @raise [Mindee::Errors::MindeeHTTPErrorV2] If the last polled job carries an error.
    # @raise [Mindee::Errors::MindeeError] If enqueueing fails or polling runs out of attempts.
    def enqueue_and_get_inference(input_source, params)
      normalized_params = normalize_inference_parameters(params)
      normalized_params.validate_async_params
      enqueue_response = enqueue_inference(input_source, normalized_params)

      job_id = enqueue_response.job.id
      if job_id.nil? || job_id.empty?
        logger.error("Failed enqueueing:\n#{enqueue_response.raw_http}")
        raise Mindee::Errors::MindeeError, 'Enqueueing of the document failed.'
      end
      logger.debug("Successfully enqueued document with job id: #{job_id}.")

      polling = normalized_params.polling_options
      sleep(polling.initial_delay_sec)
      retry_counter = 1
      poll_results = get_job(job_id)

      loop do
        status = poll_results.job.status
        # Checked before the attempt limit so a job finishing on the final poll is not reported as a timeout.
        return get_inference(poll_results.job.id) if status == 'Processed'
        break if status == 'Failed' || retry_counter >= polling.max_retries

        logger.debug(
          "Successfully enqueued inference with job id: #{job_id}.\n" \
          "Attempt n°#{retry_counter}/#{polling.max_retries}.\n" \
          "Job status: #{status}."
        )

        sleep(polling.delay_sec)
        poll_results = get_job(job_id)
        retry_counter += 1
      end

      error = poll_results.job.error
      unless error.nil?
        err_to_raise = Mindee::Errors::MindeeHTTPErrorV2.new(error)
        # NOTE: purposefully decoupled from the line above, otherwise rubocop thinks `error` is a `message` param.
        raise err_to_raise
      end

      # Approximation: counts one `delay_sec` per attempt and leaves `initial_delay_sec` out.
      sec_count = polling.delay_sec * retry_counter
      raise Mindee::Errors::MindeeError,
            "Asynchronous parsing request timed out after #{sec_count} seconds"
    end

    # If needed, converts the parsing options provided as a hash into a proper InferenceParameters object.
    # @param params [Hash, InferenceParameters] Params.
    # @return [InferenceParameters]
    def normalize_inference_parameters(params)
      return params if params.is_a?(Input::InferenceParameters)

      Input::InferenceParameters.from_hash(params: params)
    end
  end
end
1 change: 1 addition & 0 deletions lib/mindee/errors.rb
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,5 @@

require_relative 'errors/mindee_error'
require_relative 'errors/mindee_http_error'
require_relative 'errors/mindee_http_error_v2'
require_relative 'errors/mindee_input_error'
26 changes: 26 additions & 0 deletions lib/mindee/errors/mindee_http_error_v2.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
# frozen_string_literal: true

require_relative 'mindee_error'

module Mindee
  module Errors
    # Error raised for failures reported by the V2 API.
    class MindeeHTTPErrorV2 < MindeeError
      # @return [Integer]
      attr_reader :status
      # @return [String]
      attr_reader :detail

      # Builds the error from either a raw Hash (read through its 'status' and
      # 'detail' keys) or a parsed error response object.
      #
      # @param http_error [Hash, Parsing::V2::ErrorResponse]
      def initialize(http_error)
        payload =
          if http_error.is_a?(Parsing::V2::ErrorResponse)
            # Flatten the response object into the same shape as the Hash input.
            { 'detail' => http_error.detail, 'status' => http_error.status }
          else
            http_error
          end

        @status = payload['status']
        @detail = payload['detail']
        super("HTTP error: #{@status} - #{@detail}")
      end
    end
  end
end
4 changes: 2 additions & 2 deletions lib/mindee/geometry/point.rb
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,9 @@ module Geometry
# A relative set of coordinates (X, Y) on the document.
class Point
# @return [Float]
attr_accessor :x
attr_reader :x
# @return [Float]
attr_accessor :y
attr_reader :y

# rubocop:disable Naming/MethodParameterName

Expand Down
8 changes: 4 additions & 4 deletions lib/mindee/geometry/quadrilateral.rb
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,13 @@ module Geometry
# Contains exactly 4 relative vertices coordinates (Points).
class Quadrilateral
# @return [Mindee::Geometry::Point]
attr_accessor :top_left
attr_reader :top_left
# @return [Mindee::Geometry::Point]
attr_accessor :top_right
attr_reader :top_right
# @return [Mindee::Geometry::Point]
attr_accessor :bottom_right
attr_reader :bottom_right
# @return [Mindee::Geometry::Point]
attr_accessor :bottom_left
attr_reader :bottom_left

# @param top_left [Mindee::Geometry::Point]
# @param top_right [Mindee::Geometry::Point]
Expand Down
1 change: 1 addition & 0 deletions lib/mindee/geometry/utils.rb
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,7 @@ def self.get_min_max_x(points)
# @param anchor [Array<Mindee::Geometry::Point>] Reference polygon
# @param margin_left [Float] Margin tolerance on the left of the anchor
# @param margin_right [Float] Margin tolerance on the right of the anchor
# @return [bool]
def self.below?(candidate, anchor, margin_left, margin_right)
return false if Geometry.get_min_max_y(candidate).min < Geometry.get_min_max_y(anchor).min
if Geometry.get_min_max_x(candidate).min <
Expand Down
2 changes: 2 additions & 0 deletions lib/mindee/http.rb
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
# frozen_string_literal: true

require_relative 'http/api_settings_v2'
require_relative 'http/endpoint'
require_relative 'http/http_error_handler'
require_relative 'http/mindee_api_v2'
require_relative 'http/workflow_endpoint'
Loading
Loading