Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 0 additions & 12 deletions .github/PULL_REQUEST_TEMPLATE.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,18 +3,6 @@
<!--- Why is this change required? What problem does it solve? -->


## Related Issue
<!--- If suggesting a new feature or change, please discuss it in an issue first -->
<!--- If fixing a bug, there should be an issue describing it with steps to reproduce -->
<!--- Please link to the issue here: -->


## How Has This Been Tested
<!--- Please describe in detail how you tested your changes. -->
<!--- Include details of your testing environment, and the tests you ran to -->
<!--- see how your change affects other areas of the code, etc. -->


## Types of changes
<!--- What types of changes does your code introduce? Put an `x` in all the boxes that apply: -->

Expand Down
3 changes: 0 additions & 3 deletions .github/workflows/_test-integrations.yml
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,6 @@ jobs:
- "macos-latest"
ruby:
- "3.0"
- "3.1"
- "3.2"
- "3.3"
- "3.4"
steps:
- uses: actions/checkout@v4
Expand Down
14 changes: 12 additions & 2 deletions docs/code_samples/default_v2.txt
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,18 @@ mindee_client = Mindee::ClientV2.new(api_key: api_key)
inference_params = Mindee::Input::InferenceParameters.new(
# ID of the model, required.
model_id,
# If set to `true`, will enable Retrieval-Augmented Generation.
rag: false,

# Options: set to `true` or `false` to override defaults

# Enhance extraction accuracy with Retrieval-Augmented Generation.
rag: nil,
# Extract the full text content from the document as strings.
raw_text: nil,
# Calculate bounding box polygons for all fields.
polygon: nil,
# Boost the precision and accuracy of all extractions.
# Calculate confidence scores for all fields.
confidence: nil,
)

# Load a file from disk
Expand Down
10 changes: 8 additions & 2 deletions lib/mindee/http/mindee_api_v2.rb
Original file line number Diff line number Diff line change
Expand Up @@ -123,8 +123,14 @@ def enqueue(input_source, params)
file_data, file_metadata = input_source.read_contents(close: params.close_file)
[['file', file_data, file_metadata]] # : Array[untyped]
end
form_data.push ['model_id', params.model_id]
form_data.push ['rag', 'true'] if params.rag
form_data.push(['model_id', params.model_id])

# deal with optional features
form_data.push(['rag', params.rag.to_s]) unless params.rag.nil?
form_data.push(['raw_text', params.raw_text.to_s]) unless params.raw_text.nil?
form_data.push(['polygon', params.polygon.to_s]) unless params.polygon.nil?
form_data.push(['confidence', params.confidence.to_s]) unless params.confidence.nil?

form_data.push ['file_alias', params.file_alias] if params.file_alias
unless params.webhook_ids.nil? || params.webhook_ids.empty?
form_data.push ['webhook_ids', params.webhook_ids.join(',')]
Expand Down
47 changes: 40 additions & 7 deletions lib/mindee/input/inference_parameters.rb
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,21 @@ class InferenceParameters
# @return [String] ID of the model (required).
attr_reader :model_id

# @return [Boolean, nil] Enable Retrieval-Augmented Generation.
# @return [Boolean, nil] Enhance extraction accuracy with Retrieval-Augmented Generation.
attr_reader :rag

# @return [Boolean, nil] Extract the full text content from the document as strings,
# and fill the `raw_text` attribute.
attr_reader :raw_text

# @return [Boolean, nil] Calculate bounding box polygons for all fields,
# and fill their `locations` attribute.
attr_reader :polygon

# @return [Boolean, nil] Boost the precision and accuracy of all extractions.
# Calculate confidence scores for all fields, and fill their `confidence` attribute.
attr_reader :confidence

# @return [String, nil] Optional alias for the file.
attr_reader :file_alias

Expand All @@ -22,21 +34,39 @@ class InferenceParameters
# @return [Boolean, nil] Whether to close the file after parsing.
attr_reader :close_file

# rubocop:disable Metrics/ParameterLists

# Builds the set of parameters attached to an inference request.
#
# The optional feature flags (+rag+, +raw_text+, +polygon+, +confidence+) are stored exactly as given:
# +true+ or +false+ overrides the default, while +nil+ leaves the option unset.
#
# @param [String] model_id ID of the model (required).
# @param [Boolean, nil] rag Whether to enable RAG.
# @param [Boolean, nil] raw_text Whether to enable raw text.
# @param [Boolean, nil] polygon Whether to enable polygons.
# @param [Boolean, nil] confidence Whether to enable confidence scores.
# @param [String, nil] file_alias File alias, if applicable.
# @param [Array<String>, nil] webhook_ids Webhook IDs; +nil+ is stored as an empty array.
# @param [Hash, PollingOptions, nil] polling_options Polling options, normalized by +get_clean_polling_options+.
# @param [Boolean, nil] close_file Whether to close the file after parsing; +nil+ is treated as +true+.
# @raise [Errors::MindeeInputError] if +model_id+ is nil or empty.
def initialize(
  model_id,
  rag: nil,
  raw_text: nil,
  polygon: nil,
  confidence: nil,
  file_alias: nil,
  webhook_ids: nil,
  polling_options: nil,
  close_file: true
)
  # Check for nil first: calling `empty?` on nil would raise NoMethodError
  # before the intended input error could be reported.
  raise Errors::MindeeInputError, 'Model ID is required.' if model_id.nil? || model_id.empty?

  @model_id = model_id
  # Keep nil as-is (no `|| false`) so "unset" stays distinguishable from an explicit false.
  @rag = rag
  @raw_text = raw_text
  @polygon = polygon
  @confidence = confidence
  @file_alias = file_alias
  @webhook_ids = webhook_ids || []
  @polling_options = get_clean_polling_options(polling_options)
  @close_file = close_file.nil? || close_file
end
# rubocop:enable Metrics/ParameterLists

# Validates the parameters for async auto-polling
Expand Down Expand Up @@ -70,7 +100,10 @@ def self.from_hash(params: {})
end

model_id = params.fetch(:model_id)
rag = params.fetch(:rag, false)
rag = params.fetch(:rag, nil)
raw_text = params.fetch(:raw_text, nil)
polygon = params.fetch(:polygon, nil)
confidence = params.fetch(:confidence, nil)
file_alias = params.fetch(:file_alias, nil)
webhook_ids = params.fetch(:webhook_ids, [])
polling_options_input = params.fetch(:page_options, PollingOptions.new)
Expand All @@ -83,8 +116,8 @@ def self.from_hash(params: {})
)
end
close_file = params.fetch(:close_file, true)
InferenceParameters.new(model_id, rag: rag, file_alias: file_alias, webhook_ids: webhook_ids,
close_file: close_file)
InferenceParameters.new(model_id, rag: rag, raw_text: raw_text, polygon: polygon, confidence: confidence,
file_alias: file_alias, webhook_ids: webhook_ids, close_file: close_file)
end

private
Expand Down
6 changes: 6 additions & 0 deletions sig/mindee/input/inference_parameters.rbs
Original file line number Diff line number Diff line change
Expand Up @@ -3,15 +3,21 @@ module Mindee
module Input
class InferenceParameters
attr_reader close_file: bool
attr_reader confidence: bool?
attr_reader file_alias: String?
attr_reader model_id: String
attr_reader polling_options: PollingOptions
attr_reader polygon: bool?
attr_reader rag: bool?
attr_reader raw_text: bool?
attr_reader webhook_ids: Array[String]?

def initialize: (
String,
?rag: bool?,
?raw_text: bool?,
?polygon: bool?,
?confidence: bool?,
?file_alias: String?,
?webhook_ids: Array[String]?,
?polling_options: Hash[Symbol | String, untyped] | PollingOptions?,
Expand Down
83 changes: 62 additions & 21 deletions spec/client_v2_integration.rb
Original file line number Diff line number Diff line change
Expand Up @@ -17,49 +17,90 @@
max_retries: 80
)

params = Mindee::Input::InferenceParameters.new(model_id,
rag: false,
file_alias: 'ruby-integration-test',
polling_options: polling)
params = Mindee::Input::InferenceParameters.new(
model_id,
rag: false,
raw_text: true,
polygon: false,
confidence: false,
file_alias: 'ruby-integration-test',
polling_options: polling
)

response = client.enqueue_and_get_inference(input, params)

expect(response).not_to be_nil
expect(response.inference).not_to be_nil

expect(response.inference.file).not_to be_nil
expect(response.inference.file.name).to eq('multipage_cut-2.pdf')
file = response.inference.file
expect(file).not_to be_nil
expect(file).to be_a(Mindee::Parsing::V2::InferenceFile)
expect(file.name).to eq('multipage_cut-2.pdf')
expect(file.page_count).to eq(2)

model = response.inference.model
expect(model).not_to be_nil
expect(model).to be_a(Mindee::Parsing::V2::InferenceModel)
expect(model.id).to eq(model_id)

expect(response.inference.model).not_to be_nil
expect(response.inference.model.id).to eq(model_id)
active_options = response.inference.active_options
expect(active_options).not_to be_nil
expect(active_options).to be_a(Mindee::Parsing::V2::InferenceActiveOptions)
expect(active_options.raw_text).to eq(true)
expect(active_options.polygon).to eq(false)
expect(active_options.confidence).to eq(false)
expect(active_options.rag).to eq(false)

expect(response.inference.active_options).not_to be_nil
result = response.inference.result
expect(result).not_to be_nil

expect(response.inference.result).not_to be_nil
expect(response.inference.result.raw_text).to be_nil
expect(response.inference.result.fields).not_to be_nil
expect(result.raw_text).not_to be_nil
expect(result.raw_text.pages.length).to eq(2)

expect(result.fields).not_to be_nil
end

it 'parses a filled single-page image successfully' do
src_path = File.join(__dir__ || './', 'data', 'products', 'financial_document', 'default_sample.jpg')
input = Mindee::Input::Source::FileInputSource.new(File.open(src_path, 'rb'), 'default_sample.jpg')

params = Mindee::Input::InferenceParameters.new(model_id,
rag: false,
file_alias: 'ruby-integration-test')
params = Mindee::Input::InferenceParameters.new(
model_id,
raw_text: false,
polygon: false,
confidence: false,
rag: false,
file_alias: 'ruby-integration-test'
)

response = client.enqueue_and_get_inference(input, params)
expect(response).not_to be_nil

expect(response.inference).not_to be_nil
expect(response.inference.file.name).to eq('default_sample.jpg')
file = response.inference.file
expect(file).not_to be_nil
expect(file).to be_a(Mindee::Parsing::V2::InferenceFile)
expect(file.name).to eq('default_sample.jpg')
expect(file.page_count).to eq(1)

model = response.inference.model
expect(model).not_to be_nil
expect(model).to be_a(Mindee::Parsing::V2::InferenceModel)
expect(model.id).to eq(model_id)

active_options = response.inference.active_options
expect(active_options).not_to be_nil
expect(active_options).to be_a(Mindee::Parsing::V2::InferenceActiveOptions)
expect(active_options.raw_text).to eq(false)
expect(active_options.polygon).to eq(false)
expect(active_options.confidence).to eq(false)
expect(active_options.rag).to eq(false)

expect(response.inference.model).not_to be_nil
expect(response.inference.model.id).to eq(model_id)
result = response.inference.result
expect(result).not_to be_nil

expect(response.inference.active_options).not_to be_nil
expect(result.raw_text).to be_nil

fields = response.inference.result.fields
fields = result.fields
expect(fields).not_to be_nil
expect(fields['supplier_name']).not_to be_nil
expect(fields['supplier_name'].value).to eq('John Smith')
Expand Down
5 changes: 4 additions & 1 deletion spec/parsing/v2/inference_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -27,10 +27,12 @@ def load_v2_inference(resource_path)
describe 'simple' do
it 'loads a blank inference with valid properties' do
response = load_v2_inference(blank_path)
fields = response.inference.result.fields

fields = response.inference.result.fields
expect(fields).not_to be_empty
expect(fields).to be_a(Mindee::Parsing::V2::Field::InferenceFields)
expect(fields.size).to eq(21)

expect(fields).to have_key('taxes')
expect(fields['taxes']).not_to be_nil
expect(fields['taxes']).to be_a(list_field)
Expand Down Expand Up @@ -180,6 +182,7 @@ def load_v2_inference(resource_path)
expect(active_options.raw_text).to eq(true)

fields = response.inference.result.fields
expect(fields).to be_a(Mindee::Parsing::V2::Field::InferenceFields)

expect(fields['field_simple_string']).to be_a(simple_field)
expect(fields['field_simple_string'].value).to eq('field_simple_string-value')
Expand Down