Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions lib/mindee/http/mindee_api_v2.rb
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,7 @@ def enqueue_form_options(form_data, params)
form_data.push(['confidence', params.confidence.to_s]) unless params.confidence.nil?
form_data.push ['file_alias', params.file_alias] if params.file_alias
form_data.push ['text_context', params.text_context] if params.text_context
form_data.push ['data_schema', params.data_schema.to_s] if params.data_schema
unless params.webhook_ids.nil? || params.webhook_ids.empty?
form_data.push ['webhook_ids', params.webhook_ids.join(',')]
end
Expand Down
1 change: 1 addition & 0 deletions lib/mindee/input.rb
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
# frozen_string_literal: true

require_relative 'input/data_schema'
require_relative 'input/inference_parameters'
require_relative 'input/polling_options'
require_relative 'input/sources'
126 changes: 126 additions & 0 deletions lib/mindee/input/data_schema.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,126 @@
# frozen_string_literal: true

module Mindee
  module Input
    # Data Schema Field.
    #
    # Holds the definition of one field of a data schema and serializes it back
    # to a Hash or to a JSON string.
    class DataSchemaField
      # @return [String] Display name for the field, also impacts inference results.
      attr_reader :title
      # @return [String] Name of the field in the data schema.
      attr_reader :name
      # @return [Boolean] Whether this field can contain multiple values.
      attr_reader :is_array
      # @return [String] Data type of the field.
      attr_reader :type
      # @return [Array<String>, nil] Allowed values when type is `classification`. Leave empty for other types.
      attr_reader :classification_values
      # @return [Boolean, nil] Whether to remove duplicate values in the array.
      #   Only applicable if `is_array` is True.
      attr_reader :unique_values
      # @return [String, nil] Detailed description of what this field represents.
      attr_reader :description
      # @return [String, nil] Optional extraction guidelines.
      attr_reader :guidelines
      # @return [Array<Hash>, nil] Nested fields.
      attr_reader :nested_fields

      # @param field [Hash] Field definition, keyed by Strings or Symbols. The hash is not modified.
      def initialize(field)
        # Read from a symbol-keyed copy: the caller's hash stays untouched and may be frozen.
        attributes = field.transform_keys(&:to_sym)
        @name = attributes[:name]
        @title = attributes[:title]
        @is_array = attributes[:is_array]
        @type = attributes[:type]
        @classification_values = attributes[:classification_values]
        @unique_values = attributes[:unique_values]
        @description = attributes[:description]
        @guidelines = attributes[:guidelines]
        @nested_fields = attributes[:nested_fields]
      end

      # Hash form of the field. Optional attributes are only included when they are set.
      # @return [Hash]
      def to_hash
        out = {
          name: @name,
          title: @title,
          is_array: @is_array,
          type: @type,
        } # @type var out: Hash[Symbol, untyped]
        out[:classification_values] = @classification_values unless @classification_values.nil?
        out[:unique_values] = @unique_values unless @unique_values.nil?
        out[:description] = @description unless @description.nil?
        out[:guidelines] = @guidelines unless @guidelines.nil?
        out[:nested_fields] = @nested_fields unless @nested_fields.nil?
        out
      end

      # JSON form of the field.
      # @return [String]
      def to_s
        to_hash.to_json
      end
    end

    # The structure to completely replace the data schema of the model.
    class DataSchemaReplace
      # @return [Array<DataSchemaField>] Fields making up the replacement data schema.
      attr_reader :fields

      # @param data_schema_replace [Hash] Hash with a `fields` entry (String or Symbol key) listing
      #   field definitions, either as Hashes or as ready-made DataSchemaField instances.
      #   The hash is not modified.
      # @raise [Mindee::Errors::MindeeError] If the replacement or its `fields` entry is missing.
      # @raise [TypeError] If the list of fields is empty.
      def initialize(data_schema_replace)
        # A schema without a `replace` entry ends up here as nil: report it as an invalid schema.
        raise Mindee::Errors::MindeeError, 'Invalid Data Schema provided.' if data_schema_replace.nil?

        fields_list = data_schema_replace.transform_keys(&:to_sym)[:fields]
        raise Mindee::Errors::MindeeError, 'Invalid Data Schema provided.' if fields_list.nil?
        raise TypeError, 'Data Schema replacement fields cannot be empty.' if fields_list.empty?

        @fields = fields_list.map do |field|
          field.is_a?(DataSchemaField) ? field : DataSchemaField.new(field)
        end
      end

      # @return [Hash]
      def to_hash
        { fields: @fields.map(&:to_hash) }
      end

      # JSON form of the replacement.
      # @return [String]
      def to_s
        to_hash.to_json
      end
    end

    # Modify the Data Schema.
    class DataSchema
      # @return [Mindee::Input::DataSchemaReplace]
      attr_reader :replace

      # @param data_schema [Hash, String, DataSchema] A hash with a `replace` entry (String or Symbol key),
      #   the same structure as a JSON string, or an existing DataSchema whose replacement is reused.
      #   Hashes given by the caller are not modified.
      # @raise [TypeError] If the input (or the parsed JSON) is neither a Hash nor a DataSchema.
      # @raise [Mindee::Errors::MindeeError] If the `replace` entry or its fields are missing.
      # @raise [JSON::ParserError] If a String input is not valid JSON.
      def initialize(data_schema)
        # Strings are decoded first so that JSON input goes through the same validation as hashes.
        schema = data_schema.is_a?(String) ? JSON.parse(data_schema) : data_schema
        case schema
        when DataSchema
          @replace = schema.replace
        when Hash
          replacement = schema.transform_keys(&:to_sym)[:replace]
          @replace = replacement.is_a?(DataSchemaReplace) ? replacement : DataSchemaReplace.new(replacement)
        else
          raise TypeError, 'Invalid Data Schema provided.'
        end
      end

      # @return [Hash]
      def to_hash
        { replace: @replace.to_hash }
      end

      # JSON form of the data schema, as sent to the API.
      # @return [String]
      def to_s
        to_hash.to_json
      end
    end
  end
end
9 changes: 8 additions & 1 deletion lib/mindee/input/inference_parameters.rb
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
# frozen_string_literal: true

require_relative 'data_schema'

module Mindee
module Input
# Parameters to set when sending a file for inference.
Expand Down Expand Up @@ -35,6 +37,9 @@ class InferenceParameters
# @return [PollingOptions] Options for polling. Set only if having timeout issues.
attr_reader :polling_options

# @return [DataSchema, nil] Data schema override sent with the inference request, if any.
attr_reader :data_schema

# @return [Boolean, nil] Whether to close the file after parsing.
attr_reader :close_file

Expand All @@ -58,7 +63,8 @@ def initialize(
webhook_ids: nil,
text_context: nil,
polling_options: nil,
close_file: true
close_file: true,
data_schema: nil
)
raise Errors::MindeeInputError, 'Model ID is required.' if model_id.empty? || model_id.nil?

Expand All @@ -72,6 +78,7 @@ def initialize(
@text_context = text_context
@polling_options = get_clean_polling_options(polling_options)
@close_file = close_file.nil? || close_file
@data_schema = DataSchema.new(data_schema) unless data_schema.nil?
# rubocop:enable Metrics/ParameterLists
end

Expand Down
22 changes: 22 additions & 0 deletions lib/mindee/parsing/v2/inference_active_options.rb
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,23 @@
module Mindee
module Parsing
module V2
# Data schema options activated during the inference.
class DataSchemaActiveOption
  # @return [Boolean, nil] Whether the data schema was replaced for the inference;
  #   `nil` when the response carries no such information.
  attr_reader :replace

  # @param server_response [Hash, nil] The `data_schema` part of the active options, keyed by
  #   Symbols or Strings. May be nil when the response has no `data_schema` entry.
  def initialize(server_response)
    # A response without a `data_schema` entry yields nil here; read it as an empty set of options
    # instead of failing on `nil[]`.
    options = server_response || {}
    @replace = options[:replace] || options['replace']
  end

  # String representation.
  # @return [String]
  def to_s
    "Data Schema\n-----------\n:Replace: #{@replace ? 'True' : 'False'}"
  end
end

# Options which were activated during the inference.
class InferenceActiveOptions
# @return [Boolean] Whether the Raw Text feature was activated.
Expand All @@ -15,6 +32,8 @@ class InferenceActiveOptions
attr_reader :rag
# @return [Boolean] Whether the text context feature was activated.
attr_reader :text_context
# @return [DataSchemaActiveOption]
attr_reader :data_schema

# @param server_response [Hash] Raw JSON parsed into a Hash.
def initialize(server_response)
Expand All @@ -23,6 +42,7 @@ def initialize(server_response)
@confidence = server_response['confidence']
@rag = server_response['rag']
@text_context = server_response['text_context']
@data_schema = DataSchemaActiveOption.new(server_response['data_schema'])
end

# String representation.
Expand All @@ -35,6 +55,8 @@ def to_s
":Polygon: #{@polygon ? 'True' : 'False'}",
":Confidence: #{@confidence ? 'True' : 'False'}",
":RAG: #{@rag ? 'True' : 'False'}",
":Text Context: #{@text_context ? 'True' : 'False'}\n",
@data_schema.to_s,
'',
]
parts.join("\n")
Expand Down
34 changes: 34 additions & 0 deletions sig/mindee/input/data_schema.rbs
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
module Mindee
  module Input
    # A single field definition of a data schema.
    class DataSchemaField
      attr_reader title: String
      attr_reader name: String
      attr_reader is_array: bool
      attr_reader type: String
      # Allowed values are a list of strings, not a single string.
      attr_reader classification_values: Array[String]?
      attr_reader unique_values: bool?
      attr_reader description: String?
      attr_reader guidelines: String?
      attr_reader nested_fields: Array[Hash[String | Symbol, untyped]]?

      # Keys may be Strings (JSON input) or Symbols; they are symbolized on construction.
      def initialize: (Hash[String | Symbol, untyped]) -> void
      def to_hash: () -> Hash[Symbol, untyped]
      def to_s: () -> String
    end

    # The structure to completely replace the data schema of the model.
    class DataSchemaReplace
      attr_reader fields: Array[DataSchemaField]

      def initialize: (Hash[String | Symbol, untyped]) -> void
      def to_hash: () -> Hash[Symbol, untyped]
      def to_s: () -> String
    end

    # Modify the Data Schema.
    class DataSchema
      attr_reader replace: DataSchemaReplace

      def initialize: (Hash[String | Symbol, untyped] | String | DataSchema) -> void
      def to_hash: () -> Hash[Symbol, untyped]
      def to_s: () -> String
    end
  end
end
4 changes: 3 additions & 1 deletion sig/mindee/input/inference_parameters.rbs
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ module Mindee
attr_reader raw_text: bool?
attr_reader text_context: String?
attr_reader webhook_ids: Array[String]?
attr_reader data_schema: DataSchema?

def initialize: (
String,
Expand All @@ -23,7 +24,8 @@ module Mindee
?text_context: String?,
?webhook_ids: Array[String]?,
?polling_options: Hash[Symbol | String, untyped] | PollingOptions?,
?close_file: bool?
?close_file: bool?,
?data_schema: DataSchema|String|Hash[Symbol | String, untyped]?
) -> void

def self.from_hash: (params: Hash[String | Symbol, untyped]) -> InferenceParameters
Expand Down
8 changes: 8 additions & 0 deletions sig/mindee/parsing/v2/inference_active_options.rbs
Original file line number Diff line number Diff line change
@@ -1,14 +1,22 @@
module Mindee
module Parsing
module V2
# Data schema options activated during the inference.
class DataSchemaActiveOption
  attr_reader replace: bool

  # The response hash is read with both the Symbol and the String form of the `replace` key.
  def initialize: (Hash[String | Symbol, untyped]) -> void
  def to_s: () -> String
end
# Options which were activated during the inference.
class InferenceActiveOptions
attr_reader confidence: bool
attr_reader polygon: bool
attr_reader rag: bool
# Whether the Raw Text feature was activated.
attr_reader raw_text: bool
# Whether the text context feature was activated.
attr_reader text_context: bool
# Data schema options activated during the inference.
attr_reader data_schema: DataSchemaActiveOption

# Takes the raw JSON response parsed into a Hash.
def initialize: (Hash[String | Symbol, untyped]) -> void
def to_s: () -> String
end
end
end
Expand Down
52 changes: 49 additions & 3 deletions spec/v2/client_v2_integration.rb
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
raw_text: true,
polygon: false,
confidence: false,
file_alias: 'ruby-integration-test',
file_alias: 'rb_integration_test',
polling_options: polling,
text_context: 'this is a test'
)
Expand Down Expand Up @@ -72,7 +72,7 @@
polygon: false,
confidence: false,
rag: false,
file_alias: 'ruby-integration-test'
file_alias: 'rb_integration_test'
)

response = client.enqueue_and_get_inference(input, inference_params)
Expand Down Expand Up @@ -191,7 +191,7 @@
polygon: false,
confidence: false,
rag: false,
file_alias: 'ruby-integration-test'
file_alias: 'rb_integration_test'
)
client.enqueue_and_get_inference(input, inference_params)
end.to raise_error(Mindee::Errors::MindeeHTTPErrorV2) { |e|
Expand All @@ -216,4 +216,50 @@
expect(response.inference).not_to be_nil
end
end

context 'A Data Schema Override' do
  it 'Overrides successfully' do
    # The replacement schema is passed as the raw JSON text of the fixture file.
    schema_path = File.join(V2_DATA_DIR, 'inference', 'data_schema_replace_param.json')
    data_schema_replace = File.read(schema_path)
    blank_pdf_path = File.join(FILE_TYPES_DIR, 'pdf', 'blank_1.pdf')
    input = Mindee::Input::Source::PathInputSource.new(blank_pdf_path)

    # Every optional feature is switched off so that only the schema override is active.
    options = {
      raw_text: false,
      polygon: false,
      confidence: false,
      rag: false,
      file_alias: 'rb_integration_data_schema_replace',
      data_schema: data_schema_replace,
    }
    inference_params = Mindee::Input::InferenceParameters.new(model_id, **options)

    response = client.enqueue_and_get_inference(input, inference_params)
    expect(response).not_to be_nil

    # The inference must have run against the requested model.
    model = response.inference.model
    expect(model).not_to be_nil
    expect(model).to be_a(Mindee::Parsing::V2::InferenceModel)
    expect(model.id).to eq(model_id)

    # Only the data schema replacement is reported as active.
    active_options = response.inference.active_options
    expect(active_options).not_to be_nil
    expect(active_options).to be_a(Mindee::Parsing::V2::InferenceActiveOptions)
    expect(active_options.raw_text).to eq(false)
    expect(active_options.polygon).to eq(false)
    expect(active_options.confidence).to eq(false)
    expect(active_options.rag).to eq(false)
    expect(active_options.text_context).to eq(false)
    expect(active_options.data_schema).not_to be_nil
    expect(active_options.data_schema.replace).to eq(true)

    result = response.inference.result
    expect(result).not_to be_nil
    expect(result.raw_text).to be_nil

    # The field defined by the replacement schema shows up in the extracted fields.
    fields = result.fields
    expect(fields).not_to be_nil
    replaced_field = fields['test_replace']
    expect(replaced_field).not_to be_nil
    expect(replaced_field.value).to eq('a test value')
  end
end
end
Loading