From d698844cd899bebf9dd982a29a54972b9555bdac Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ianar=C3=A9=20S=C3=A9vi?= Date: Tue, 16 Sep 2025 16:49:50 +0200 Subject: [PATCH] :recycle: rework field accessors --- docs/code_samples/default_v2.txt | 4 +- .../parsing/v2/field/inference_fields.rb | 55 ++++--- lib/mindee/parsing/v2/field/list_field.rb | 63 ++++---- lib/mindee/parsing/v2/field/object_field.rb | 22 +-- lib/mindee/parsing/v2/field/simple_field.rb | 7 +- .../parsing/v2/field/inference_fields.rbs | 5 +- sig/mindee/parsing/v2/field/list_field.rbs | 4 +- sig/mindee/parsing/v2/field/object_field.rbs | 3 +- sig/mindee/parsing/v2/field/simple_field.rbs | 2 +- spec/parsing/v2/inference_spec.rb | 146 ++++++++++++------ 10 files changed, 184 insertions(+), 127 deletions(-) diff --git a/docs/code_samples/default_v2.txt b/docs/code_samples/default_v2.txt index 2bff9c3e..cb9873ba 100644 --- a/docs/code_samples/default_v2.txt +++ b/docs/code_samples/default_v2.txt @@ -1,3 +1,5 @@ +# frozen_string_literal: true + require 'mindee' input_path = '/path/to/the/file.ext' @@ -22,7 +24,7 @@ inference_params = Mindee::Input::InferenceParameters.new( polygon: nil, # Boost the precision and accuracy of all extractions. # Calculate confidence scores for all fields. - confidence: nil, + confidence: nil ) # Load a file from disk diff --git a/lib/mindee/parsing/v2/field/inference_fields.rb b/lib/mindee/parsing/v2/field/inference_fields.rb index 7f717356..8d1094c9 100644 --- a/lib/mindee/parsing/v2/field/inference_fields.rb +++ b/lib/mindee/parsing/v2/field/inference_fields.rb @@ -6,7 +6,8 @@ module Mindee module Parsing module V2 module Field - # Collection of inference fields that extends Hash functionality. + # Represents a hash-like collection of inference fields, providing methods for + # retrieval and string representation. class InferenceFields < Hash # @return [Integer] Level of indentation for rst display. 
attr_reader :indent_level @@ -29,23 +30,37 @@ def get(key) self[key] end - # Allow dot notation access to fields. - # @param method_name [Symbol] The method name (field key). - # @return [BaseField, nil] The field or nil if not found. - def method_missing(method_name, *args, &block) - key = method_name.to_s - if key?(key) - self[key] - else - super - end + # Get a field by key and ensure it is a SimpleField. + # @param key [String] Field key to retrieve. + # @return [SimpleField] The SimpleField. + # @raise [TypeError] If the field is not a SimpleField. + def get_simple_field(key) + field = self[key] + raise TypeError, "Field #{key} is not a SimpleField" unless field.is_a?(SimpleField) + + field end - # Check if method_missing should handle the method. - # @param method_name [Symbol] The method name. - # @return [Boolean] `true` if the method should be handled. - def respond_to_missing?(method_name, include_private = false) - key?(method_name.to_s) || super + # Get a field by key and ensure it is a ListField. + # @param key [String] Field key to retrieve. + # @return [ListField] The ListField. + # @raise [TypeError] If the field is not a ListField. + def get_list_field(key) + field = self[key] + raise TypeError, "Field #{key} is not a ListField" unless field.is_a?(ListField) + + field + end + + # Get a field by key and ensure it is an ObjectField. + # @param key [String] Field key to retrieve. + # @return [ObjectField] The ObjectField. + # @raise [TypeError] If the field is not an ObjectField. 
+ def get_object_field(key) + field = self[key] + raise TypeError, "Field #{key} is not an ObjectField" unless field.is_a?(ObjectField) + + field end # rubocop:disable Metrics/CyclomaticComplexity @@ -64,13 +79,7 @@ def to_s(indent = 0) line = "#{padding}:#{field_key}:" case (field_value.class.name || '').split('::').last - when 'ListField' - # Check if ListField has items and they're not empty - list_f = field_value # @type var list_f: ListField - if defined?(list_f.items) && list_f.items && !list_f.items.empty? - line += list_f.to_s - end - when 'ObjectField' + when 'ListField', 'ObjectField' line += field_value.to_s when 'SimpleField' # Check if SimpleField has a non-empty value diff --git a/lib/mindee/parsing/v2/field/list_field.rb b/lib/mindee/parsing/v2/field/list_field.rb index d69a024e..33bb5470 100644 --- a/lib/mindee/parsing/v2/field/list_field.rb +++ b/lib/mindee/parsing/v2/field/list_field.rb @@ -8,7 +8,6 @@ module V2 module Field # Represents a field that contains a list of items. class ListField < BaseField - include Enumerable # @return [Array] Items contained in the list. attr_reader :items @@ -29,10 +28,36 @@ def initialize(server_response, indent_level = 0) end end + # Return only simple fields. + # @return [Array] Simple fields contained in the list. + # @raise [TypeError] If the fields are not SimpleField. + def simple_items + fields = [] + @items.each do |item| + raise TypeError, "Invalid field type detected: #{item.class}" unless item.is_a?(SimpleField) + + fields << item + end + fields + end + + # Return only object fields. + # @return [Array] Object fields contained in the list. + # @raise [TypeError] If the fields are not ObjectField. + def object_items + fields = [] + @items.each do |item| + raise TypeError, "Invalid field type detected: #{item.class}" unless item.is_a?(ObjectField) + + fields << item + end + fields + end + # String representation of the list field. # @return [String] Formatted string with bullet points for each item. 
def to_s - return "\n" if @items.empty? + return '' unless @items && !@items.empty? parts = [''] @items.each do |item| @@ -47,40 +72,6 @@ def to_s parts.join("\n * ") end - - # Check if the list is empty. - # @return [Boolean] `true` if the list has no items. - def empty? - @items.empty? - end - - # Get the number of items in the list. - # @return [Integer] Number of items. - def size - @items.size - end - - # Get the number of items in the list (alias for size). - # @return [Integer] Number of items. - def length - @items.length - end - - # Get an item by index. - # @param index [Integer] The index of the item to retrieve. - # @return [BaseField, nil] The item at the given index. - def [](index) - @items[index] - end - - # Iterator for Enumerator inheritance. - # NOTE: Untyped due to incomplete support in current supported version of RBS. - def each(&block) - return to_enum(:each) unless block_given? - - @items.each(&block) - self - end end end end diff --git a/lib/mindee/parsing/v2/field/object_field.rb b/lib/mindee/parsing/v2/field/object_field.rb index 72658307..a21cff16 100644 --- a/lib/mindee/parsing/v2/field/object_field.rb +++ b/lib/mindee/parsing/v2/field/object_field.rb @@ -78,22 +78,12 @@ def multi_str out_str end - # Allow dot notation access to nested fields. - # @param method_name [Symbol] The method name (field key). - # @return [ObjectField, nil] The field or nil if not found. - def method_missing(method_name, ...) - if @fields.respond_to?(method_name) - @fields.send(method_name, ...) - else - super - end - end - - # Check if method_missing should handle the method. - # @param method_name [Symbol] The method name. - # @return [Boolean] `true` if the method should be handled. - def respond_to_missing?(method_name, include_private = false) - @fields.respond_to?(method_name) || super + # Get a field by key and ensure it is a SimpleField. + # @param key [String] Field key to retrieve. + # @return [SimpleField] The SimpleField. 
+ # @raise [TypeError] If the field is not a SimpleField. + def get_simple_field(key) + @fields.get_simple_field(key) end end end diff --git a/lib/mindee/parsing/v2/field/simple_field.rb b/lib/mindee/parsing/v2/field/simple_field.rb index e2c8cbf4..f5e1903e 100644 --- a/lib/mindee/parsing/v2/field/simple_field.rb +++ b/lib/mindee/parsing/v2/field/simple_field.rb @@ -15,7 +15,12 @@ class SimpleField < BaseField # @param indent_level [Integer] Level of indentation for rst display. def initialize(server_response, indent_level = 0) super - @value = server_response.key?('value') ? server_response['value'] : nil + value = server_response['value'] + @value = if value.is_a?(Integer) + value.to_f + else + value + end end # String representation of the field value. diff --git a/sig/mindee/parsing/v2/field/inference_fields.rbs b/sig/mindee/parsing/v2/field/inference_fields.rbs index 1774a5b1..203f06aa 100644 --- a/sig/mindee/parsing/v2/field/inference_fields.rbs +++ b/sig/mindee/parsing/v2/field/inference_fields.rbs @@ -6,11 +6,12 @@ module Mindee class InferenceFields < Hash[String, ListField | ObjectField | SimpleField?] attr_reader indent_level: Integer + def get_list_field: (String) -> ListField + def get_simple_field: (String) -> SimpleField + def get_object_field: (String) -> ObjectField def logger: () -> Logger def initialize: (Hash[String | Symbol, untyped], ?Integer) -> void def get: (String) -> (ListField | ObjectField | SimpleField?) - def method_missing: (Symbol, *untyped) -> (ListField | ObjectField | SimpleField?) 
- def respond_to_missing?: (Symbol, ?bool) -> bool def to_s: (?Integer) -> String end end diff --git a/sig/mindee/parsing/v2/field/list_field.rbs b/sig/mindee/parsing/v2/field/list_field.rbs index 2dac57b7..1f0295da 100644 --- a/sig/mindee/parsing/v2/field/list_field.rbs +++ b/sig/mindee/parsing/v2/field/list_field.rbs @@ -6,8 +6,10 @@ module Mindee class ListField < BaseField include Enumerable[BaseField] - attr_reader items: Array[BaseField] + attr_reader items: Array[SimpleField | ObjectField | ListField] def initialize: (Hash[String | Symbol, untyped], ?Integer) -> void + def object_items: -> Array[ObjectField] + def simple_items: -> Array[SimpleField] def to_s: -> String def empty?: -> bool def size: -> Integer diff --git a/sig/mindee/parsing/v2/field/object_field.rbs b/sig/mindee/parsing/v2/field/object_field.rbs index fe66d4ed..7b66b986 100644 --- a/sig/mindee/parsing/v2/field/object_field.rbs +++ b/sig/mindee/parsing/v2/field/object_field.rbs @@ -8,12 +8,11 @@ module Mindee class ObjectField < BaseField attr_reader fields: InferenceFields def initialize: (Hash[String | Symbol, untyped], ?Integer) -> void + def get_simple_field: (String) -> SimpleField def multi_str: -> String - def respond_to_missing?: (Symbol, bool) -> bool def single_str: -> String def to_s: -> String def to_s_from_list: -> String - def method_missing: (Symbol, *untyped, untyped) -> (ObjectField?) end end end diff --git a/sig/mindee/parsing/v2/field/simple_field.rbs b/sig/mindee/parsing/v2/field/simple_field.rbs index c5ac46a7..facf0b12 100644 --- a/sig/mindee/parsing/v2/field/simple_field.rbs +++ b/sig/mindee/parsing/v2/field/simple_field.rbs @@ -4,7 +4,7 @@ module Mindee module V2 module Field class SimpleField < BaseField - attr_reader value: String | Integer | Float | bool? 
+ attr_reader value: String | Integer | Float | bool | nil def initialize: (Hash[String | Symbol, untyped], ?Integer) -> void def to_s: -> String diff --git a/spec/parsing/v2/inference_spec.rb b/spec/parsing/v2/inference_spec.rb index 826ee301..bc03d259 100644 --- a/spec/parsing/v2/inference_spec.rb +++ b/spec/parsing/v2/inference_spec.rb @@ -85,56 +85,50 @@ def load_v2_inference(resource_path) expect(fields).not_to be_empty expect(fields.size).to eq(21) - date_field = fields['date'] + date_field = fields.get_simple_field('date') expect(date_field).to be_a(simple_field) expect(date_field.value).to eq('2019-11-02') expect(fields).to have_key('taxes') - taxes = fields['taxes'] + taxes = fields.get_list_field('taxes') expect(taxes).to be_a(list_field) - taxes_list = taxes - expect(taxes_list.items.length).to eq(1) - expect(taxes_list.to_s).to be_a(String) - expect(taxes_list.to_s).to_not be_empty + expect(taxes.items.length).to eq(1) + expect(taxes.to_s).to be_a(String) + expect(taxes.to_s).to_not be_empty - first_tax_item = taxes_list.items.first + first_tax_item = taxes.items.first expect(first_tax_item).to be_a(object_field) - expect(fields).to have_key('line_items') - expect(fields['line_items']).not_to be_nil - expect(fields['line_items']).to be_a(list_field) - expect(fields['line_items'][0]).to be_a(object_field) - expect(fields['line_items'][0]['quantity'].value).to eq(1.0) - - expect(fields).to have_key('line_items') - expect(fields['line_items']).not_to be_nil - expect(fields['line_items']).to be_a(list_field) - expect(fields['line_items'][0]).to be_a(object_field) - expect(fields['line_items'][0]['quantity'].value).to eq(1.0) - tax_item_obj = first_tax_item expect(tax_item_obj.fields.size).to eq(3) - base_field = tax_item_obj.fields['base'] + expect(fields).to have_key('line_items') + line_items = fields.get_list_field('line_items') + expect(line_items).not_to be_nil + expect(line_items).to be_a(list_field) + first_line_item = line_items.object_items[0] + 
expect(first_line_item).to be_a(object_field) + expect(first_line_item.get_simple_field('quantity').value).to eq(1.0) + + base_field = tax_item_obj.fields.get_simple_field('base') expect(base_field).to be_a(simple_field) expect(base_field.value).to eq(31.5) expect(fields).to have_key('supplier_address') - supplier_address = fields['supplier_address'] + supplier_address = fields.get_object_field('supplier_address') expect(supplier_address).to be_a(object_field) + expect(supplier_address.to_s).to be_a(String) + expect(supplier_address.to_s).to_not be_empty - supplier_obj = supplier_address - country_field = supplier_obj.fields['country'] + country_field = supplier_address.fields.get_simple_field('country') expect(country_field).to be_a(simple_field) expect(country_field.value).to eq('USA') expect(country_field.to_s).to eq('USA') - expect(supplier_address.to_s).to be_a(String) - expect(supplier_address.to_s).to_not be_empty - customer_addr = fields['customer_address'] + customer_addr = fields.get_object_field('customer_address') expect(customer_addr).to be_a(object_field) - city_field = customer_addr.fields['city'] + city_field = customer_addr.fields.get_simple_field('city') expect(city_field).to be_a(simple_field) expect(city_field.value).to eq('New York') @@ -150,18 +144,18 @@ def load_v2_inference(resource_path) expect(fields['field_simple']).to be_a(simple_field) expect(fields['field_object']).to be_a(object_field) - field_object = fields['field_object'] + field_object = fields.get_object_field('field_object') lvl1 = field_object.fields expect(lvl1['sub_object_list']).to be_a(list_field) expect(lvl1['sub_object_object']).to be_a(object_field) - sub_object_object = lvl1['sub_object_object'] + sub_object_object = lvl1.get_object_field('sub_object_object') lvl2 = sub_object_object.fields expect(lvl2['sub_object_object_sub_object_list']).to be_a(list_field) - nested_list = lvl2['sub_object_object_sub_object_list'] + nested_list = 
lvl2.get_list_field('sub_object_object_sub_object_list') expect(nested_list.items).not_to be_empty expect(nested_list.items.first).to be_a(object_field) @@ -174,7 +168,7 @@ def load_v2_inference(resource_path) end describe 'standard field types' do - it 'recognizes all field variants' do + def load_standard_fields response = load_v2_inference(standard_field_path) active_options = response.inference.active_options @@ -184,27 +178,91 @@ def load_v2_inference(resource_path) fields = response.inference.result.fields expect(fields).to be_a(Mindee::Parsing::V2::Field::InferenceFields) + fields + end + + it 'recognizes simple fields' do + fields = load_standard_fields + + # low-level access expect(fields['field_simple_string']).to be_a(simple_field) - expect(fields['field_simple_string'].value).to eq('field_simple_string-value') + expect(fields.get('field_simple_string')).to be_a(simple_field) + + field_simple_string = fields.get_simple_field('field_simple_string') + expect(field_simple_string).to be_a(simple_field) + expect(field_simple_string.value).to eq('field_simple_string-value') + expect(field_simple_string.confidence).to eq(field_confidence::CERTAIN) + expect(field_simple_string.to_s).to eq('field_simple_string-value') + + field_simple_int = fields.get_simple_field('field_simple_int') + expect(field_simple_int).to be_a(simple_field) + expect(field_simple_int.value).to be_a(Float) + + field_simple_float = fields.get_simple_field('field_simple_float') + expect(field_simple_float).to be_a(simple_field) + expect(field_simple_float.value).to be_a(Float) + + field_simple_bool = fields.get_simple_field('field_simple_bool') + expect(field_simple_bool).to be_a(simple_field) + expect(field_simple_bool.value).to eq(true) + expect(field_simple_bool.to_s).to eq('True') + + field_simple_null = fields.get_simple_field('field_simple_null') + expect(field_simple_null).to be_a(simple_field) + expect(field_simple_null.value).to be_nil + expect(field_simple_null.to_s).to eq('') + end 
- expect(fields['field_simple_float']).to be_a(simple_field) - expect(fields['field_simple_float'].value).to eq(1.1) + it 'recognizes simple list fields' do + fields = load_standard_fields - expect(fields['field_simple_int']).to be_a(simple_field) - expect(fields['field_simple_int'].value).to eq(12.0) + # low-level access + expect(fields['field_simple_list']).to be_a(list_field) + expect(fields.get('field_simple_list')).to be_a(list_field) - expect(fields['field_simple_zero']).to be_a(simple_field) - expect(fields['field_simple_zero'].value).to eq(0) + field_simple_list = fields.get_list_field('field_simple_list') + expect(field_simple_list).to be_a(list_field) - expect(fields['field_simple_bool']).to be_a(simple_field) - expect(fields['field_simple_bool'].value).to eq(true) + expect(field_simple_list.items[0]).to be_a(simple_field) + expect(field_simple_list.simple_items[0]).to be_a(simple_field) + field_simple_list.simple_items.each do |entry| + expect(entry).to be_a(simple_field) + expect(entry.value).not_to be_nil + end + end - expect(fields['field_simple_null']).to be_a(simple_field) - expect(fields['field_simple_null'].value).to be_nil + it 'recognizes object fields' do + fields = load_standard_fields + # low-level access expect(fields['field_object']).to be_a(object_field) - expect(fields['field_simple_list']).to be_a(list_field) + expect(fields.get('field_object')).to be_a(object_field) + + field_object = fields.get_object_field('field_object') + expect(field_object).to be_a(object_field) + expect(field_object.get_simple_field('subfield_1')).to be_a(simple_field) + field_object.fields.each_value do |entry| + expect(entry).to be_a(simple_field) + expect(entry.value).not_to be_nil + end + end + + it 'recognizes object list fields' do + fields = load_standard_fields + + # low-level access expect(fields['field_object_list']).to be_a(list_field) + expect(fields.get('field_object_list')).to be_a(list_field) + + field_object_list = 
fields.get_list_field('field_object_list') + expect(field_object_list).to be_a(list_field) + + expect(field_object_list.items[0]).to be_a(object_field) + expect(field_object_list.object_items[0]).to be_a(object_field) + field_object_list.object_items.each do |entry| + expect(entry).to be_a(object_field) + expect(entry.fields).not_to be_nil + end end end @@ -243,7 +301,7 @@ def load_v2_inference(resource_path) expect(response.inference).not_to be_nil - date_field = response.inference.result.fields['date'] + date_field = response.inference.result.fields.get_simple_field('date') expect(date_field).to be_a(simple_field) expect(date_field.locations).to be_an(Array) expect(date_field.locations[0]).not_to be_nil