From 9fea2ab4ae02fda4d01eb36d8931b47512b3c7f7 Mon Sep 17 00:00:00 2001 From: sebastianMindee <130448732+sebastianMindee@users.noreply.github.com> Date: Thu, 2 Apr 2026 11:40:11 +0200 Subject: [PATCH 1/6] :arrow_up: :boom: drop support for ruby < 3.2 and bump dependency --- .rubocop.yml | 2 +- lib/mindee/geometry/polygon.rb | 4 +- lib/mindee/image/image_extractor.rb | 2 +- lib/mindee/input/sources/path_input_source.rb | 2 +- lib/mindee/page_options.rb | 2 +- lib/mindee/pdf/extracted_pdf.rb | 12 +- lib/mindee/pdf/pdf_extractor.rb | 10 +- lib/mindee/pdf/pdf_processor.rb | 1 - .../v1/extraction/multi_receipts_extractor.rb | 2 +- .../v1/parsing/common/extras/cropper_extra.rb | 2 +- lib/mindee/v1/parsing/common/inference.rb | 2 +- .../standard/company_registration_field.rb | 10 +- lib/mindee/v1/parsing/standard/tax_field.rb | 2 +- .../parsing/universal/universal_list_field.rb | 2 +- .../barcode_reader_v1_document.rb | 1 + .../financial_document_v1_document.rb | 1 + .../financial_document_v1_line_item.rb | 5 +- .../bank_account_details_v1_document.rb | 1 + .../bank_account_details_v2_bban.rb | 3 +- .../bank_account_details_v2_document.rb | 1 + .../bank_statement_v2_document.rb | 1 + .../bank_statement_v2_transaction.rb | 5 +- .../product/fr/id_card/id_card_v1_document.rb | 1 + .../product/fr/id_card/id_card_v2_document.rb | 1 + .../international_id_v2_document.rb | 1 + .../v1/product/invoice/invoice_v4_document.rb | 1 + .../product/invoice/invoice_v4_line_item.rb | 5 +- .../invoice_splitter_v1_document.rb | 1 + .../invoice_splitter_v1_invoice_page_group.rb | 5 +- .../multi_receipts_detector_v1_document.rb | 1 + .../product/passport/passport_v1_document.rb | 1 + .../v1/product/receipt/receipt_v5_document.rb | 1 + .../product/receipt/receipt_v5_line_item.rb | 5 +- .../product/resume/resume_v1_certificate.rb | 5 +- .../v1/product/resume/resume_v1_document.rb | 1 + .../v1/product/resume/resume_v1_education.rb | 5 +- .../v1/product/resume/resume_v1_language.rb | 5 +- .../resume_v1_professional_experience.rb | 5 +- .../resume/resume_v1_social_networks_url.rb | 5 +- .../product/universal/universal_document.rb | 1 + .../product/universal/universal_prediction.rb | 1 + lib/mindee/v2/file_operation/crop.rb | 2 +- lib/mindee/v2/http/mindee_api.rb | 2 +- .../v2/parsing/field/inference_fields.rb | 2 +- lib/mindee/v2/parsing/field/list_field.rb | 4 +- lib/mindee/v2/parsing/field/object_field.rb | 15 +- lib/mindee/v2/parsing/job.rb | 2 +- lib/mindee/v2/parsing/raw_text.rb | 2 +- lib/mindee/v2/product/ocr/ocr_page.rb | 2 +- mindee.gemspec | 18 +- sig/mindee/image/image_extractor.rbs | 2 +- sig/mindee/pdf/extracted_pdf.rbs | 2 +- .../extraction/multi_receipts_extractor.rbs | 2 +- sig/mindee/v1/parsing/common/inference.rbs | 1 - sig/mindee/v1/parsing/common/page.rbs | 2 +- .../standard/company_registration_field.rbs | 2 +- sig/mindee/v1/parsing/standard/tax_field.rbs | 2 +- .../v1/product/cropper/cropper_v1_page.rbs | 3 +- .../financial_document_v1_line_item.rbs | 2 +- .../bank_account_details_v2_bban.rbs | 2 +- .../bank_statement_v2_transaction.rbs | 2 +- .../product/invoice/invoice_v4_line_item.rbs | 2 +- ...invoice_splitter_v1_invoice_page_group.rbs | 2 +- .../multi_receipts_detector_v1.rbs | 9 +- .../multi_receipts_detector_v1_document.rbs | 2 +- .../product/receipt/receipt_v5_line_item.rbs | 2 +- .../product/resume/resume_v1_certificate.rbs | 2 +- .../v1/product/resume/resume_v1_education.rbs | 2 +- .../v1/product/resume/resume_v1_language.rbs | 2 +- .../resume_v1_professional_experience.rbs | 2 +- .../resume/resume_v1_social_networks_url.rbs | 2 +- spec/image/image_extractor_spec.rb | 1 + spec/openssl_crl_workaround.rb | 2 +- spec/pdf/extracted_pdf_spec.rb | 71 ++++---- spec/pdf/pdf_compressor_spec.rb | 19 +- spec/pdf/pdf_processor_spec.rb | 168 +++++++++--------- spec/v1/client_spec.rb | 14 +- spec/v1/http/error_handler_integration.rb | 26 +-- spec/v1/ocr_spec.rb | 1 + 79 files changed, 292 insertions(+), 232 deletions(-) diff --git a/.rubocop.yml b/.rubocop.yml index d7fcf91c8..cee4fe23d 100644 --- a/.rubocop.yml +++ b/.rubocop.yml @@ -10,7 +10,7 @@ AllCops: - local_test/* - Steepfile - TargetRubyVersion: 3.0.0 + TargetRubyVersion: 3.2 SuggestExtensions: false Gemspec/DevelopmentDependencies: diff --git a/lib/mindee/geometry/polygon.rb b/lib/mindee/geometry/polygon.rb index e2e2b1ad5..9d99a729a 100644 --- a/lib/mindee/geometry/polygon.rb +++ b/lib/mindee/geometry/polygon.rb @@ -7,7 +7,7 @@ module Geometry class Polygon < Array # @param server_response [Hash] Raw server response hash. def initialize(server_response) - points = [] + points = [] # @type var points: Array[Mindee::Geometry::Point] server_response.map do |point| points << Point.new(point[0], point[1]) end @@ -30,7 +30,7 @@ def point_in_y?(point) # @return [String] Polygon as a string. def to_s - "(#{map(&:to_s).join(', ')})" + "(#{join(', ')})" end end end diff --git a/lib/mindee/image/image_extractor.rb b/lib/mindee/image/image_extractor.rb index 0599b425d..98f685372 100644 --- a/lib/mindee/image/image_extractor.rb +++ b/lib/mindee/image/image_extractor.rb @@ -47,7 +47,7 @@ def self.extract_multiple_images_from_source(input_source, page_id, polygons) # @param [Array] polygons # @return [Array] Extracted Images. def self.extract_images_from_polygons(input_source, pdf_stream, page_id, polygons) - extracted_elements = [] + extracted_elements = [] # @type var extracted_elements: Array[Image::ExtractedImage] polygons.each_with_index do |polygon, element_id| polygon = ImageUtils.normalize_polygon(polygon) diff --git a/lib/mindee/input/sources/path_input_source.rb b/lib/mindee/input/sources/path_input_source.rb index 2902de7e9..da9b08c4e 100644 --- a/lib/mindee/input/sources/path_input_source.rb +++ b/lib/mindee/input/sources/path_input_source.rb @@ -11,7 +11,7 @@ class PathInputSource < LocalInputSource # @param filepath [String] # @param repair_pdf [bool] def initialize(filepath, repair_pdf: false) - io_stream = File.open(filepath, 'rb') + io_stream = File.new(filepath, 'rb') super(io_stream, File.basename(filepath), repair_pdf: repair_pdf) end end diff --git a/lib/mindee/page_options.rb b/lib/mindee/page_options.rb index ad8f93aec..58bc4cb6b 100644 --- a/lib/mindee/page_options.rb +++ b/lib/mindee/page_options.rb @@ -11,7 +11,7 @@ class PageOptions attr_accessor :page_indexes, :operation, :on_min_pages def initialize(params: {}) - params ||= {} + params ||= {} # : Hash[Symbol, untyped] params = params.transform_keys(&:to_sym) @page_indexes = params.fetch( :page_indexes, diff --git a/lib/mindee/pdf/extracted_pdf.rb b/lib/mindee/pdf/extracted_pdf.rb index 4c7a1ab0d..7f2efb6f0 100644 --- a/lib/mindee/pdf/extracted_pdf.rb +++ b/lib/mindee/pdf/extracted_pdf.rb @@ -13,11 +13,17 @@ class ExtractedPDF # @return [String] attr_reader :filename - # @param pdf_bytes [StringIO] + # @param pdf_stream [StringIO, File] # @param filename [String] - def initialize(pdf_bytes, filename) - @pdf_bytes = pdf_bytes + def initialize(pdf_stream, filename) @filename = filename + + if pdf_stream.is_a?(File) + pdf_stream.rewind + @pdf_bytes = StringIO.new(pdf_stream.read) + else + @pdf_bytes = pdf_stream + end end # Retrieves the page count for a given pdf. diff --git a/lib/mindee/pdf/pdf_extractor.rb b/lib/mindee/pdf/pdf_extractor.rb index c2b0df615..3e7f760ed 100644 --- a/lib/mindee/pdf/pdf_extractor.rb +++ b/lib/mindee/pdf/pdf_extractor.rb @@ -40,7 +40,7 @@ def cut_pages(page_indexes) # @param page_indexes [Array>] List of page number to use for merging in the original Pdf. # @return [Array] The buffer containing the new Pdf. def extract_sub_documents(page_indexes) - extracted_pdfs = [] + extracted_pdfs = [] # @type var extracted_pdfs: Array[Mindee::PDF::ExtractedPDF] extension = File.extname(@filename) basename = File.basename(@filename, extension) page_indexes.each do |page_index_list| @@ -54,7 +54,7 @@ def extract_sub_documents(page_indexes) "Index #{page_index} is out of range." end end - formatted_max_index = format('%03d', page_index_list[page_index_list.length - 1] + 1).to_s + formatted_max_index = format('%03d', page_index_list[-1] + 1).to_s field_filename = "#{basename}_#{format('%03d', page_index_list[0] + 1)}-#{formatted_max_index}#{extension}" extracted_pdf = Mindee::PDF::ExtractedPDF.new(cut_pages(page_index_list), @@ -74,15 +74,15 @@ def extract_sub_documents(page_indexes) def extract_invoices(page_indexes, strict: false) raise Error::MindeePDFError, 'No indexes provided.' if page_indexes.empty? - if page_indexes[0].is_a?(Array) && page_indexes[0].all? { |i| i.is_a?(Integer) } + if page_indexes[0].is_a?(Array) && page_indexes[0].all?(Integer) page_indexes_as_array = page_indexes # @type var page_indexes : Array[Array[Integer]] return extract_sub_documents(page_indexes_as_array) end p_ids = page_indexes # @type var page_indexes: Product::InvoiceSplitter::InvoiceSplitterV1InvoicePageGroups return extract_sub_documents(p_ids.map(&:page_indexes)) unless strict - correct_page_indexes = [] - current_list = [] + correct_page_indexes = [] # @type var correct_page_indexes: Array[Array[Integer]] + current_list = [] # @type var current_list: Array[Integer] previous_confidence = nil p_ids.each_with_index do |p_i, i| page_index = p_i # @type var page_index: Product::InvoiceSplitter::InvoiceSplitterV1InvoicePageGroup diff --git a/lib/mindee/pdf/pdf_processor.rb b/lib/mindee/pdf/pdf_processor.rb index 4796c0e3c..77841ab8c 100644 --- a/lib/mindee/pdf/pdf_processor.rb +++ b/lib/mindee/pdf/pdf_processor.rb @@ -1,6 +1,5 @@ # frozen_string_literal: true -require 'set' require 'origami' require_relative 'pdf_tools' diff --git a/lib/mindee/v1/extraction/multi_receipts_extractor.rb b/lib/mindee/v1/extraction/multi_receipts_extractor.rb index 0b87f2724..fa3dbb6b7 100644 --- a/lib/mindee/v1/extraction/multi_receipts_extractor.rb +++ b/lib/mindee/v1/extraction/multi_receipts_extractor.rb @@ -11,7 +11,7 @@ module Extraction # @param inference [Inference] Results of the inference. # @return [Array] Individual extracted receipts as an array of ExtractedMultiReceiptsImage. def self.extract_receipts(input_source, inference) - images = [] + images = [] # @type var images: Array[Image::ExtractedImage] unless inference.prediction.receipts raise Error::MindeeInputError, 'No possible receipts candidates found for Multi-Receipts extraction.' diff --git a/lib/mindee/v1/parsing/common/extras/cropper_extra.rb b/lib/mindee/v1/parsing/common/extras/cropper_extra.rb index 7fc94acf2..a1290dfb2 100644 --- a/lib/mindee/v1/parsing/common/extras/cropper_extra.rb +++ b/lib/mindee/v1/parsing/common/extras/cropper_extra.rb @@ -22,7 +22,7 @@ def initialize(raw_prediction, page_id = nil) # @return [String] def to_s - @croppings.map(&:to_s).join("\n ") + @croppings.join("\n ") end end end diff --git a/lib/mindee/v1/parsing/common/inference.rb b/lib/mindee/v1/parsing/common/inference.rb index 3f31c92e0..53dfcf77e 100644 --- a/lib/mindee/v1/parsing/common/inference.rb +++ b/lib/mindee/v1/parsing/common/inference.rb @@ -57,7 +57,7 @@ def to_s out_str << "\n#{"#{@prediction}\n" if @prediction.to_s.size.positive?}" if @pages.any? { |page| !page.prediction.nil? } out_str << "\nPage Predictions\n================\n\n" - out_str << @pages.map(&:to_s).join("\n\n") + out_str << @pages.join("\n\n") end out_str.rstrip! out_str diff --git a/lib/mindee/v1/parsing/standard/company_registration_field.rb b/lib/mindee/v1/parsing/standard/company_registration_field.rb index fd00a7c91..c8841683a 100644 --- a/lib/mindee/v1/parsing/standard/company_registration_field.rb +++ b/lib/mindee/v1/parsing/standard/company_registration_field.rb @@ -21,21 +21,21 @@ def initialize(prediction, page_id, reconstructed: false) # @return [String] The formatted table line, as a string. def to_table_line printable = printable_values - format('| %-15s | %-20s ', type: printable['type'], value: printable['value']) + format('| %-15s | %-20s ', type: printable[:type], value: printable[:value]) end # @return [String] def to_s printable = printable_values - format('Type: %s, Value: %s', type: printable['type'], value: printable['value']) + format('Type: %s, Value: %s', type: printable[:type], value: printable[:value]) end # Hashed representation of the values. # @return [Hash] Hash of the values. def printable_values - printable = {} - printable['type'] = type - printable['value'] = value + printable = {} # @type var printable: Hash[Symbol, String] + printable[:type] = type + printable[:value] = value.to_s printable end end diff --git a/lib/mindee/v1/parsing/standard/tax_field.rb b/lib/mindee/v1/parsing/standard/tax_field.rb index 0bc653475..657e0a007 100644 --- a/lib/mindee/v1/parsing/standard/tax_field.rb +++ b/lib/mindee/v1/parsing/standard/tax_field.rb @@ -49,7 +49,7 @@ def to_s # @return [Hash] def printable_values - out_h = {} + out_h = {} # @type var out_h: Hash[Symbol, String] out_h[:code] = @code.nil? ? '' : @code out_h[:base] = @base.nil? ? '' : print_float(@base) out_h[:rate] = @rate.nil? ? '' : print_float(@rate).to_s diff --git a/lib/mindee/v1/parsing/universal/universal_list_field.rb b/lib/mindee/v1/parsing/universal/universal_list_field.rb index 6793766c2..da735839f 100644 --- a/lib/mindee/v1/parsing/universal/universal_list_field.rb +++ b/lib/mindee/v1/parsing/universal/universal_list_field.rb @@ -46,7 +46,7 @@ def contents_list # Return a string representation of all values. def contents_string(separator = ' ') - @values.map(&:to_s).join(separator) + @values.join(separator) end # String representation diff --git a/lib/mindee/v1/product/barcode_reader/barcode_reader_v1_document.rb b/lib/mindee/v1/product/barcode_reader/barcode_reader_v1_document.rb index cfa50f759..c6a6cffbf 100644 --- a/lib/mindee/v1/product/barcode_reader/barcode_reader_v1_document.rb +++ b/lib/mindee/v1/product/barcode_reader/barcode_reader_v1_document.rb @@ -9,6 +9,7 @@ module BarcodeReader # Barcode Reader API version 1.0 document data. class BarcodeReaderV1Document < Mindee::V1::Parsing::Common::Prediction include Mindee::V1::Parsing::Standard + # List of decoded 1D barcodes. # @return [Array] attr_reader :codes_1d diff --git a/lib/mindee/v1/product/financial_document/financial_document_v1_document.rb b/lib/mindee/v1/product/financial_document/financial_document_v1_document.rb index 561e42531..2a879f465 100644 --- a/lib/mindee/v1/product/financial_document/financial_document_v1_document.rb +++ b/lib/mindee/v1/product/financial_document/financial_document_v1_document.rb @@ -10,6 +10,7 @@ module FinancialDocument # Financial Document API version 1.14 document data. class FinancialDocumentV1Document < Mindee::V1::Parsing::Common::Prediction include Mindee::V1::Parsing::Standard + # The customer's address used for billing. # @return [Mindee::V1::Parsing::Standard::AddressField] attr_reader :billing_address diff --git a/lib/mindee/v1/product/financial_document/financial_document_v1_line_item.rb b/lib/mindee/v1/product/financial_document/financial_document_v1_line_item.rb index a2a02548f..d9989b4e7 100644 --- a/lib/mindee/v1/product/financial_document/financial_document_v1_line_item.rb +++ b/lib/mindee/v1/product/financial_document/financial_document_v1_line_item.rb @@ -9,6 +9,7 @@ module FinancialDocument # List of line item present on the document. class FinancialDocumentV1LineItem < Mindee::V1::Parsing::Standard::FeatureField include Mindee::V1::Parsing::Standard + # The item description. # @return [String] attr_reader :description @@ -51,7 +52,7 @@ def initialize(prediction, page_id) # @return [Hash] def printable_values - printable = {} + printable = {} # @type var printable: Hash[Symbol, String] printable[:description] = format_for_display(@description) printable[:product_code] = format_for_display(@product_code) printable[:quantity] = @@ -70,7 +71,7 @@ def printable_values # @return [Hash] def table_printable_values - printable = {} + printable = {} # @type var printable: Hash[Symbol, String] printable[:description] = format_for_display(@description, 36) printable[:product_code] = format_for_display(@product_code, nil) printable[:quantity] = diff --git a/lib/mindee/v1/product/fr/bank_account_details/bank_account_details_v1_document.rb b/lib/mindee/v1/product/fr/bank_account_details/bank_account_details_v1_document.rb index f2f8d6c2b..b58fa1780 100644 --- a/lib/mindee/v1/product/fr/bank_account_details/bank_account_details_v1_document.rb +++ b/lib/mindee/v1/product/fr/bank_account_details/bank_account_details_v1_document.rb @@ -10,6 +10,7 @@ module BankAccountDetails # Bank Account Details API version 1.0 document data. class BankAccountDetailsV1Document < Mindee::V1::Parsing::Common::Prediction include Mindee::V1::Parsing::Standard + # The name of the account holder as seen on the document. # @return [Mindee::V1::Parsing::Standard::StringField] attr_reader :account_holder_name diff --git a/lib/mindee/v1/product/fr/bank_account_details/bank_account_details_v2_bban.rb b/lib/mindee/v1/product/fr/bank_account_details/bank_account_details_v2_bban.rb index 18c614fbc..2c1a2dcaf 100644 --- a/lib/mindee/v1/product/fr/bank_account_details/bank_account_details_v2_bban.rb +++ b/lib/mindee/v1/product/fr/bank_account_details/bank_account_details_v2_bban.rb @@ -10,6 +10,7 @@ module BankAccountDetails # Full extraction of BBAN, including: branch code, bank code, account and key. class BankAccountDetailsV2Bban < Mindee::V1::Parsing::Standard::FeatureField include Mindee::V1::Parsing::Standard + # The BBAN bank code outputted as a string. # @return [String] attr_reader :bban_bank_code @@ -36,7 +37,7 @@ def initialize(prediction, page_id) # @return [Hash] def printable_values - printable = {} + printable = {} # @type var printable: Hash[Symbol, String] printable[:bban_bank_code] = format_for_display(@bban_bank_code) printable[:bban_branch_code] = format_for_display(@bban_branch_code) printable[:bban_key] = format_for_display(@bban_key) diff --git a/lib/mindee/v1/product/fr/bank_account_details/bank_account_details_v2_document.rb b/lib/mindee/v1/product/fr/bank_account_details/bank_account_details_v2_document.rb index 7310694db..0861defd4 100644 --- a/lib/mindee/v1/product/fr/bank_account_details/bank_account_details_v2_document.rb +++ b/lib/mindee/v1/product/fr/bank_account_details/bank_account_details_v2_document.rb @@ -11,6 +11,7 @@ module BankAccountDetails # Bank Account Details API version 2.0 document data. class BankAccountDetailsV2Document < Mindee::V1::Parsing::Common::Prediction include Mindee::V1::Parsing::Standard + # Full extraction of the account holders names. # @return [Mindee::V1::Parsing::Standard::StringField] attr_reader :account_holders_names diff --git a/lib/mindee/v1/product/fr/bank_statement/bank_statement_v2_document.rb b/lib/mindee/v1/product/fr/bank_statement/bank_statement_v2_document.rb index bcf8cbdec..fe27d7162 100644 --- a/lib/mindee/v1/product/fr/bank_statement/bank_statement_v2_document.rb +++ b/lib/mindee/v1/product/fr/bank_statement/bank_statement_v2_document.rb @@ -11,6 +11,7 @@ module BankStatement # Bank Statement API version 2.0 document data. class BankStatementV2Document < Mindee::V1::Parsing::Common::Prediction include Mindee::V1::Parsing::Standard + # The unique identifier for a customer's account in the bank's system. # @return [Mindee::V1::Parsing::Standard::StringField] attr_reader :account_number diff --git a/lib/mindee/v1/product/fr/bank_statement/bank_statement_v2_transaction.rb b/lib/mindee/v1/product/fr/bank_statement/bank_statement_v2_transaction.rb index 82702628b..b131b888b 100644 --- a/lib/mindee/v1/product/fr/bank_statement/bank_statement_v2_transaction.rb +++ b/lib/mindee/v1/product/fr/bank_statement/bank_statement_v2_transaction.rb @@ -10,6 +10,7 @@ module BankStatement # The list of values that represent the financial transactions recorded in a bank statement. class BankStatementV2Transaction < Mindee::V1::Parsing::Standard::FeatureField include Mindee::V1::Parsing::Standard + # The monetary amount of the transaction. # @return [Float] attr_reader :amount @@ -32,7 +33,7 @@ def initialize(prediction, page_id) # @return [Hash] def printable_values - printable = {} + printable = {} # @type var printable: Hash[Symbol, String] printable[:amount] = @amount.nil? ? '' : Parsing::Standard::BaseField.float_to_string(@amount) printable[:date] = format_for_display(@date) @@ -42,7 +43,7 @@ def printable_values # @return [Hash] def table_printable_values - printable = {} + printable = {} # @type var printable: Hash[Symbol, String] printable[:amount] = @amount.nil? ? '' : Parsing::Standard::BaseField.float_to_string(@amount) printable[:date] = format_for_display(@date, 10) diff --git a/lib/mindee/v1/product/fr/id_card/id_card_v1_document.rb b/lib/mindee/v1/product/fr/id_card/id_card_v1_document.rb index e8789eefe..b151903d7 100644 --- a/lib/mindee/v1/product/fr/id_card/id_card_v1_document.rb +++ b/lib/mindee/v1/product/fr/id_card/id_card_v1_document.rb @@ -10,6 +10,7 @@ module IdCard # Carte Nationale d'Identité API version 1.1 document data. class IdCardV1Document < Mindee::V1::Parsing::Common::Prediction include Mindee::V1::Parsing::Standard + # The name of the issuing authority. # @return [Mindee::V1::Parsing::Standard::StringField] attr_reader :authority diff --git a/lib/mindee/v1/product/fr/id_card/id_card_v2_document.rb b/lib/mindee/v1/product/fr/id_card/id_card_v2_document.rb index dfeb8d7e9..0d622e2b2 100644 --- a/lib/mindee/v1/product/fr/id_card/id_card_v2_document.rb +++ b/lib/mindee/v1/product/fr/id_card/id_card_v2_document.rb @@ -10,6 +10,7 @@ module IdCard # Carte Nationale d'Identité API version 2.0 document data. class IdCardV2Document < Mindee::V1::Parsing::Common::Prediction include Mindee::V1::Parsing::Standard + # The alternate name of the card holder. # @return [Mindee::V1::Parsing::Standard::StringField] attr_reader :alternate_name diff --git a/lib/mindee/v1/product/international_id/international_id_v2_document.rb b/lib/mindee/v1/product/international_id/international_id_v2_document.rb index 59b38572a..16c664c06 100644 --- a/lib/mindee/v1/product/international_id/international_id_v2_document.rb +++ b/lib/mindee/v1/product/international_id/international_id_v2_document.rb @@ -9,6 +9,7 @@ module InternationalId # International ID API version 2.2 document data. class InternationalIdV2Document < Mindee::V1::Parsing::Common::Prediction include Mindee::V1::Parsing::Standard + # The physical address of the document holder. # @return [Mindee::V1::Parsing::Standard::StringField] attr_reader :address diff --git a/lib/mindee/v1/product/invoice/invoice_v4_document.rb b/lib/mindee/v1/product/invoice/invoice_v4_document.rb index d3d66a3eb..cfd06c148 100644 --- a/lib/mindee/v1/product/invoice/invoice_v4_document.rb +++ b/lib/mindee/v1/product/invoice/invoice_v4_document.rb @@ -10,6 +10,7 @@ module Invoice # Invoice API version 4.11 document data. class InvoiceV4Document < Mindee::V1::Parsing::Common::Prediction include Mindee::V1::Parsing::Standard + # The customer billing address. # @return [Mindee::V1::Parsing::Standard::AddressField] attr_reader :billing_address diff --git a/lib/mindee/v1/product/invoice/invoice_v4_line_item.rb b/lib/mindee/v1/product/invoice/invoice_v4_line_item.rb index 767714597..2e5b83574 100644 --- a/lib/mindee/v1/product/invoice/invoice_v4_line_item.rb +++ b/lib/mindee/v1/product/invoice/invoice_v4_line_item.rb @@ -9,6 +9,7 @@ module Invoice # List of all the line items present on the invoice. class InvoiceV4LineItem < Mindee::V1::Parsing::Standard::FeatureField include Mindee::V1::Parsing::Standard + # The item description. # @return [String] attr_reader :description @@ -51,7 +52,7 @@ def initialize(prediction, page_id) # @return [Hash] def printable_values - printable = {} + printable = {} # @type var printable: Hash[Symbol, String] printable[:description] = format_for_display(@description) printable[:product_code] = format_for_display(@product_code) printable[:quantity] = @@ -70,7 +71,7 @@ def printable_values # @return [Hash] def table_printable_values - printable = {} + printable = {} # @type var printable: Hash[Symbol, String] printable[:description] = format_for_display(@description, 36) printable[:product_code] = format_for_display(@product_code, nil) printable[:quantity] = diff --git a/lib/mindee/v1/product/invoice_splitter/invoice_splitter_v1_document.rb b/lib/mindee/v1/product/invoice_splitter/invoice_splitter_v1_document.rb index 5dc097153..7619c0a5b 100644 --- a/lib/mindee/v1/product/invoice_splitter/invoice_splitter_v1_document.rb +++ b/lib/mindee/v1/product/invoice_splitter/invoice_splitter_v1_document.rb @@ -10,6 +10,7 @@ module InvoiceSplitter # Invoice Splitter API version 1.4 document data. class InvoiceSplitterV1Document < Mindee::V1::Parsing::Common::Prediction include Mindee::V1::Parsing::Standard + # List of page groups. Each group represents a single invoice within a multi-invoice document. # @return [Mindee::V1::Product::InvoiceSplitter::InvoiceSplitterV1InvoicePageGroups] attr_reader :invoice_page_groups diff --git a/lib/mindee/v1/product/invoice_splitter/invoice_splitter_v1_invoice_page_group.rb b/lib/mindee/v1/product/invoice_splitter/invoice_splitter_v1_invoice_page_group.rb index 6cf498c50..8c72438d7 100644 --- a/lib/mindee/v1/product/invoice_splitter/invoice_splitter_v1_invoice_page_group.rb +++ b/lib/mindee/v1/product/invoice_splitter/invoice_splitter_v1_invoice_page_group.rb @@ -9,6 +9,7 @@ module InvoiceSplitter # List of page groups. Each group represents a single invoice within a multi-invoice document. class InvoiceSplitterV1InvoicePageGroup < Mindee::V1::Parsing::Standard::FeatureField include Mindee::V1::Parsing::Standard + # List of page indexes that belong to the same invoice (group). # @return [Array] attr_reader :page_indexes @@ -23,14 +24,14 @@ def initialize(prediction, page_id) # @return [Hash] def printable_values - printable = {} + printable = {} # @type var printable: Hash[Symbol, String] printable[:page_indexes] = format_for_display(@page_indexes) printable end # @return [Hash] def table_printable_values - printable = {} + printable = {} # @type var printable: Hash[Symbol, String] printable[:page_indexes] = @page_indexes.join(', ') printable end diff --git a/lib/mindee/v1/product/multi_receipts_detector/multi_receipts_detector_v1_document.rb b/lib/mindee/v1/product/multi_receipts_detector/multi_receipts_detector_v1_document.rb index 0d61ad028..a5c23ea7f 100644 --- a/lib/mindee/v1/product/multi_receipts_detector/multi_receipts_detector_v1_document.rb +++ b/lib/mindee/v1/product/multi_receipts_detector/multi_receipts_detector_v1_document.rb @@ -9,6 +9,7 @@ module MultiReceiptsDetector # Multi Receipts Detector API version 1.1 document data. class MultiReceiptsDetectorV1Document < Mindee::V1::Parsing::Common::Prediction include Mindee::V1::Parsing::Standard + # Positions of the receipts on the document. # @return [Array] attr_reader :receipts diff --git a/lib/mindee/v1/product/passport/passport_v1_document.rb b/lib/mindee/v1/product/passport/passport_v1_document.rb index 01b5e1b21..513a51900 100644 --- a/lib/mindee/v1/product/passport/passport_v1_document.rb +++ b/lib/mindee/v1/product/passport/passport_v1_document.rb @@ -9,6 +9,7 @@ module Passport # Passport API version 1.1 document data. class PassportV1Document < Mindee::V1::Parsing::Common::Prediction include Mindee::V1::Parsing::Standard + # The date of birth of the passport holder. # @return [Mindee::V1::Parsing::Standard::DateField] attr_reader :birth_date diff --git a/lib/mindee/v1/product/receipt/receipt_v5_document.rb b/lib/mindee/v1/product/receipt/receipt_v5_document.rb index 5770fe815..fff10708b 100644 --- a/lib/mindee/v1/product/receipt/receipt_v5_document.rb +++ b/lib/mindee/v1/product/receipt/receipt_v5_document.rb @@ -10,6 +10,7 @@ module Receipt # Receipt API version 5.4 document data. class ReceiptV5Document < Mindee::V1::Parsing::Common::Prediction include Mindee::V1::Parsing::Standard + # The purchase category of the receipt. # @return [Mindee::V1::Parsing::Standard::ClassificationField] attr_reader :category diff --git a/lib/mindee/v1/product/receipt/receipt_v5_line_item.rb b/lib/mindee/v1/product/receipt/receipt_v5_line_item.rb index 7552ccb25..dd7964af7 100644 --- a/lib/mindee/v1/product/receipt/receipt_v5_line_item.rb +++ b/lib/mindee/v1/product/receipt/receipt_v5_line_item.rb @@ -9,6 +9,7 @@ module Receipt # List of all line items on the receipt. class ReceiptV5LineItem < Mindee::V1::Parsing::Standard::FeatureField include Mindee::V1::Parsing::Standard + # The item description. # @return [String] attr_reader :description @@ -35,7 +36,7 @@ def initialize(prediction, page_id) # @return [Hash] def printable_values - printable = {} + printable = {} # @type var printable: Hash[Symbol, String] printable[:description] = format_for_display(@description) printable[:quantity] = @quantity.nil? ? '' : Parsing::Standard::BaseField.float_to_string(@quantity) @@ -48,7 +49,7 @@ def printable_values # @return [Hash] def table_printable_values - printable = {} + printable = {} # @type var printable: Hash[Symbol, String] printable[:description] = format_for_display(@description, 36) printable[:quantity] = @quantity.nil? ? '' : Parsing::Standard::BaseField.float_to_string(@quantity) diff --git a/lib/mindee/v1/product/resume/resume_v1_certificate.rb b/lib/mindee/v1/product/resume/resume_v1_certificate.rb index d3714cd89..c0af21f6b 100644 --- a/lib/mindee/v1/product/resume/resume_v1_certificate.rb +++ b/lib/mindee/v1/product/resume/resume_v1_certificate.rb @@ -9,6 +9,7 @@ module Resume # The list of certificates obtained by the candidate. class ResumeV1Certificate < Mindee::V1::Parsing::Standard::FeatureField include Mindee::V1::Parsing::Standard + # The grade obtained for the certificate. # @return [String] attr_reader :grade @@ -35,7 +36,7 @@ def initialize(prediction, page_id) # @return [Hash] def printable_values - printable = {} + printable = {} # @type var printable: Hash[Symbol, String] printable[:grade] = format_for_display(@grade) printable[:name] = format_for_display(@name) printable[:provider] = format_for_display(@provider) @@ -45,7 +46,7 @@ def printable_values # @return [Hash] def table_printable_values - printable = {} + printable = {} # @type var printable: Hash[Symbol, String] printable[:grade] = format_for_display(@grade, 10) printable[:name] = format_for_display(@name, 30) printable[:provider] = format_for_display(@provider, 25) diff --git a/lib/mindee/v1/product/resume/resume_v1_document.rb b/lib/mindee/v1/product/resume/resume_v1_document.rb index c9d919a0c..33050a9a4 100644 --- a/lib/mindee/v1/product/resume/resume_v1_document.rb +++ b/lib/mindee/v1/product/resume/resume_v1_document.rb @@ -14,6 +14,7 @@ module Resume # Resume API version 1.2 document data. class ResumeV1Document < Mindee::V1::Parsing::Common::Prediction include Mindee::V1::Parsing::Standard + # The location information of the candidate, including city, state, and country. # @return [Mindee::V1::Parsing::Standard::StringField] attr_reader :address diff --git a/lib/mindee/v1/product/resume/resume_v1_education.rb b/lib/mindee/v1/product/resume/resume_v1_education.rb index 28c8f348b..adbd06400 100644 --- a/lib/mindee/v1/product/resume/resume_v1_education.rb +++ b/lib/mindee/v1/product/resume/resume_v1_education.rb @@ -9,6 +9,7 @@ module Resume # The list of the candidate's educational background. class ResumeV1Education < Mindee::V1::Parsing::Standard::FeatureField include Mindee::V1::Parsing::Standard + # The area of study or specialization. # @return [String] attr_reader :degree_domain @@ -47,7 +48,7 @@ def initialize(prediction, page_id) # @return [Hash] def printable_values - printable = {} + printable = {} # @type var printable: Hash[Symbol, String] printable[:degree_domain] = format_for_display(@degree_domain) printable[:degree_type] = format_for_display(@degree_type) printable[:end_month] = format_for_display(@end_month) @@ -60,7 +61,7 @@ def printable_values # @return [Hash] def table_printable_values - printable = {} + printable = {} # @type var printable: Hash[Symbol, String] printable[:degree_domain] = format_for_display(@degree_domain, 15) printable[:degree_type] = format_for_display(@degree_type, 25) printable[:end_month] = format_for_display(@end_month, nil) diff --git a/lib/mindee/v1/product/resume/resume_v1_language.rb b/lib/mindee/v1/product/resume/resume_v1_language.rb index 85b869be6..0eb9b49ff 100644 --- a/lib/mindee/v1/product/resume/resume_v1_language.rb +++ b/lib/mindee/v1/product/resume/resume_v1_language.rb @@ -9,6 +9,7 @@ module Resume # The list of languages that the candidate is proficient in. class ResumeV1Language < Mindee::V1::Parsing::Standard::FeatureField include Mindee::V1::Parsing::Standard + # The language's ISO 639 code. # @return [String] attr_reader :language @@ -27,7 +28,7 @@ def initialize(prediction, page_id) # @return [Hash] def printable_values - printable = {} + printable = {} # @type var printable: Hash[Symbol, String] printable[:language] = format_for_display(@language) printable[:level] = format_for_display(@level) printable @@ -35,7 +36,7 @@ def printable_values # @return [Hash] def table_printable_values - printable = {} + printable = {} # @type var printable: Hash[Symbol, String] printable[:language] = format_for_display(@language, nil) printable[:level] = format_for_display(@level, 20) printable diff --git a/lib/mindee/v1/product/resume/resume_v1_professional_experience.rb b/lib/mindee/v1/product/resume/resume_v1_professional_experience.rb index 9a1a36519..769c4afcf 100644 --- a/lib/mindee/v1/product/resume/resume_v1_professional_experience.rb +++ b/lib/mindee/v1/product/resume/resume_v1_professional_experience.rb @@ -9,6 +9,7 @@ module Resume # The list of the candidate's professional experiences. class ResumeV1ProfessionalExperience < Mindee::V1::Parsing::Standard::FeatureField include Mindee::V1::Parsing::Standard + # The type of contract for the professional experience. # @return [String] attr_reader :contract_type @@ -55,7 +56,7 @@ def initialize(prediction, page_id) # @return [Hash] def printable_values - printable = {} + printable = {} # @type var printable: Hash[Symbol, String] printable[:contract_type] = format_for_display(@contract_type) printable[:department] = format_for_display(@department) printable[:description] = format_for_display(@description) @@ -70,7 +71,7 @@ def printable_values # @return [Hash] def table_printable_values - printable = {} + printable = {} # @type var printable: Hash[Symbol, String] printable[:contract_type] = format_for_display(@contract_type, 15) printable[:department] = format_for_display(@department, 10) printable[:description] = format_for_display(@description, 36) diff --git a/lib/mindee/v1/product/resume/resume_v1_social_networks_url.rb b/lib/mindee/v1/product/resume/resume_v1_social_networks_url.rb index 77bc93c67..c6f579a53 100644 --- a/lib/mindee/v1/product/resume/resume_v1_social_networks_url.rb +++ b/lib/mindee/v1/product/resume/resume_v1_social_networks_url.rb @@ -9,6 +9,7 @@ module Resume # The list of social network profiles of the candidate. class ResumeV1SocialNetworksUrl < Mindee::V1::Parsing::Standard::FeatureField include Mindee::V1::Parsing::Standard + # The name of the social network. # @return [String] attr_reader :name @@ -27,7 +28,7 @@ def initialize(prediction, page_id) # @return [Hash] def printable_values - printable = {} + printable = {} # @type var printable: Hash[Symbol, String] printable[:name] = format_for_display(@name) printable[:url] = format_for_display(@url) printable @@ -35,7 +36,7 @@ def printable_values # @return [Hash] def table_printable_values - printable = {} + printable = {} # @type var printable: Hash[Symbol, String] printable[:name] = format_for_display(@name, 20) printable[:url] = format_for_display(@url, 50) printable diff --git a/lib/mindee/v1/product/universal/universal_document.rb b/lib/mindee/v1/product/universal/universal_document.rb index 4ac627609..5a470b6df 100644 --- a/lib/mindee/v1/product/universal/universal_document.rb +++ b/lib/mindee/v1/product/universal/universal_document.rb @@ -10,6 +10,7 @@ module Universal # Universal Document V1 prediction class UniversalDocument < Mindee::V1::Product::Universal::UniversalPrediction include Mindee::V1::Parsing::Standard + # @param raw_prediction [Hash] def initialize(raw_prediction) super diff --git a/lib/mindee/v1/product/universal/universal_prediction.rb b/lib/mindee/v1/product/universal/universal_prediction.rb index 057006be2..c06fdec15 100644 --- a/lib/mindee/v1/product/universal/universal_prediction.rb +++ b/lib/mindee/v1/product/universal/universal_prediction.rb @@ -11,6 +11,7 @@ class UniversalPrediction < Mindee::V1::Parsing::Common::Prediction include Mindee::V1::Parsing::Common include Mindee::V1::Parsing::Standard include Mindee::V1::Parsing::Universal + # All value fields in the document # @return [Hash] attr_reader :fields diff --git a/lib/mindee/v2/file_operation/crop.rb b/lib/mindee/v2/file_operation/crop.rb index 63c6bf0ca..7746f9e01 100644 --- a/lib/mindee/v2/file_operation/crop.rb +++ b/lib/mindee/v2/file_operation/crop.rb @@ -35,7 +35,7 @@ def self.extract_crops(input_source, crops) polygons[crop.location.page] << crop.location.polygon end - images = [] + images = [] # @type var images: Array[Image::ExtractedImage] polygons.each_with_index do |page_polygons, page_index| extracted = Mindee::Image::ImageExtractor.extract_multiple_images_from_source( input_source, page_index, page_polygons diff --git a/lib/mindee/v2/http/mindee_api.rb b/lib/mindee/v2/http/mindee_api.rb index a82c76004..9ccbed791 100644 --- a/lib/mindee/v2/http/mindee_api.rb +++ b/lib/mindee/v2/http/mindee_api.rb @@ -76,7 +76,7 @@ def req_get_search_models(model_name, model_type) url = "#{@settings.base_url}/v2/search/models" uri = URI(url) - query_params = {} + query_params = {} # @type var query_params: Hash[Symbol, String | nil] query_params[:name] = model_name if model_name query_params[:model_type] = model_type if model_type uri.query = URI.encode_www_form(query_params) unless query_params.empty? diff --git a/lib/mindee/v2/parsing/field/inference_fields.rb b/lib/mindee/v2/parsing/field/inference_fields.rb index 16aa7fc8e..e7d0431f3 100644 --- a/lib/mindee/v2/parsing/field/inference_fields.rb +++ b/lib/mindee/v2/parsing/field/inference_fields.rb @@ -73,7 +73,7 @@ def to_s(indent = 0) indent ||= @indent_level padding = ' ' * indent - lines = [] + lines = [] # @type var lines: Array[String] each do |field_key, field_value| line = "#{padding}:#{field_key}:" diff --git a/lib/mindee/v2/parsing/field/list_field.rb b/lib/mindee/v2/parsing/field/list_field.rb index 7d17a25e7..9d6a7effb 100644 --- a/lib/mindee/v2/parsing/field/list_field.rb +++ b/lib/mindee/v2/parsing/field/list_field.rb @@ -32,7 +32,7 @@ def initialize(server_response, indent_level = 0) # @return [Array] Simple fields contained in the list. # @raise [TypeError] If the fields are not SimpleField. def simple_items - fields = [] + fields = [] # @type var fields: Array[SimpleField] @items.each do |item| raise TypeError, "Invalid field type detected: #{item.class}" unless item.is_a?(SimpleField) @@ -45,7 +45,7 @@ def simple_items # @return [Array] Object fields contained in the list. # @raise [TypeError] If the fields are not ObjectField. def object_items - fields = [] + fields = [] # @type var fields: Array[ObjectField] @items.each do |item| raise TypeError, "Invalid field type detected: #{item.class}" unless item.is_a?(ObjectField) diff --git a/lib/mindee/v2/parsing/field/object_field.rb b/lib/mindee/v2/parsing/field/object_field.rb index a0ce89a49..c9d7feaa9 100644 --- a/lib/mindee/v2/parsing/field/object_field.rb +++ b/lib/mindee/v2/parsing/field/object_field.rb @@ -111,22 +111,25 @@ def get_object_field(key) # Get all simple fields. # @return [Hash] Simple fields contained in the object. def simple_fields - # @type var fields: Hash - @fields.select { |_, value| value.is_a?(SimpleField) } + # @type var result: Hash[String, SimpleField] + result = @fields.select { |_, value| value.is_a?(SimpleField) } # rubocop:disable Style/RedundantAssignment + result end # Get all list fields. # @return [Hash] List fields contained in the object. def list_fields - # @type var fields: Hash - @fields.select { |_, value| value.is_a?(ListField) } + # @type var result: Hash[String, ListField] + result = @fields.select { |_, value| value.is_a?(ListField) } # rubocop:disable Style/RedundantAssignment + result end # Get all object fields. # @return [Hash] Object fields contained in the object. def object_fields - # @type var fields: Hash - @fields.select { |_, value| value.is_a?(ObjectField) } + # @type var result: Hash[String, ObjectField] + result = @fields.select { |_, value| value.is_a?(ObjectField) } # rubocop:disable Style/RedundantAssignment + result end end end diff --git a/lib/mindee/v2/parsing/job.rb b/lib/mindee/v2/parsing/job.rb index 41b091c3a..e3f726d8e 100644 --- a/lib/mindee/v2/parsing/job.rb +++ b/lib/mindee/v2/parsing/job.rb @@ -81,7 +81,7 @@ def to_s '', 'Webhooks', '=========', - @webhooks.map(&:to_s).join("\n\n"), + @webhooks.join("\n\n"), ] end diff --git a/lib/mindee/v2/parsing/raw_text.rb b/lib/mindee/v2/parsing/raw_text.rb index 033d7f262..4418bab37 100644 --- a/lib/mindee/v2/parsing/raw_text.rb +++ b/lib/mindee/v2/parsing/raw_text.rb @@ -17,7 +17,7 @@ def initialize(server_response) end def to_s - "#{@pages.map(&:to_s).join("\n\n")}\n" + "#{@pages.join("\n\n")}\n" end end end diff --git a/lib/mindee/v2/product/ocr/ocr_page.rb b/lib/mindee/v2/product/ocr/ocr_page.rb index 5de396885..718ed21af 100644 --- a/lib/mindee/v2/product/ocr/ocr_page.rb +++ b/lib/mindee/v2/product/ocr/ocr_page.rb @@ -23,7 +23,7 @@ def initialize(server_response) # @return [String] def to_s ocr_words = "\n" - ocr_words += @words.map(&:to_s).join("\n\n") if @words&.any? + ocr_words += @words.join("\n\n") if @words&.any? "OCR Words\n======#{ocr_words}\n\n:Content: #{@content}" end end diff --git a/mindee.gemspec b/mindee.gemspec index e42488ecf..090f30050 100644 --- a/mindee.gemspec +++ b/mindee.gemspec @@ -27,21 +27,21 @@ Gem::Specification.new do |spec| .reject { |f| f == 'products.rb' } spec.require_paths = ['lib'] - spec.required_ruby_version = Gem::Requirement.new('>= 3.0') + spec.required_ruby_version = Gem::Requirement.new('>= 3.2') - spec.add_dependency 'base64', '~> 0.1' + spec.add_dependency 'base64', '~> 0.3' spec.add_dependency 'bundle-audit', '~> 0.2.0' - spec.add_dependency 'marcel', '~> 1.0' + spec.add_dependency 'marcel', '~> 1.1' spec.add_dependency 'mini_magick', '>= 4', '< 6' spec.add_dependency 'origamindee', '~> 4.0' - spec.add_dependency 'pdf-reader', '~> 2.14' + spec.add_dependency 'pdf-reader', '~> 2.15' - spec.add_development_dependency 'openssl', '~> 3.3.2' + spec.add_development_dependency 'openssl', '~> 4.0' spec.add_development_dependency 'prism', '~> 1.3' - spec.add_development_dependency 'rake', '~> 13.2' - spec.add_development_dependency 'rbs', '~> 3.6' + spec.add_development_dependency 'rake', '~> 13.3' + spec.add_development_dependency 'rbs', '~> 3.10' spec.add_development_dependency 'rspec', '~> 3.13' - spec.add_development_dependency 'rubocop', '~> 1.76.0' - spec.add_development_dependency 'steep', '~> 1.7' + spec.add_development_dependency 'rubocop', '~> 1.86' + spec.add_development_dependency 'steep', '~> 1.10' spec.add_development_dependency 'yard', '~> 0.9' end diff --git a/sig/mindee/image/image_extractor.rbs b/sig/mindee/image/image_extractor.rbs index a92fe154c..5f57c5121 100644 --- a/sig/mindee/image/image_extractor.rbs +++ b/sig/mindee/image/image_extractor.rbs @@ -6,7 +6,7 @@ module Mindee def self.to_blob: () -> String def self.extract_multiple_images_from_source: (Input::Source::LocalInputSource, Integer, Array[Array[Geometry::Point] |Geometry::Polygon | Geometry::Quadrilateral]) -> Array[ExtractedImage] def self.extract_images_from_polygons: (Input::Source::LocalInputSource, StringIO | File, Integer, Array[Array[Geometry::Point] | Geometry::Polygon | Geometry::Quadrilateral]) -> Array[ExtractedImage] - def self.create_extracted_image: (StringIO | File, String, Integer, Integer) -> void + def self.create_extracted_image: (StringIO | File, String, Integer, Integer) -> ExtractedImage def self.load_input_source_pdf_page_as_stringio: (Input::Source::LocalInputSource, Integer) -> (StringIO | File) end end diff --git a/sig/mindee/pdf/extracted_pdf.rbs b/sig/mindee/pdf/extracted_pdf.rbs index 658400f98..937bd3d60 100644 --- a/sig/mindee/pdf/extracted_pdf.rbs +++ b/sig/mindee/pdf/extracted_pdf.rbs @@ -5,7 +5,7 @@ module Mindee attr_reader pdf_bytes: StringIO attr_reader filename: String - def initialize: (StringIO, String) -> void + def initialize: (StringIO | File, String) -> void def page_count: -> Integer diff --git a/sig/mindee/v1/extraction/multi_receipts_extractor.rbs b/sig/mindee/v1/extraction/multi_receipts_extractor.rbs index a04207362..ca2691837 100644 --- a/sig/mindee/v1/extraction/multi_receipts_extractor.rbs +++ b/sig/mindee/v1/extraction/multi_receipts_extractor.rbs @@ -2,7 +2,7 @@ module Mindee module V1 module Extraction - def self.extract_receipts: (untyped, untyped) -> Array[untyped] + def self.extract_receipts: (Input::Source::LocalInputSource, Product::MultiReceiptsDetector::MultiReceiptsDetectorV1) -> Array[Image::ExtractedImage] end end end diff --git a/sig/mindee/v1/parsing/common/inference.rbs b/sig/mindee/v1/parsing/common/inference.rbs index a4f302470..c9255023d 100644 --- a/sig/mindee/v1/parsing/common/inference.rbs +++ b/sig/mindee/v1/parsing/common/inference.rbs @@ -19,7 +19,6 @@ module Mindee attr_reader has_sync: bool attr_reader is_rotation_applied: bool attr_reader pages: Array[Page] - attr_reader prediction: Prediction attr_reader product: Product def initialize: (Hash[String | Symbol, untyped]) -> void diff --git a/sig/mindee/v1/parsing/common/page.rbs b/sig/mindee/v1/parsing/common/page.rbs index d64ce1b28..5e533830c 100644 --- a/sig/mindee/v1/parsing/common/page.rbs +++ b/sig/mindee/v1/parsing/common/page.rbs @@ -7,7 +7,7 @@ module Mindee attr_reader extras: Extras::Extras attr_reader orientation: Common::Orientation attr_reader page_id: Integer - attr_reader prediction: Common::Prediction + attr_reader prediction: untyped # Voluntary to allow proper overloads, leave as-is. def initialize: (Hash[String | Symbol, untyped]) -> void diff --git a/sig/mindee/v1/parsing/standard/company_registration_field.rbs b/sig/mindee/v1/parsing/standard/company_registration_field.rbs index ad506a7db..325af88f5 100644 --- a/sig/mindee/v1/parsing/standard/company_registration_field.rbs +++ b/sig/mindee/v1/parsing/standard/company_registration_field.rbs @@ -12,7 +12,7 @@ module Mindee def to_s: -> String - def printable_values: -> Hash[String, untyped] + def printable_values: -> Hash[Symbol, string] end end end diff --git a/sig/mindee/v1/parsing/standard/tax_field.rbs b/sig/mindee/v1/parsing/standard/tax_field.rbs index 0212172dc..f3219fcbb 100644 --- a/sig/mindee/v1/parsing/standard/tax_field.rbs +++ b/sig/mindee/v1/parsing/standard/tax_field.rbs @@ -15,7 +15,7 @@ module Mindee def to_s: -> String - def printable_values: -> Hash[untyped, String] + def printable_values: -> Hash[Symbol, String] def to_table_line: -> String end diff --git a/sig/mindee/v1/product/cropper/cropper_v1_page.rbs b/sig/mindee/v1/product/cropper/cropper_v1_page.rbs index f6477eee1..e90ab9e94 100644 --- a/sig/mindee/v1/product/cropper/cropper_v1_page.rbs +++ b/sig/mindee/v1/product/cropper/cropper_v1_page.rbs @@ -8,8 +8,9 @@ module Mindee def initialize: (Hash[String | Symbol, untyped]) -> void end class CropperV1PagePrediction < CropperV1Document + attr_reader cropping: Array[Parsing::Standard::PositionField] + def initialize: (Hash[String | Symbol, untyped], Integer?) -> void - def cropping: -> Array[Parsing::Standard::PositionField] def to_s: -> String end end diff --git a/sig/mindee/v1/product/financial_document/financial_document_v1_line_item.rbs b/sig/mindee/v1/product/financial_document/financial_document_v1_line_item.rbs index ef8a1665f..9ad61b17a 100644 --- a/sig/mindee/v1/product/financial_document/financial_document_v1_line_item.rbs +++ b/sig/mindee/v1/product/financial_document/financial_document_v1_line_item.rbs @@ -21,7 +21,7 @@ module Mindee def unit_price: -> Float - def printable_values: -> Hash[String | Symbol, untyped] + def printable_values: -> Hash[Symbol, String] def table_printable_values: -> Hash[String | Symbol, untyped] diff --git a/sig/mindee/v1/product/fr/bank_account_details/bank_account_details_v2_bban.rbs b/sig/mindee/v1/product/fr/bank_account_details/bank_account_details_v2_bban.rbs index 144eec111..64d17ae09 100644 --- a/sig/mindee/v1/product/fr/bank_account_details/bank_account_details_v2_bban.rbs +++ b/sig/mindee/v1/product/fr/bank_account_details/bank_account_details_v2_bban.rbs @@ -14,7 +14,7 @@ module Mindee def bban_number: -> String - def printable_values: -> Hash[String | Symbol, untyped] + def printable_values: -> Hash[Symbol, String] def to_s: -> String end diff --git a/sig/mindee/v1/product/fr/bank_statement/bank_statement_v2_transaction.rbs b/sig/mindee/v1/product/fr/bank_statement/bank_statement_v2_transaction.rbs index 9f1260e23..3097a8b25 100644 --- a/sig/mindee/v1/product/fr/bank_statement/bank_statement_v2_transaction.rbs +++ b/sig/mindee/v1/product/fr/bank_statement/bank_statement_v2_transaction.rbs @@ -12,7 +12,7 @@ module Mindee def description: -> String - def printable_values: -> Hash[String | Symbol, untyped] + def printable_values: -> Hash[Symbol, String] def table_printable_values: -> Hash[String | Symbol, untyped] diff --git a/sig/mindee/v1/product/invoice/invoice_v4_line_item.rbs b/sig/mindee/v1/product/invoice/invoice_v4_line_item.rbs index 1c100dbf3..4962dc0c9 100644 --- a/sig/mindee/v1/product/invoice/invoice_v4_line_item.rbs +++ b/sig/mindee/v1/product/invoice/invoice_v4_line_item.rbs @@ -21,7 +21,7 @@ module Mindee def unit_price: -> Float - def printable_values: -> Hash[String | Symbol, untyped] + def printable_values: -> Hash[Symbol, String] def table_printable_values: -> Hash[String | Symbol, untyped] diff --git a/sig/mindee/v1/product/invoice_splitter/invoice_splitter_v1_invoice_page_group.rbs b/sig/mindee/v1/product/invoice_splitter/invoice_splitter_v1_invoice_page_group.rbs index 2780b171d..b1929140a 100644 --- a/sig/mindee/v1/product/invoice_splitter/invoice_splitter_v1_invoice_page_group.rbs +++ b/sig/mindee/v1/product/invoice_splitter/invoice_splitter_v1_invoice_page_group.rbs @@ -7,7 +7,7 @@ module Mindee def page_indexes: -> Array[Integer] - def printable_values: -> Hash[String | Symbol, untyped] + def printable_values: -> Hash[Symbol, String] def table_printable_values: -> Hash[String | Symbol, untyped] diff --git a/sig/mindee/v1/product/multi_receipts_detector/multi_receipts_detector_v1.rbs b/sig/mindee/v1/product/multi_receipts_detector/multi_receipts_detector_v1.rbs index 4249ecc69..f6688fc08 100644 --- a/sig/mindee/v1/product/multi_receipts_detector/multi_receipts_detector_v1.rbs +++ b/sig/mindee/v1/product/multi_receipts_detector/multi_receipts_detector_v1.rbs @@ -3,10 +3,11 @@ module Mindee module V1 module Product - module MultiReceiptsDetector - class MultiReceiptsDetectorV1 < Parsing::Common::Inference - def initialize: (Hash[String | Symbol, untyped]) -> void - end + module MultiReceiptsDetector + class MultiReceiptsDetectorV1 < Parsing::Common::Inference + attr_reader prediction: MultiReceiptsDetectorV1Document + def initialize: (Hash[String | Symbol, untyped]) -> void + end end end end diff --git a/sig/mindee/v1/product/multi_receipts_detector/multi_receipts_detector_v1_document.rbs b/sig/mindee/v1/product/multi_receipts_detector/multi_receipts_detector_v1_document.rbs index caaba3fc0..5a7689873 100644 --- a/sig/mindee/v1/product/multi_receipts_detector/multi_receipts_detector_v1_document.rbs +++ b/sig/mindee/v1/product/multi_receipts_detector/multi_receipts_detector_v1_document.rbs @@ -5,8 +5,8 @@ module Mindee module Product module MultiReceiptsDetector class MultiReceiptsDetectorV1Document < Parsing::Common::Prediction + attr_reader receipts:(Array[Parsing::Standard::PositionField]) def initialize: (Hash[String | Symbol, untyped], Integer?) -> void - def receipts: -> (Array[Parsing::Standard::PositionField]) def to_s: -> String end end diff --git a/sig/mindee/v1/product/receipt/receipt_v5_line_item.rbs b/sig/mindee/v1/product/receipt/receipt_v5_line_item.rbs index a46c17e7f..5cb4816e2 100644 --- a/sig/mindee/v1/product/receipt/receipt_v5_line_item.rbs +++ b/sig/mindee/v1/product/receipt/receipt_v5_line_item.rbs @@ -13,7 +13,7 @@ module Mindee def unit_price: -> Float - def printable_values: -> Hash[String | Symbol, untyped] + def printable_values: -> Hash[Symbol, String] def table_printable_values: -> Hash[String | Symbol, untyped] diff --git a/sig/mindee/v1/product/resume/resume_v1_certificate.rbs b/sig/mindee/v1/product/resume/resume_v1_certificate.rbs index af0e5bc21..98b30446c 100644 --- a/sig/mindee/v1/product/resume/resume_v1_certificate.rbs +++ b/sig/mindee/v1/product/resume/resume_v1_certificate.rbs @@ -13,7 +13,7 @@ module Mindee def year: -> String - def printable_values: -> Hash[String | Symbol, untyped] + def printable_values: -> Hash[Symbol, String] def table_printable_values: -> Hash[String | Symbol, untyped] diff --git a/sig/mindee/v1/product/resume/resume_v1_education.rbs b/sig/mindee/v1/product/resume/resume_v1_education.rbs index c2319c511..c533ebd04 100644 --- a/sig/mindee/v1/product/resume/resume_v1_education.rbs +++ b/sig/mindee/v1/product/resume/resume_v1_education.rbs @@ -19,7 +19,7 @@ module Mindee def start_year: -> String - def printable_values: -> Hash[String | Symbol, untyped] + def printable_values: -> Hash[Symbol, String] def table_printable_values: -> Hash[String | Symbol, untyped] diff --git a/sig/mindee/v1/product/resume/resume_v1_language.rbs b/sig/mindee/v1/product/resume/resume_v1_language.rbs index 19c9c1dc3..224d6359c 100644 --- a/sig/mindee/v1/product/resume/resume_v1_language.rbs +++ b/sig/mindee/v1/product/resume/resume_v1_language.rbs @@ -9,7 +9,7 @@ module Mindee def level: -> String - def printable_values: -> Hash[String | Symbol, untyped] + def printable_values: -> Hash[Symbol, String] def table_printable_values: -> Hash[String | Symbol, untyped] diff --git a/sig/mindee/v1/product/resume/resume_v1_professional_experience.rbs b/sig/mindee/v1/product/resume/resume_v1_professional_experience.rbs index 803dbfbfa..61910b5cf 100644 --- a/sig/mindee/v1/product/resume/resume_v1_professional_experience.rbs +++ b/sig/mindee/v1/product/resume/resume_v1_professional_experience.rbs @@ -23,7 +23,7 @@ module Mindee def start_year: -> String - def printable_values: -> Hash[String | Symbol, untyped] + def printable_values: -> Hash[Symbol, String] def table_printable_values: -> Hash[String | Symbol, untyped] diff --git a/sig/mindee/v1/product/resume/resume_v1_social_networks_url.rbs b/sig/mindee/v1/product/resume/resume_v1_social_networks_url.rbs index 9d2305362..c0889b5e0 100644 --- a/sig/mindee/v1/product/resume/resume_v1_social_networks_url.rbs +++ b/sig/mindee/v1/product/resume/resume_v1_social_networks_url.rbs @@ -9,7 +9,7 @@ module Mindee def url: -> String - def printable_values: -> Hash[String | Symbol, untyped] + def printable_values: -> Hash[Symbol, String] def table_printable_values: -> Hash[String | Symbol, untyped] diff --git a/spec/image/image_extractor_spec.rb b/spec/image/image_extractor_spec.rb index 911ea0070..98b2f5e7b 100644 --- a/spec/image/image_extractor_spec.rb +++ b/spec/image/image_extractor_spec.rb @@ -6,6 +6,7 @@ describe Mindee::Image do include Mindee::Image + let(:barcode_path) do File.join(V1_DATA_DIR, 'products', 'barcode_reader', 'default_sample.jpg') end diff --git a/spec/openssl_crl_workaround.rb b/spec/openssl_crl_workaround.rb index 1f10c085a..f4dff34d9 100644 --- a/spec/openssl_crl_workaround.rb +++ b/spec/openssl_crl_workaround.rb @@ -3,7 +3,7 @@ require 'openssl' # Workaround for errors in SSL certificates validations on macOS. -params = OpenSSL::SSL::SSLContext::DEFAULT_PARAMS +params = OpenSSL::SSL::SSLContext::DEFAULT_PARAMS.dup params[:verify_mode] = OpenSSL::SSL::VERIFY_PEER diff --git a/spec/pdf/extracted_pdf_spec.rb b/spec/pdf/extracted_pdf_spec.rb index 5fb37bd4f..9f8e4d999 100644 --- a/spec/pdf/extracted_pdf_spec.rb +++ b/spec/pdf/extracted_pdf_spec.rb @@ -17,62 +17,71 @@ describe '#initialize' do it 'initializes with valid pdf bytes and filename' do - pdf_stream = File.open(valid_pdf_path, 'r') - extracted_pdf = described_class.new(pdf_stream, 'invoice.pdf') - - expect(extracted_pdf.pdf_bytes).to eq(pdf_stream) - expect(extracted_pdf.filename).to eq('invoice.pdf') + File.open(valid_pdf_path, 'r') do |pdf_stream| + extracted_pdf = described_class.new(pdf_stream, 'invoice.pdf') + expect(extracted_pdf.pdf_bytes).to be_a(StringIO) + pdf_stream.rewind + extracted_pdf.pdf_bytes.rewind + expect(extracted_pdf.pdf_bytes.read).to eq(pdf_stream.read) + + expect(extracted_pdf.filename).to eq('invoice.pdf') + end end end describe '#page_count' do it 'raises an error for invalid PDF content' do - jpg_stream = File.open(invalid_pdf_path, 'r') - pdf_wrapper = described_class.new(jpg_stream, 'dummy.pdf') + File.open(invalid_pdf_path, 'r') do |jpg_stream| + pdf_wrapper = described_class.new(jpg_stream, 'dummy.pdf') - expect do - pdf_wrapper.page_count - end.to raise_error Mindee::Error::MindeePDFError, %r{Could not retrieve page count} + expect do + pdf_wrapper.page_count + end.to raise_error Mindee::Error::MindeePDFError, %r{Could not retrieve page count} + end end it 'returns the correct page count for a valid PDF' do - pdf_stream = File.open(valid_pdf_path, 'r') - allow(Mindee::PDF::PDFProcessor).to receive(:open_pdf).and_return(double(pages: [1, 2, 3])) - pdf_wrapper = described_class.new(pdf_stream, 'invoice.pdf') + File.open(valid_pdf_path, 'r') do |pdf_stream| + allow(Mindee::PDF::PDFProcessor).to receive(:open_pdf).and_return(double(pages: [1, 2, 3])) + pdf_wrapper = described_class.new(pdf_stream, 'invoice.pdf') - expect(pdf_wrapper.page_count).to eq(3) + expect(pdf_wrapper.page_count).to eq(3) + end end end describe '#write_to_file' do it 'writes the PDF bytes to a specified file path' do - pdf_stream = File.open(valid_pdf_path, 'r') - expected_pdf_content = pdf_stream.read - pdf_stream.rewind - pdf_wrapper = described_class.new(pdf_stream, 'invoice.pdf') - - expect { pdf_wrapper.write_to_file(output_path) }.not_to raise_error - expect(File).to have_received(:binwrite).with(output_path, expected_pdf_content) + File.open(valid_pdf_path, 'r') do |pdf_stream| + expected_pdf_content = pdf_stream.read + pdf_stream.rewind + pdf_wrapper = described_class.new(pdf_stream, 'invoice.pdf') + + expect { pdf_wrapper.write_to_file(output_path) }.not_to raise_error + expect(File).to have_received(:binwrite).with(output_path, expected_pdf_content) + end end it 'raises an error if the output path is a directory' do allow(File).to receive(:directory?).and_return(true) - pdf_stream = File.open(valid_pdf_path, 'r') - pdf_wrapper = described_class.new(pdf_stream, 'invoice.pdf') + File.open(valid_pdf_path, 'r') do |pdf_stream| + pdf_wrapper = described_class.new(pdf_stream, 'invoice.pdf') - expect do - pdf_wrapper.write_to_file(output_path) - end.to raise_error Mindee::Error::MindeePDFError, %r{Provided path is not a file} + expect do + pdf_wrapper.write_to_file(output_path) + end.to raise_error Mindee::Error::MindeePDFError, %r{Provided path is not a file} + end end it 'raises an error if the save path is invalid' do allow(File).to receive(:exist?).and_return(false) - pdf_stream = File.open(valid_pdf_path, 'r') - pdf_wrapper = described_class.new(pdf_stream, 'invoice.pdf') + File.open(valid_pdf_path, 'r') do |pdf_stream| + pdf_wrapper = described_class.new(pdf_stream, 'invoice.pdf') - expect do - pdf_wrapper.write_to_file(output_path) - end.to raise_error Mindee::Error::MindeePDFError, %r{Invalid save path provided} + expect do + pdf_wrapper.write_to_file(output_path) + end.to raise_error Mindee::Error::MindeePDFError, %r{Invalid save path provided} + end end end diff --git a/spec/pdf/pdf_compressor_spec.rb b/spec/pdf/pdf_compressor_spec.rb index e9b50adb8..12119ab4c 100644 --- a/spec/pdf/pdf_compressor_spec.rb +++ b/spec/pdf/pdf_compressor_spec.rb @@ -45,16 +45,17 @@ 50 => "#{ROOT_DATA_DIR}/output/compressed_direct_50.pdf", 10 => "#{ROOT_DATA_DIR}/output/compressed_direct_10.pdf", } - pdf = File.open(input_file_path) - output_file_paths.each_pair do |key, value| - compressed_pdf = Mindee::PDF::PDFCompressor.compress_pdf(pdf, quality: key) - compressed_pdf.rewind - File.write(value, compressed_pdf.read) + File.open(input_file_path) do |pdf| + output_file_paths.each_pair do |key, value| + compressed_pdf = Mindee::PDF::PDFCompressor.compress_pdf(pdf, quality: key) + compressed_pdf.rewind + File.write(value, compressed_pdf.read) + end + expect(File.size(input_file_path)).to be > File.size(output_file_paths[85]) + expect(File.size(output_file_paths[75])).to be < File.size(output_file_paths[85]) + expect(File.size(output_file_paths[50])).to be < File.size(output_file_paths[75]) + expect(File.size(output_file_paths[10])).to be < File.size(output_file_paths[50]) end - expect(File.size(input_file_path)).to be > File.size(output_file_paths[85]) - expect(File.size(output_file_paths[75])).to be < File.size(output_file_paths[85]) - expect(File.size(output_file_paths[50])).to be < File.size(output_file_paths[75]) - expect(File.size(output_file_paths[10])).to be < File.size(output_file_paths[50]) end after(:each) do diff --git a/spec/pdf/pdf_processor_spec.rb b/spec/pdf/pdf_processor_spec.rb index e4e2f8f23..bf2b4c1ac 100644 --- a/spec/pdf/pdf_processor_spec.rb +++ b/spec/pdf/pdf_processor_spec.rb @@ -15,29 +15,31 @@ def open_pdf(io_stream) filepath = File.join(FILE_TYPES_DIR, 'pdf/blank.pdf').freeze it 'Should grab the first page' do - io_stream = File.open(filepath, 'rb') - io_stream.seek(0) - options = Mindee::PageOptions.new(params: { - page_indexes: [0], - operation: :KEEP_ONLY, - on_min_pages: 0, - }) - new_stream = Mindee::PDF::PDFProcessor.parse(io_stream, options) - new_pdf = open_pdf(new_stream) - expect(new_pdf.pages.size).to eq(1) + File.open(filepath, 'rb') do |io_stream| + io_stream.seek(0) + options = Mindee::PageOptions.new(params: { + page_indexes: [0], + operation: :KEEP_ONLY, + on_min_pages: 0, + }) + new_stream = Mindee::PDF::PDFProcessor.parse(io_stream, options) + new_pdf = open_pdf(new_stream) + expect(new_pdf.pages.size).to eq(1) + end end it 'Should grab the last page' do - io_stream = File.open(filepath, 'rb') - io_stream.seek(0) - options = Mindee::PageOptions.new(params: { - page_indexes: [-1], - operation: :KEEP_ONLY, - on_min_pages: 0, - }) - new_stream = Mindee::PDF::PDFProcessor.parse(io_stream, options) - new_pdf = open_pdf(new_stream) - expect(new_pdf.pages.size).to eq(1) + File.open(filepath, 'rb') do |io_stream| + io_stream.seek(0) + options = Mindee::PageOptions.new(params: { + page_indexes: [-1], + operation: :KEEP_ONLY, + on_min_pages: 0, + }) + new_stream = Mindee::PDF::PDFProcessor.parse(io_stream, options) + new_pdf = open_pdf(new_stream) + expect(new_pdf.pages.size).to eq(1) + end end end @@ -45,81 +47,87 @@ def open_pdf(io_stream) filepath = File.join(FILE_TYPES_DIR, 'pdf/multipage.pdf').freeze it 'Should grab the first page' do - io_stream = File.open(filepath, 'rb') - io_stream.seek(0) - options = Mindee::PageOptions.new(params: { - page_indexes: [0], - operation: :KEEP_ONLY, - on_min_pages: 0, - }) - new_stream = Mindee::PDF::PDFProcessor.parse(io_stream, options) - new_pdf = open_pdf(new_stream) - expect(new_pdf.pages.size).to eq(1) + File.open(filepath, 'rb') do |io_stream| + io_stream.seek(0) + options = Mindee::PageOptions.new(params: { + page_indexes: [0], + operation: :KEEP_ONLY, + on_min_pages: 0, + }) + new_stream = Mindee::PDF::PDFProcessor.parse(io_stream, options) + new_pdf = open_pdf(new_stream) + expect(new_pdf.pages.size).to eq(1) + end end it 'Should grab the last page' do - io_stream = File.open(filepath, 'rb') - io_stream.seek(0) - options = Mindee::PageOptions.new(params: { - page_indexes: [-1], - operation: :KEEP_ONLY, - on_min_pages: 0, - }) - new_stream = Mindee::PDF::PDFProcessor.parse(io_stream, options) - new_pdf = open_pdf(new_stream) - expect(new_pdf.pages.size).to eq(1) + File.open(filepath, 'rb') do |io_stream| + io_stream.seek(0) + options = Mindee::PageOptions.new(params: { + page_indexes: [-1], + operation: :KEEP_ONLY, + on_min_pages: 0, + }) + new_stream = Mindee::PDF::PDFProcessor.parse(io_stream, options) + new_pdf = open_pdf(new_stream) + expect(new_pdf.pages.size).to eq(1) + end end it 'Should grab the first 2, and the last page' do - io_stream = File.open(filepath, 'rb') - io_stream.seek(0) - options = Mindee::PageOptions.new(params: { - page_indexes: [0, 1, -1], - operation: :KEEP_ONLY, - on_min_pages: 0, - }) - new_stream = Mindee::PDF::PDFProcessor.parse(io_stream, options) - new_pdf = open_pdf(new_stream) - expect(new_pdf.pages.size).to eq(3) + File.open(filepath, 'rb') do |io_stream| + io_stream.seek(0) + options = Mindee::PageOptions.new(params: { + page_indexes: [0, 1, -1], + operation: :KEEP_ONLY, + on_min_pages: 0, + }) + new_stream = Mindee::PDF::PDFProcessor.parse(io_stream, options) + new_pdf = open_pdf(new_stream) + expect(new_pdf.pages.size).to eq(3) + end end it 'Should grab the first 5 pages' do - io_stream = File.open(filepath, 'rb') - io_stream.seek(0) - options = Mindee::PageOptions.new(params: { - page_indexes: [0, 1, 2, 3, 4], - operation: :KEEP_ONLY, - on_min_pages: 0, - }) - new_stream = Mindee::PDF::PDFProcessor.parse(io_stream, options) - new_pdf = open_pdf(new_stream) - expect(new_pdf.pages.size).to eq(5) + File.open(filepath, 'rb') do |io_stream| + io_stream.seek(0) + options = Mindee::PageOptions.new(params: { + page_indexes: [0, 1, 2, 3, 4], + operation: :KEEP_ONLY, + on_min_pages: 0, + }) + new_stream = Mindee::PDF::PDFProcessor.parse(io_stream, options) + new_pdf = open_pdf(new_stream) + expect(new_pdf.pages.size).to eq(5) + end end it 'Should remove the first 3 pages' do - io_stream = File.open(filepath, 'rb') - io_stream.seek(0) - options = Mindee::PageOptions.new(params: { - page_indexes: [0, 1, 2], - operation: :REMOVE, - on_min_pages: 0, - }) - new_stream = Mindee::PDF::PDFProcessor.parse(io_stream, options) - new_pdf = open_pdf(new_stream) - expect(new_pdf.pages.size).to eq(9) + File.open(filepath, 'rb') do |io_stream| + io_stream.seek(0) + options = Mindee::PageOptions.new(params: { + page_indexes: [0, 1, 2], + operation: :REMOVE, + on_min_pages: 0, + }) + new_stream = Mindee::PDF::PDFProcessor.parse(io_stream, options) + new_pdf = open_pdf(new_stream) + expect(new_pdf.pages.size).to eq(9) + end end it 'Should fail on invalid operation' do - io_stream = File.open(filepath, 'rb') - io_stream.seek(0) - options = Mindee::PageOptions.new(params: { - page_indexes: [1], - operation: :broken, - on_min_pages: 0, - }) - expect do - Mindee::PDF::PDFProcessor.parse(io_stream, options) - end.to raise_error ArgumentError + File.open(filepath, 'rb') do |io_stream| + io_stream.seek(0) + options = Mindee::PageOptions.new(params: { + page_indexes: [1], + operation: :broken, + on_min_pages: 0, + }) + expect do + Mindee::PDF::PDFProcessor.parse(io_stream, options) + end.to raise_error ArgumentError + end end end end diff --git a/spec/v1/client_spec.rb b/spec/v1/client_spec.rb index ef0136638..8d11519c0 100644 --- a/spec/v1/client_spec.rb +++ b/spec/v1/client_spec.rb @@ -16,9 +16,10 @@ end it 'should open PDF files from a file handle' do - file = File.open("#{V1_DATA_DIR}/products/invoices/invoice_10p.pdf", 'rb') - input_source = mindee_client.source_from_file(file, 'invoice_10p.pdf') - expect(input_source).to respond_to(:read_contents) + File.open("#{V1_DATA_DIR}/products/invoices/invoice_10p.pdf", 'rb') do |file| + input_source = mindee_client.source_from_file(file, 'invoice_10p.pdf') + expect(input_source).to respond_to(:read_contents) + end end it 'should open PDF files from raw bytes' do @@ -41,9 +42,10 @@ end it 'should open JPG files from a file handle' do - file = File.open("#{FILE_TYPES_DIR}/receipt.jpg", 'rb') - input_source = mindee_client.source_from_file(file, 'receipt.jpg') - expect(input_source).to respond_to(:read_contents) + File.open("#{FILE_TYPES_DIR}/receipt.jpg", 'rb') do |file| + input_source = mindee_client.source_from_file(file, 'receipt.jpg') + expect(input_source).to respond_to(:read_contents) + end end it 'should open JPG files from raw bytes' do diff --git a/spec/v1/http/error_handler_integration.rb b/spec/v1/http/error_handler_integration.rb index a494ceb16..edec057ec 100644 --- a/spec/v1/http/error_handler_integration.rb +++ b/spec/v1/http/error_handler_integration.rb @@ -8,22 +8,24 @@ context 'An HTTP call' do it 'should make an invalid API sync parse call raising an exception' do mindee_client1 = Mindee::V1::Client.new(api_key: 'invalid-api-key') - file = File.open("#{FILE_TYPES_DIR}/receipt.jpg", 'rb') - input_source = mindee_client1.source_from_file(file, 'receipt.jpg') - doc_class = Mindee::V1::Product::Receipt::ReceiptV5 - expect do - mindee_client1.parse(input_source, doc_class, options: { all_words: false, close_file: true }) - end.to raise_error Mindee::Error::MindeeHTTPClientError + File.open("#{FILE_TYPES_DIR}/receipt.jpg", 'rb') do |file| + input_source = mindee_client1.source_from_file(file, 'receipt.jpg') + doc_class = Mindee::V1::Product::Receipt::ReceiptV5 + expect do + mindee_client1.parse(input_source, doc_class, options: { all_words: false, close_file: true }) + end.to raise_error Mindee::Error::MindeeHTTPClientError + end end it 'should make an invalid API async enqueue call raising an exception' do mindee_client1 = Mindee::V1::Client.new(api_key: 'invalid-api-key') - file = File.open("#{V1_DATA_DIR}/products/invoice_splitter/default_sample.pdf", 'rb') - input_source = mindee_client1.source_from_file(file, 'default_sample.pdf') - doc_class = Mindee::V1::Product::Invoice::InvoiceV4 - expect do - mindee_client1.enqueue(input_source, doc_class) - end.to raise_error Mindee::Error::MindeeHTTPClientError + File.open("#{V1_DATA_DIR}/products/invoice_splitter/default_sample.pdf", 'rb') do |file| + input_source = mindee_client1.source_from_file(file, 'default_sample.pdf') + doc_class = Mindee::V1::Product::Invoice::InvoiceV4 + expect do + mindee_client1.enqueue(input_source, doc_class) + end.to raise_error Mindee::Error::MindeeHTTPClientError + end end it 'should make an invalid API async parse call raising an exception' do diff --git a/spec/v1/ocr_spec.rb b/spec/v1/ocr_spec.rb index 5075c8e83..be8c3cc20 100644 --- a/spec/v1/ocr_spec.rb +++ b/spec/v1/ocr_spec.rb @@ -7,6 +7,7 @@ describe Mindee::V1::Parsing::Common do include Mindee::V1::Parsing::Common + context 'An OCR extraction' do json_data = load_json(V1_OCR_DIR, 'complete.json') it 'should extract ocr data from a document' do From 94a74d2edbc583b3098e5a3c12232ab1ecb42d6e Mon Sep 17 00:00:00 2001 From: sebastianMindee <130448732+sebastianMindee@users.noreply.github.com> Date: Thu, 2 Apr 2026 12:06:54 +0200 Subject: [PATCH 2/6] :recycle: change Ocr to OCR to keep consistency --- .github/workflows/_publish-code.yml | 2 +- .github/workflows/_publish-docs.yml | 2 +- .github/workflows/_static-analysis.yml | 2 +- .github/workflows/_test-integrations.yml | 2 +- .github/workflows/_test-smoke.yml | 3 +-- .github/workflows/_test-units.yml | 2 -- bin/v2/products.rb | 2 +- docs/code_samples/v2_ocr.txt | 2 +- lib/mindee.rb | 8 ++++++++ lib/mindee/input/base_parameters.rb | 3 +++ lib/mindee/v1/parsing/common/ocr/ocr.rb | 2 +- lib/mindee/v2/parsing/raw_text.rb | 2 ++ lib/mindee/v2/parsing/raw_text_page.rb | 2 ++ lib/mindee/v2/parsing/search/search_response.rb | 2 ++ .../v2/product/classification/classification.rb | 1 + .../params/classification_parameters.rb | 1 + lib/mindee/v2/product/crop/crop.rb | 1 + .../v2/product/crop/params/crop_parameters.rb | 1 + lib/mindee/v2/product/extraction/extraction.rb | 1 + .../v2/product/extraction/params/data_schema.rb | 1 + lib/mindee/v2/product/ocr/ocr.rb | 11 ++++++----- lib/mindee/v2/product/ocr/ocr_inference.rb | 8 ++++---- lib/mindee/v2/product/ocr/ocr_page.rb | 8 ++++---- lib/mindee/v2/product/ocr/ocr_response.rb | 8 ++++---- lib/mindee/v2/product/ocr/ocr_result.rb | 8 ++++---- lib/mindee/v2/product/ocr/ocr_word.rb | 4 ++-- lib/mindee/v2/product/ocr/params/ocr_parameters.rb | 9 +++++---- .../v2/product/split/params/split_parameters.rb | 2 ++ sig/mindee/v2/product/ocr/ocr.rbs | 4 ++-- sig/mindee/v2/product/ocr/ocr_inference.rbs | 6 +++--- sig/mindee/v2/product/ocr/ocr_page.rbs | 6 +++--- sig/mindee/v2/product/ocr/ocr_response.rbs | 12 ++++++------ sig/mindee/v2/product/ocr/ocr_result.rbs | 6 +++--- sig/mindee/v2/product/ocr/ocr_word.rbs | 4 ++-- .../ocr/params/ocr_parameters/ocr_parameters.rbs | 6 +++--- spec/v2/product/ocr/ocr_integration.rb | 8 ++++---- spec/v2/product/ocr/ocr_spec.rb | 14 +++++++------- 37 files changed, 95 insertions(+), 71 deletions(-) diff --git a/.github/workflows/_publish-code.yml b/.github/workflows/_publish-code.yml index 17cda7119..00ff4db8e 100644 --- a/.github/workflows/_publish-code.yml +++ b/.github/workflows/_publish-code.yml @@ -16,7 +16,7 @@ jobs: - name: Set up Ruby uses: ruby/setup-ruby@v1 with: - ruby-version: "3.0" + ruby-version: "3.2" bundler-cache: true - name: Set credentials diff --git a/.github/workflows/_publish-docs.yml b/.github/workflows/_publish-docs.yml index e10700623..989494d9d 100644 --- a/.github/workflows/_publish-docs.yml +++ b/.github/workflows/_publish-docs.yml @@ -17,7 +17,7 @@ jobs: - name: set up Ruby uses: ruby/setup-ruby@v1 with: - ruby-version: "3.1" + ruby-version: "3.2" bundler-cache: true - name: Analyse the code with Rubocop diff --git a/.github/workflows/_static-analysis.yml b/.github/workflows/_static-analysis.yml index 6b2a1ccf6..d35199286 100644 --- a/.github/workflows/_static-analysis.yml +++ b/.github/workflows/_static-analysis.yml @@ -19,7 +19,7 @@ jobs: - name: set up Ruby uses: ruby/setup-ruby@v1 with: - ruby-version: "3.0.0" + ruby-version: "3.2.0" bundler-cache: true - name: Set up Python diff --git a/.github/workflows/_test-integrations.yml b/.github/workflows/_test-integrations.yml index b0c861654..0a75c3612 100644 --- a/.github/workflows/_test-integrations.yml +++ b/.github/workflows/_test-integrations.yml @@ -32,7 +32,7 @@ jobs: - "ubuntu-24.04" - "macos-latest" ruby: - - "3.0" + - "3.2" - "4.0" steps: - uses: actions/checkout@v5 diff --git a/.github/workflows/_test-smoke.yml b/.github/workflows/_test-smoke.yml index a22fa8c68..7a3579d46 100644 --- a/.github/workflows/_test-smoke.yml +++ b/.github/workflows/_test-smoke.yml @@ -24,11 +24,10 @@ jobs: strategy: matrix: ruby: - - "3.0" - - "3.1" - "3.2" - "3.3" - "3.4" + - "4.0" runs-on: ubuntu-latest steps: - uses: actions/checkout@v5 diff --git a/.github/workflows/_test-units.yml b/.github/workflows/_test-units.yml index e677d36ab..58811cf4b 100644 --- a/.github/workflows/_test-units.yml +++ b/.github/workflows/_test-units.yml @@ -18,8 +18,6 @@ jobs: - "ubuntu-22.04" - "macos-latest" ruby: - - "3.0" - - "3.1" - "3.2" - "3.3" - "3.4" diff --git a/bin/v2/products.rb b/bin/v2/products.rb index be9320cd4..d96a6acc6 100644 --- a/bin/v2/products.rb +++ b/bin/v2/products.rb @@ -25,7 +25,7 @@ }, 'ocr' => { description: 'OCR Utility', - response_class: Mindee::V2::Product::Ocr::Ocr, + response_class: Mindee::V2::Product::OCR::OCR, }, 'split' => { description: 'Split Utility', diff --git a/docs/code_samples/v2_ocr.txt b/docs/code_samples/v2_ocr.txt index 1f038471e..e8d0c1de2 100644 --- a/docs/code_samples/v2_ocr.txt +++ b/docs/code_samples/v2_ocr.txt @@ -21,7 +21,7 @@ input_source = Mindee::Input::Source::PathInputSource.new(input_path) # Send for processing response = mindee_client.enqueue_and_get_result( - Mindee::V2::Product::Ocr::Ocr, + Mindee::V2::Product::OCR::Ocr, input_source, ocr_params ) diff --git a/lib/mindee.rb b/lib/mindee.rb index fb157a3e8..bf8c6b3bf 100644 --- a/lib/mindee.rb +++ b/lib/mindee.rb @@ -86,6 +86,13 @@ module Standard module Universal end end + + # V1-specific products. + module Product + # French products. + module FR + end + end end # V2-specific module. @@ -94,6 +101,7 @@ module V2 module HTTP end + # File operations. module FileOperation # Crop operations. module Crop diff --git a/lib/mindee/input/base_parameters.rb b/lib/mindee/input/base_parameters.rb index 2de9352d4..0651f2c75 100644 --- a/lib/mindee/input/base_parameters.rb +++ b/lib/mindee/input/base_parameters.rb @@ -54,6 +54,9 @@ def slug self.class.slug end + # Load from a hash + # @param [Hash] params Parameters to provide as a hash. + # @return [BaseParameters] def self.from_hash(params: {}) load_from_hash(params: params) new( diff --git a/lib/mindee/v1/parsing/common/ocr/ocr.rb b/lib/mindee/v1/parsing/common/ocr/ocr.rb index f7c1c9126..794136674 100644 --- a/lib/mindee/v1/parsing/common/ocr/ocr.rb +++ b/lib/mindee/v1/parsing/common/ocr/ocr.rb @@ -6,7 +6,7 @@ module Mindee module V1 module Parsing module Common - # Ocr-specific parsing fields and options + # OCR-specific parsing fields and options module OCR # A single word. class OCRWord diff --git a/lib/mindee/v2/parsing/raw_text.rb b/lib/mindee/v2/parsing/raw_text.rb index 4418bab37..73841dcc0 100644 --- a/lib/mindee/v2/parsing/raw_text.rb +++ b/lib/mindee/v2/parsing/raw_text.rb @@ -16,6 +16,8 @@ def initialize(server_response) end end + # String representation. + # @return [String] def to_s "#{@pages.join("\n\n")}\n" end diff --git a/lib/mindee/v2/parsing/raw_text_page.rb b/lib/mindee/v2/parsing/raw_text_page.rb index bf34ee429..625fca71d 100644 --- a/lib/mindee/v2/parsing/raw_text_page.rb +++ b/lib/mindee/v2/parsing/raw_text_page.rb @@ -13,6 +13,8 @@ def initialize(server_response) @content = server_response['content'] end + # String representation. + # @return [String] def to_s @content end diff --git a/lib/mindee/v2/parsing/search/search_response.rb b/lib/mindee/v2/parsing/search/search_response.rb index 4ab081d2e..9195a5e5f 100644 --- a/lib/mindee/v2/parsing/search/search_response.rb +++ b/lib/mindee/v2/parsing/search/search_response.rb @@ -18,6 +18,8 @@ def initialize(server_response) @pagination_metadata = PaginationMetadata.new(server_response['pagination']) end + # String representation. + # @return [String] def to_s [ 'Models', diff --git a/lib/mindee/v2/product/classification/classification.rb b/lib/mindee/v2/product/classification/classification.rb index 0eff57a8f..cfea179aa 100644 --- a/lib/mindee/v2/product/classification/classification.rb +++ b/lib/mindee/v2/product/classification/classification.rb @@ -6,6 +6,7 @@ module Mindee module V2 module Product + # Classification module. module Classification # Classification product. class Classification < BaseProduct diff --git a/lib/mindee/v2/product/classification/params/classification_parameters.rb b/lib/mindee/v2/product/classification/params/classification_parameters.rb index 398b218e5..2fcd57dab 100644 --- a/lib/mindee/v2/product/classification/params/classification_parameters.rb +++ b/lib/mindee/v2/product/classification/params/classification_parameters.rb @@ -4,6 +4,7 @@ module Mindee module V2 module Product module Classification + # Classification parameters. module Params # Parameters accepted by the classification utility v2 endpoint. class ClassificationParameters < Mindee::Input::BaseParameters diff --git a/lib/mindee/v2/product/crop/crop.rb b/lib/mindee/v2/product/crop/crop.rb index 24211cfb3..edcec424f 100644 --- a/lib/mindee/v2/product/crop/crop.rb +++ b/lib/mindee/v2/product/crop/crop.rb @@ -6,6 +6,7 @@ module Mindee module V2 module Product + # Crop module. module Crop # Crop product. class Crop < BaseProduct diff --git a/lib/mindee/v2/product/crop/params/crop_parameters.rb b/lib/mindee/v2/product/crop/params/crop_parameters.rb index 712fe7cec..0b52c4ec4 100644 --- a/lib/mindee/v2/product/crop/params/crop_parameters.rb +++ b/lib/mindee/v2/product/crop/params/crop_parameters.rb @@ -4,6 +4,7 @@ module Mindee module V2 module Product module Crop + # Crop parameters. module Params # Parameters accepted by the crop utility v2 endpoint. class CropParameters < Mindee::Input::BaseParameters diff --git a/lib/mindee/v2/product/extraction/extraction.rb b/lib/mindee/v2/product/extraction/extraction.rb index 289a78da5..e318214fd 100644 --- a/lib/mindee/v2/product/extraction/extraction.rb +++ b/lib/mindee/v2/product/extraction/extraction.rb @@ -6,6 +6,7 @@ module Mindee module V2 module Product + # Extraction module. module Extraction # Extraction product. # Note: currently a placeholder for the `Inference` class. diff --git a/lib/mindee/v2/product/extraction/params/data_schema.rb b/lib/mindee/v2/product/extraction/params/data_schema.rb index a984c42f3..84b72cbfa 100644 --- a/lib/mindee/v2/product/extraction/params/data_schema.rb +++ b/lib/mindee/v2/product/extraction/params/data_schema.rb @@ -6,6 +6,7 @@ module Mindee module V2 module Product module Extraction + # Extraction parameters. module Params # Modify the Data Schema. class DataSchema diff --git a/lib/mindee/v2/product/ocr/ocr.rb b/lib/mindee/v2/product/ocr/ocr.rb index 50ff9e80b..9f81147cb 100644 --- a/lib/mindee/v2/product/ocr/ocr.rb +++ b/lib/mindee/v2/product/ocr/ocr.rb @@ -6,12 +6,13 @@ module Mindee module V2 module Product - module Ocr - # Ocr product. - class Ocr < BaseProduct + # OCR module. + module OCR + # OCR product. + class OCR < BaseProduct @slug = 'ocr' - @params_type = Mindee::V2::Product::Ocr::Params::OcrParameters - @response_type = Mindee::V2::Product::Ocr::OcrResponse + @params_type = Mindee::V2::Product::OCR::Params::OCRParameters + @response_type = Mindee::V2::Product::OCR::OCRResponse end end end diff --git a/lib/mindee/v2/product/ocr/ocr_inference.rb b/lib/mindee/v2/product/ocr/ocr_inference.rb index 5daeb0628..63937ba30 100644 --- a/lib/mindee/v2/product/ocr/ocr_inference.rb +++ b/lib/mindee/v2/product/ocr/ocr_inference.rb @@ -5,17 +5,17 @@ module Mindee module V2 module Product - module Ocr + module OCR # The inference result for an OCR utility request. - class OcrInference < Mindee::V2::Parsing::BaseInference - # @return [OcrResult] Parsed inference payload. + class OCRInference < Mindee::V2::Parsing::BaseInference + # @return [OCRResult] Parsed inference payload. attr_reader :result # @param server_response [Hash] Hash representation of the JSON returned by the service. def initialize(server_response) super - @result = OcrResult.new(server_response['result']) + @result = OCRResult.new(server_response['result']) end # String representation. diff --git a/lib/mindee/v2/product/ocr/ocr_page.rb b/lib/mindee/v2/product/ocr/ocr_page.rb index 718ed21af..1da88823f 100644 --- a/lib/mindee/v2/product/ocr/ocr_page.rb +++ b/lib/mindee/v2/product/ocr/ocr_page.rb @@ -5,17 +5,17 @@ module Mindee module V2 module Product - module Ocr + module OCR # OCR result for a single page. - class OcrPage - # @return [Array] List of words extracted from the document page. + class OCRPage + # @return [Array] List of words extracted from the document page. attr_reader :words # @return [String] Full text content extracted from the document page. attr_reader :content # @param server_response [Hash] Hash representation of the JSON returned by the service. def initialize(server_response) - @words = server_response['words'].map { |word| OcrWord.new(word) } + @words = server_response['words'].map { |word| OCRWord.new(word) } @content = server_response['content'] end diff --git a/lib/mindee/v2/product/ocr/ocr_response.rb b/lib/mindee/v2/product/ocr/ocr_response.rb index 3bac4d1e9..0516e5356 100644 --- a/lib/mindee/v2/product/ocr/ocr_response.rb +++ b/lib/mindee/v2/product/ocr/ocr_response.rb @@ -7,17 +7,17 @@ module Mindee module V2 module Product - module Ocr + module OCR # HTTP response wrapper that embeds a V2 Inference. - class OcrResponse < Mindee::V2::Parsing::BaseResponse - # @return [OcrInference] Parsed inference payload. + class OCRResponse < Mindee::V2::Parsing::BaseResponse + # @return [OCRInference] Parsed inference payload. attr_reader :inference # @param server_response [Hash] Hash parsed from the API JSON response. def initialize(server_response) super - @inference = OcrInference.new(server_response['inference']) + @inference = OCRInference.new(server_response['inference']) end # String representation. diff --git a/lib/mindee/v2/product/ocr/ocr_result.rb b/lib/mindee/v2/product/ocr/ocr_result.rb index 093a6ae9d..c0eebb8eb 100644 --- a/lib/mindee/v2/product/ocr/ocr_result.rb +++ b/lib/mindee/v2/product/ocr/ocr_result.rb @@ -5,17 +5,17 @@ module Mindee module V2 module Product - module Ocr + module OCR # Result of a ocr utility inference. - class OcrResult - # @return [Array] List of OCR results for each page in the document. + class OCRResult + # @return [Array] List of OCR results for each page in the document. attr_reader :pages # @param server_response [Hash] Hash representation of the JSON returned by the service. def initialize(server_response) @pages = if server_response.key?('pages') server_response['pages'].map do |pages| - OcrPage.new(pages) + OCRPage.new(pages) end end end diff --git a/lib/mindee/v2/product/ocr/ocr_word.rb b/lib/mindee/v2/product/ocr/ocr_word.rb index 489c7dc85..3e5cce3a6 100644 --- a/lib/mindee/v2/product/ocr/ocr_word.rb +++ b/lib/mindee/v2/product/ocr/ocr_word.rb @@ -3,9 +3,9 @@ module Mindee module V2 module Product - module Ocr + module OCR # OCR result for a single word extracted from the document page. - class OcrWord + class OCRWord # @return [String] Text content of the word. attr_reader :content # @return [Mindee::Geometry::Polygon] Position information as a list of points in clockwise order. diff --git a/lib/mindee/v2/product/ocr/params/ocr_parameters.rb b/lib/mindee/v2/product/ocr/params/ocr_parameters.rb index ca068c83a..976423150 100644 --- a/lib/mindee/v2/product/ocr/params/ocr_parameters.rb +++ b/lib/mindee/v2/product/ocr/params/ocr_parameters.rb @@ -3,10 +3,11 @@ module Mindee module V2 module Product - module Ocr + module OCR + # OCR Parameters. module Params # Parameters accepted by the ocr utility v2 endpoint. - class OcrParameters < Mindee::Input::BaseParameters + class OCRParameters < Mindee::Input::BaseParameters # @return [String] Slug for the endpoint. def self.slug 'ocr' @@ -29,9 +30,9 @@ def initialize( # Loads the parameters from a Hash. # @param [Hash] params Parameters to provide as a hash. - # @return [OcrParameters] + # @return [OCRParameters] def self.from_hash(params: {}) - OcrParameters.new( + OCRParameters.new( params.fetch(:model_id), file_alias: params.fetch(:file_alias, nil), webhook_ids: params.fetch(:webhook_ids, nil), diff --git a/lib/mindee/v2/product/split/params/split_parameters.rb b/lib/mindee/v2/product/split/params/split_parameters.rb index 57dd79b8b..f06b7fd00 100644 --- a/lib/mindee/v2/product/split/params/split_parameters.rb +++ b/lib/mindee/v2/product/split/params/split_parameters.rb @@ -3,7 +3,9 @@ module Mindee module V2 module Product + # Split module. module Split + # Split parameters. module Params # Parameters accepted by the split utility v2 endpoint. class SplitParameters < Mindee::Input::BaseParameters diff --git a/sig/mindee/v2/product/ocr/ocr.rbs b/sig/mindee/v2/product/ocr/ocr.rbs index 9bbc96d01..2abff6d91 100644 --- a/sig/mindee/v2/product/ocr/ocr.rbs +++ b/sig/mindee/v2/product/ocr/ocr.rbs @@ -1,8 +1,8 @@ module Mindee module V2 module Product - module Ocr - class Ocr < BaseProduct + module OCR + class OCR < BaseProduct end end end diff --git a/sig/mindee/v2/product/ocr/ocr_inference.rbs b/sig/mindee/v2/product/ocr/ocr_inference.rbs index 029aa6fbb..af2891535 100644 --- a/sig/mindee/v2/product/ocr/ocr_inference.rbs +++ b/sig/mindee/v2/product/ocr/ocr_inference.rbs @@ -1,9 +1,9 @@ module Mindee module V2 module Product - module Ocr - class OcrInference - attr_reader result: OcrResult + module OCR + class OCRInference + attr_reader result: OCRResult def initialize: (Hash[String | Symbol, untyped]) -> void def to_s: -> String diff --git a/sig/mindee/v2/product/ocr/ocr_page.rbs b/sig/mindee/v2/product/ocr/ocr_page.rbs index 11cd96011..d94029752 100644 --- a/sig/mindee/v2/product/ocr/ocr_page.rbs +++ b/sig/mindee/v2/product/ocr/ocr_page.rbs @@ -1,9 +1,9 @@ module Mindee module V2 module Product - module Ocr - class OcrPage - attr_reader words: Array[OcrWord] + module OCR + class OCRPage + attr_reader words: Array[OCRWord] attr_reader content: String def initialize: (Hash[String | Symbol, untyped]) -> void diff --git a/sig/mindee/v2/product/ocr/ocr_response.rbs b/sig/mindee/v2/product/ocr/ocr_response.rbs index f9d96875d..8f713b5b2 100644 --- a/sig/mindee/v2/product/ocr/ocr_response.rbs +++ b/sig/mindee/v2/product/ocr/ocr_response.rbs @@ -3,18 +3,18 @@ module Mindee module V2 module Product - module Ocr - class OcrResponse < Parsing::BaseResponse[OcrInference] + module OCR + class OCRResponse < Parsing::BaseResponse[OCRInference] self.@slug: String - self.@_params_type: singleton(Params::OcrParameters) + self.@_params_type: singleton(Params::OCRParameters) - attr_reader inference: Mindee::V2::Product::Ocr::OcrInference + attr_reader inference: Mindee::V2::Product::OCR::OCRInference def initialize: (Hash[String | Symbol, untyped]) -> void - def _params_type: -> singleton(Params::OcrParameters) + def _params_type: -> singleton(Params::OCRParameters) def to_s: -> String - def self._params_type: () -> singleton(Params::OcrParameters) + def self._params_type: () -> singleton(Params::OCRParameters) def self.slug: () -> String end end diff --git a/sig/mindee/v2/product/ocr/ocr_result.rbs b/sig/mindee/v2/product/ocr/ocr_result.rbs index c35b0d73f..dc7fcc42a 100644 --- a/sig/mindee/v2/product/ocr/ocr_result.rbs +++ b/sig/mindee/v2/product/ocr/ocr_result.rbs @@ -1,9 +1,9 @@ module Mindee module V2 module Product - module Ocr - class OcrResult - attr_reader pages: Array[Mindee::V2::Product::Ocr::OcrPage] + module OCR + class OCRResult + attr_reader pages: Array[Mindee::V2::Product::OCR::OCRPage] def initialize: (Hash[String | Symbol, untyped]) -> void def to_s: -> String diff --git a/sig/mindee/v2/product/ocr/ocr_word.rbs b/sig/mindee/v2/product/ocr/ocr_word.rbs index 2d9967fb6..0fba61dc2 100644 --- a/sig/mindee/v2/product/ocr/ocr_word.rbs +++ b/sig/mindee/v2/product/ocr/ocr_word.rbs @@ -1,8 +1,8 @@ module Mindee module V2 module Product - module Ocr - class OcrWord + module OCR + class OCRWord attr_reader content: String attr_reader polygon: Geometry::Polygon diff --git a/sig/mindee/v2/product/ocr/params/ocr_parameters/ocr_parameters.rbs b/sig/mindee/v2/product/ocr/params/ocr_parameters/ocr_parameters.rbs index 731720822..a2df6f1ad 100644 --- a/sig/mindee/v2/product/ocr/params/ocr_parameters/ocr_parameters.rbs +++ b/sig/mindee/v2/product/ocr/params/ocr_parameters/ocr_parameters.rbs @@ -1,12 +1,12 @@ module Mindee module V2 module Product - module Ocr + module OCR module Params - class OcrParameters + class OCRParameters def self.slug: -> String - def self.from_hash: (params: Hash[String | Symbol, untyped]) -> OcrParameters + def self.from_hash: (params: Hash[String | Symbol, untyped]) -> OCRParameters def initialize: ( String, diff --git a/spec/v2/product/ocr/ocr_integration.rb b/spec/v2/product/ocr/ocr_integration.rb index 90f726a91..3d525a279 100644 --- a/spec/v2/product/ocr/ocr_integration.rb +++ b/spec/v2/product/ocr/ocr_integration.rb @@ -3,7 +3,7 @@ require 'mindee' require 'mindee/v2/product' -describe Mindee::V2::Product::Ocr, :integration, :v2 do +describe Mindee::V2::Product::OCR, :integration, :v2 do let(:ocr_model_id) do ENV.fetch('MINDEE_V2_SE_TESTS_OCR_MODEL_ID') end @@ -20,15 +20,15 @@ params = { model_id: ocr_model_id } response = v2_client.enqueue_and_get_result( - Mindee::V2::Product::Ocr::Ocr, + Mindee::V2::Product::OCR::OCR, input_source, params ) expect(response.inference).not_to be_nil expect(response.inference.file.name).to eq('default_sample.jpg') - expect(response.inference).to be_a(Mindee::V2::Product::Ocr::OcrInference) - expect(response.inference.result).to be_a(Mindee::V2::Product::Ocr::OcrResult) + expect(response.inference).to be_a(Mindee::V2::Product::OCR::OCRInference) + expect(response.inference.result).to be_a(Mindee::V2::Product::OCR::OCRResult) expect(response.inference.result.pages.size).to eq(1) expect(response.inference.result.pages[0].words.size).to be > 5 end diff --git a/spec/v2/product/ocr/ocr_spec.rb b/spec/v2/product/ocr/ocr_spec.rb index 039a05854..ff5add243 100644 --- a/spec/v2/product/ocr/ocr_spec.rb +++ b/spec/v2/product/ocr/ocr_spec.rb @@ -3,16 +3,16 @@ require 'json' require 'mindee' -describe Mindee::V2::Product::Ocr::Ocr, :v2 do +describe Mindee::V2::Product::OCR::OCR, :v2 do let(:ocr_data_dir) { File.join(V2_PRODUCT_DATA_DIR, 'ocr') } it 'parses a single page OCR response properly' do json_path = File.join(ocr_data_dir, 'ocr_single.json') json_sample = JSON.parse(File.read(json_path)) - response = Mindee::V2::Product::Ocr::OcrResponse.new(json_sample) + response = Mindee::V2::Product::OCR::OCRResponse.new(json_sample) - expect(response.inference).to be_a(Mindee::V2::Product::Ocr::OcrInference) + expect(response.inference).to be_a(Mindee::V2::Product::OCR::OCRInference) expect(response.inference.result.pages).not_to be_empty expect(response.inference.result.pages.size).to eq(1) @@ -66,11 +66,11 @@ json_path = File.join(ocr_data_dir, 'ocr_multiple.json') json_sample = JSON.parse(File.read(json_path)) - response = Mindee::V2::Product::Ocr::OcrResponse.new(json_sample) + response = Mindee::V2::Product::OCR::OCRResponse.new(json_sample) - expect(response.inference).to be_a(Mindee::V2::Product::Ocr::OcrInference) - expect(response.inference.result).to be_a(Mindee::V2::Product::Ocr::OcrResult) - expect(response.inference.result.pages[0]).to be_a(Mindee::V2::Product::Ocr::OcrPage) + expect(response.inference).to be_a(Mindee::V2::Product::OCR::OCRInference) + expect(response.inference.result).to be_a(Mindee::V2::Product::OCR::OCRResult) + expect(response.inference.result.pages[0]).to be_a(Mindee::V2::Product::OCR::OCRPage) expect(response.inference.result.pages.size).to eq(3) page_zero_words = response.inference.result.pages[0].words From 8a5a75ddf11e1a78a1b1b2d961e5cb8f4faa895d Mon Sep 17 00:00:00 2001 From: sebastianMindee <130448732+sebastianMindee@users.noreply.github.com> Date: Thu, 2 Apr 2026 12:24:33 +0200 Subject: [PATCH 3/6] fix readme links --- README.md | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index b49077b82..b3bcc70ba 100644 --- a/README.md +++ b/README.md @@ -15,7 +15,7 @@ https://app.mindee.com It uses **API version 2**. Consult the -**[V2 Documentation](https://docs.mindee.com/integrations/client-libraries-sdk)** +**V2 Documentation** ### V1 @@ -26,17 +26,17 @@ https://platform.mindee.com/ It uses **API version 1**. Consult the -[V1 Documentation](https://docs.mindee.com/v1/libraries/ruby-sdk) +V1 Documentation ## Additional Information -**[Source Code](https://github.com/mindee/mindee-api-ruby)** +**Reference Documentation** -**[Feedback](https://feedback.mindee.com/)** +**Feedback** ### License Copyright © Mindee -Available as open source under the terms of the [MIT License](https://opensource.org/licenses/MIT). +Available as open source under the terms of the MIT License. From 8fba4de142933d291f79ca06cb409f2e1697ccf8 Mon Sep 17 00:00:00 2001 From: sebastianMindee <130448732+sebastianMindee@users.noreply.github.com> Date: Thu, 2 Apr 2026 12:31:31 +0200 Subject: [PATCH 4/6] fix readme... again --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index b3bcc70ba..627ff9761 100644 --- a/README.md +++ b/README.md @@ -30,7 +30,7 @@ Consult the ## Additional Information -**Source Code** **Reference Documentation** From a0931a272b579431babe8f81f1fb838fa57a3a68 Mon Sep 17 00:00:00 2001 From: sebastianMindee <130448732+sebastianMindee@users.noreply.github.com> Date: Thu, 2 Apr 2026 12:51:48 +0200 Subject: [PATCH 5/6] bump ruby version in CLI test --- .github/workflows/_test-cli.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/_test-cli.yml b/.github/workflows/_test-cli.yml index 2a2793faf..d8c2581b5 100644 --- a/.github/workflows/_test-cli.yml +++ b/.github/workflows/_test-cli.yml @@ -24,7 +24,7 @@ jobs: - os: "windows-latest" rid: "win-x64" ruby: - - "3.0" + - "3.2" - "4.0" runs-on: ${{ matrix.os_config.os }} steps: From 84831f380e62274d05a40cceca1a1f5a03f9a0b8 Mon Sep 17 00:00:00 2001 From: sebastianMindee <130448732+sebastianMindee@users.noreply.github.com> Date: Thu, 2 Apr 2026 12:58:17 +0200 Subject: [PATCH 6/6] fix syntax in test --- docs/code_samples/v2_ocr.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/code_samples/v2_ocr.txt b/docs/code_samples/v2_ocr.txt index e8d0c1de2..db4f7b505 100644 --- a/docs/code_samples/v2_ocr.txt +++ b/docs/code_samples/v2_ocr.txt @@ -21,7 +21,7 @@ input_source = Mindee::Input::Source::PathInputSource.new(input_path) # Send for processing response = mindee_client.enqueue_and_get_result( - Mindee::V2::Product::OCR::Ocr, + Mindee::V2::Product::OCR::OCR, input_source, ocr_params )