From 257585b759c6c4ef3e5f86336fca158d8cf22e9d Mon Sep 17 00:00:00 2001 From: sebastianMindee <130448732+sebastianMindee@users.noreply.github.com> Date: Wed, 27 May 2026 11:50:49 +0200 Subject: [PATCH 1/3] :recycle: add polling options as a dedicated parameter --- lib/mindee/image/image_compressor.rb | 2 +- lib/mindee/input/base_parameters.rb | 9 ++++++++- lib/mindee/v2/client.rb | 14 +++++++++++--- sig/mindee/input/base_parameters.rbs | 1 + sig/mindee/v2/client.rbs | 4 ++-- spec/v2/client_v2_integration.rb | 8 ++++++-- 6 files changed, 29 insertions(+), 9 deletions(-) diff --git a/lib/mindee/image/image_compressor.rb b/lib/mindee/image/image_compressor.rb index d9274244..c3914967 100644 --- a/lib/mindee/image/image_compressor.rb +++ b/lib/mindee/image/image_compressor.rb @@ -9,7 +9,7 @@ module ImageCompressor # @param image [MiniMagick::Image, StringIO] Input image. # @param quality [Integer, nil] Quality of the final file. # @param max_width [Integer, nil] Maximum width. If not specified, the horizontal ratio will remain the same. - # @param max_height [Integer] Maximum height. If not specified, the vertical ratio will remain the same. + # @param max_height [Integer, nil] Maximum height. If not specified, the vertical ratio will remain the same. # @return [StringIO] def self.compress_image(image, quality: 85, max_width: nil, max_height: nil) processed_image = ImageUtils.to_image(image) diff --git a/lib/mindee/input/base_parameters.rb b/lib/mindee/input/base_parameters.rb index 0651f2c7..9da06e00 100644 --- a/lib/mindee/input/base_parameters.rb +++ b/lib/mindee/input/base_parameters.rb @@ -22,7 +22,7 @@ class BaseParameters # @param [String] model_id ID of the model # @param [String, nil] file_alias File alias, if applicable. # @param [Array, nil] webhook_ids List of webhook IDs to propagate the API response to. - # @param [Hash, nil] polling_options Options for polling. Set only if having timeout issues. + # @param [Hash, PollingOptions, nil] polling_options Options for polling. Set only if having timeout issues. # @param [Boolean, nil] close_file Whether to close the file after parsing. def initialize( model_id, @@ -40,6 +40,13 @@ def initialize( @close_file = close_file.nil? || close_file end + # Sets polling options after normalizing hash inputs. + # @param [Hash, PollingOptions, nil] polling_options + # @return [PollingOptions] + def polling_options=(polling_options) + @polling_options = get_clean_polling_options(polling_options) + end + # @return [String] Slug for the endpoint. def self.slug if self == BaseParameters diff --git a/lib/mindee/v2/client.rb b/lib/mindee/v2/client.rb index 24cbf4c1..bc3d51f3 100644 --- a/lib/mindee/v2/client.rb +++ b/lib/mindee/v2/client.rb @@ -57,14 +57,16 @@ def enqueue( # @param input_source [Mindee::Input::Source::LocalInputSource, Mindee::Input::Source::URLInputSource] # The source of the input document (local file or URL). # @param params [Hash, Input::BaseParameters] Parameters for the inference. + # @param polling_options [Hash, PollingOptions, nil] Parameters for polling. # @return [Parsing::BaseResponse] def enqueue_and_get_result( product, input_source, - params + params, + polling_options: nil ) enqueue_response = enqueue(product, input_source, params) - normalized_params = normalize_parameters(product.params_type, params) + normalized_params = normalize_parameters(product.params_type, params, polling_options: polling_options) normalized_params.validate_async_params if enqueue_response.job.id.nil? || enqueue_response.job.id.empty? @@ -121,8 +123,14 @@ def search_models(model_name, model_type) # If needed, converts the parsing options provided as a hash into a proper BaseParameters subclass object. # @param params [Hash, Class] Params. + # @param polling_options [Hash, PollingOptions, nil] Polling options. # @return [BaseParameters] - def normalize_parameters(param_class, params) + def normalize_parameters(param_class, params, polling_options: nil) + if params.is_a?(Hash) + params[:polling_options] = polling_options if polling_options + elsif params.is_a?(Mindee::Input::BaseParameters) && !polling_options.nil? + params.polling_options = polling_options + end return param_class.from_hash(params: params) if params.is_a?(Hash) params diff --git a/sig/mindee/input/base_parameters.rbs b/sig/mindee/input/base_parameters.rbs index dab8603d..cae0a84e 100644 --- a/sig/mindee/input/base_parameters.rbs +++ b/sig/mindee/input/base_parameters.rbs @@ -26,6 +26,7 @@ module Mindee def append_form_data: (Array[Array[untyped]]) -> Array[Array[untyped]] def validate_async_params: () -> void + def polling_options=: (Hash[Symbol | String, untyped] | PollingOptions?) -> PollingOptions private diff --git a/sig/mindee/v2/client.rbs b/sig/mindee/v2/client.rbs index 143e185f..e89f1b25 100644 --- a/sig/mindee/v2/client.rbs +++ b/sig/mindee/v2/client.rbs @@ -17,13 +17,13 @@ module Mindee def enqueue: [T] (HTTP::_ProductClass[T] product, Input::Source::LocalInputSource | Input::Source::URLInputSource, Hash[String | Symbol, untyped] | Input::BaseParameters params) -> V2::Parsing::JobResponse - def enqueue_and_get_result: [T] (HTTP::_ProductClass[T] product, Input::Source::LocalInputSource | Input::Source::URLInputSource, Hash[String | Symbol, untyped] | Input::BaseParameters params) -> T + def enqueue_and_get_result: [T] (HTTP::_ProductClass[T] product, Input::Source::LocalInputSource | Input::Source::URLInputSource, Hash[String | Symbol, untyped] | Input::BaseParameters params, ?polling_options: Hash[String | Symbol, untyped] | Input::PollingOptions?) -> T def search_models: (String?, String?) -> Mindee::V2::Parsing::Search::SearchResponse def validate_async_params: (Integer | Float, Integer | Float, Integer) -> void - def normalize_parameters: (singleton(Input::BaseParameters) param_class, Hash[String | Symbol, untyped] | Input::BaseParameters params) -> Input::BaseParameters + def normalize_parameters: (singleton(Input::BaseParameters) param_class, Hash[String | Symbol, untyped] | Input::BaseParameters params, ?polling_options: Hash[String | Symbol, untyped] | Input::PollingOptions?) -> Input::BaseParameters end end end diff --git a/spec/v2/client_v2_integration.rb b/spec/v2/client_v2_integration.rb index 062ff853..a0be5e10 100644 --- a/spec/v2/client_v2_integration.rb +++ b/spec/v2/client_v2_integration.rb @@ -30,11 +30,15 @@ polygon: false, confidence: false, file_alias: 'rb_integration_test', - polling_options: polling, text_context: 'this is a test' ) - response = client.enqueue_and_get_result(Mindee::V2::Product::Extraction::Extraction, input, inference_params) + response = client.enqueue_and_get_result( + Mindee::V2::Product::Extraction::Extraction, + input, + inference_params, + polling + ) expect(response).not_to be_nil expect(response.inference).not_to be_nil From fed7c28ab38dd445f535920cc186b8d45ad9c311 Mon Sep 17 00:00:00 2001 From: sebastianMindee <130448732+sebastianMindee@users.noreply.github.com> Date: Wed, 27 May 2026 12:01:04 +0200 Subject: [PATCH 2/3] add & fix test --- .../input/sources/local_input_source.rb | 6 +++-- spec/v2/client_v2_integration.rb | 25 +++++++++++++++++++ 2 files changed, 29 insertions(+), 2 deletions(-) diff --git a/lib/mindee/input/sources/local_input_source.rb b/lib/mindee/input/sources/local_input_source.rb index ce646e27..8ebbc57b 100644 --- a/lib/mindee/input/sources/local_input_source.rb +++ b/lib/mindee/input/sources/local_input_source.rb @@ -83,12 +83,14 @@ def fix_pdf!(maximum_offset: 500) # @return [StringIO] The fixed stream. # @raise [Mindee::Error::MindeePDFError] def self.fix_pdf(stream, maximum_offset: 500) - out_stream = StringIO.new + out_stream = StringIO.new(''.b) stream.gets('%PDF-') raise Error::MindeePDFError if stream.eof? || stream.pos > maximum_offset stream.pos = stream.pos - 5 - out_stream << stream.read + out_stream.write(stream.read.to_s.b) + out_stream.rewind + out_stream end # Cuts a PDF file according to provided options. diff --git a/spec/v2/client_v2_integration.rb b/spec/v2/client_v2_integration.rb index a0be5e10..740f8b9e 100644 --- a/spec/v2/client_v2_integration.rb +++ b/spec/v2/client_v2_integration.rb @@ -72,6 +72,31 @@ expect(result.fields).not_to be_nil end + it 'parses with legacy polling options successfully' do + src_path = File.join(V1_PRODUCT_DATA_DIR, 'financial_document', 'default_sample.jpg') + input = Mindee::Input::Source::FileInputSource.new(File.open(src_path, 'rb'), 'multipage_cut-2.pdf') + + polling = Mindee::Input::PollingOptions.new( + initial_delay_sec: 3.0, + delay_sec: 1.5, + max_retries: 80 + ) + + inference_params = Mindee::V2::Product::Extraction::Params::ExtractionParameters.new( + model_id, + rag: false, + raw_text: true, + polygon: false, + confidence: false, + file_alias: 'rb_integration_test', + polling_options: polling, + text_context: 'this is a test' + ) + response = client.enqueue_and_get_result(Mindee::V2::Product::Extraction::Extraction, input, inference_params) + expect(response).not_to be_nil + expect(response.inference).not_to be_nil + end + it 'parses a filled single-page image successfully' do src_path = File.join(V1_PRODUCT_DATA_DIR, 'financial_document', 'default_sample.jpg') input = Mindee::Input::Source::FileInputSource.new(File.open(src_path, 'rb'), 'default_sample.jpg') From 4009361054606712830cb8a84a37b1707e77f69d Mon Sep 17 00:00:00 2001 From: sebastianMindee <130448732+sebastianMindee@users.noreply.github.com> Date: Wed, 27 May 2026 13:55:53 +0200 Subject: [PATCH 3/3] fix syntax --- lib/mindee/dependencies.rb | 5 +++++ lib/mindee/v2/client.rb | 2 +- .../classification/params/classification_parameters.rb | 2 +- lib/mindee/v2/product/crop/params/crop_parameters.rb | 2 +- lib/mindee/v2/product/extraction/params/data_schema.rb | 2 +- .../v2/product/extraction/params/extraction_parameters.rb | 2 +- lib/mindee/v2/product/ocr/params/ocr_parameters.rb | 2 +- lib/mindee/v2/product/split/params/split_parameters.rb | 2 +- sig/mindee/v2/client.rbs | 2 +- 9 files changed, 13 insertions(+), 8 deletions(-) diff --git a/lib/mindee/dependencies.rb b/lib/mindee/dependencies.rb index cf7ee926..83fe3a15 100644 --- a/lib/mindee/dependencies.rb +++ b/lib/mindee/dependencies.rb @@ -3,6 +3,7 @@ module Mindee # Centralized check for optional heavy dependencies module Dependencies + # Checks the presence of dependencies. def self.check_all_dependencies require 'origami' require 'mini_magick' @@ -12,16 +13,20 @@ def self.check_all_dependencies false end + # Memoized check. @all_deps_available = check_all_dependencies + # Checks whether all dependencies are available. def self.all_deps_available? check_all_dependencies end + # Raises an error if dependencies are not available. def self.require_all_deps! raise LoadError, MINDEE_DEPENDENCIES_LOAD_ERROR unless all_deps_available? end + # Error message to display when dependencies are not available. MINDEE_DEPENDENCIES_LOAD_ERROR = 'Attempted to load Mindee PDF/Image tools without required dependencies. ' \ "If you need to process local files, please replace the 'mindee-lite' gem " \ "with the standard 'mindee' gem in your Gemfile." diff --git a/lib/mindee/v2/client.rb b/lib/mindee/v2/client.rb index bc3d51f3..7165e6ce 100644 --- a/lib/mindee/v2/client.rb +++ b/lib/mindee/v2/client.rb @@ -63,7 +63,7 @@ def enqueue_and_get_result( product, input_source, params, - polling_options: nil + polling_options = nil ) enqueue_response = enqueue(product, input_source, params) normalized_params = normalize_parameters(product.params_type, params, polling_options: polling_options) diff --git a/lib/mindee/v2/product/classification/params/classification_parameters.rb b/lib/mindee/v2/product/classification/params/classification_parameters.rb index 2fcd57da..0a037e9f 100644 --- a/lib/mindee/v2/product/classification/params/classification_parameters.rb +++ b/lib/mindee/v2/product/classification/params/classification_parameters.rb @@ -16,7 +16,7 @@ def self.slug # @param [String] model_id ID of the model # @param [String, nil] file_alias File alias, if applicable. # @param [Array, nil] webhook_ids List of webhook IDs to propagate the API response to. - # @param [Hash, nil] polling_options Options for polling. Set only if having timeout issues. + # @param [Hash, PollingOptions, nil] polling_options Options for polling. Set only if having timeout issues. # @param [Boolean, nil] close_file Whether to close the file after parsing. def initialize( model_id, diff --git a/lib/mindee/v2/product/crop/params/crop_parameters.rb b/lib/mindee/v2/product/crop/params/crop_parameters.rb index 0b52c4ec..4113f7d8 100644 --- a/lib/mindee/v2/product/crop/params/crop_parameters.rb +++ b/lib/mindee/v2/product/crop/params/crop_parameters.rb @@ -16,7 +16,7 @@ def self.slug # @param [String] model_id ID of the model # @param [String, nil] file_alias File alias, if applicable. # @param [Array, nil] webhook_ids List of webhook IDs to propagate the API response to. - # @param [Hash, nil] polling_options Options for polling. Set only if having timeout issues. + # @param [Hash, PollingOptions, nil] polling_options Options for polling. Set only if having timeout issues. # @param [Boolean, nil] close_file Whether to close the file after parsing. def initialize( model_id, diff --git a/lib/mindee/v2/product/extraction/params/data_schema.rb b/lib/mindee/v2/product/extraction/params/data_schema.rb index 84b72cbf..c816a0b5 100644 --- a/lib/mindee/v2/product/extraction/params/data_schema.rb +++ b/lib/mindee/v2/product/extraction/params/data_schema.rb @@ -13,7 +13,7 @@ class DataSchema # @return [Mindee::V2::Product::Extraction::Params::DataSchemaReplace] attr_reader :replace - # @param data_schema [Hash, String] + # @param data_schema [Hash, DataSchema, String] def initialize(data_schema) case data_schema when String diff --git a/lib/mindee/v2/product/extraction/params/extraction_parameters.rb b/lib/mindee/v2/product/extraction/params/extraction_parameters.rb index 4eb17afd..ab782476 100644 --- a/lib/mindee/v2/product/extraction/params/extraction_parameters.rb +++ b/lib/mindee/v2/product/extraction/params/extraction_parameters.rb @@ -46,7 +46,7 @@ def self.slug # @param [String, nil] file_alias File alias, if applicable. # @param [Array, nil] webhook_ids # @param [String, nil] text_context - # @param [Hash, nil] polling_options + # @param [Hash, PollingOptions, nil] polling_options # @param [Boolean, nil] close_file # @param [DataSchemaField, String, Hash nil] data_schema def initialize( diff --git a/lib/mindee/v2/product/ocr/params/ocr_parameters.rb b/lib/mindee/v2/product/ocr/params/ocr_parameters.rb index 97642315..7774c068 100644 --- a/lib/mindee/v2/product/ocr/params/ocr_parameters.rb +++ b/lib/mindee/v2/product/ocr/params/ocr_parameters.rb @@ -16,7 +16,7 @@ def self.slug # @param [String] model_id ID of the model # @param [String, nil] file_alias File alias, if applicable. # @param [Array, nil] webhook_ids List of webhook IDs to propagate the API response to. - # @param [Hash, nil] polling_options Options for polling. Set only if having timeout issues. + # @param [Hash, PollingOptions, nil] polling_options Options for polling. Set only if having timeout issues. # @param [Boolean, nil] close_file Whether to close the file after parsing. def initialize( model_id, diff --git a/lib/mindee/v2/product/split/params/split_parameters.rb b/lib/mindee/v2/product/split/params/split_parameters.rb index f06b7fd0..2c30a0de 100644 --- a/lib/mindee/v2/product/split/params/split_parameters.rb +++ b/lib/mindee/v2/product/split/params/split_parameters.rb @@ -17,7 +17,7 @@ def self.slug # @param [String] model_id ID of the model # @param [String, nil] file_alias File alias, if applicable. # @param [Array, nil] webhook_ids List of webhook IDs to propagate the API response to. - # @param [Hash, nil] polling_options Options for polling. Set only if having timeout issues. + # @param [Hash, PollingOptions, nil] polling_options Options for polling. Set only if having timeout issues. # @param [Boolean, nil] close_file Whether to close the file after parsing. def initialize( model_id, diff --git a/sig/mindee/v2/client.rbs b/sig/mindee/v2/client.rbs index e89f1b25..e6586834 100644 --- a/sig/mindee/v2/client.rbs +++ b/sig/mindee/v2/client.rbs @@ -17,7 +17,7 @@ module Mindee def enqueue: [T] (HTTP::_ProductClass[T] product, Input::Source::LocalInputSource | Input::Source::URLInputSource, Hash[String | Symbol, untyped] | Input::BaseParameters params) -> V2::Parsing::JobResponse - def enqueue_and_get_result: [T] (HTTP::_ProductClass[T] product, Input::Source::LocalInputSource | Input::Source::URLInputSource, Hash[String | Symbol, untyped] | Input::BaseParameters params, ?polling_options: Hash[String | Symbol, untyped] | Input::PollingOptions?) -> T + def enqueue_and_get_result: [T] (HTTP::_ProductClass[T] product, Input::Source::LocalInputSource | Input::Source::URLInputSource, Hash[String | Symbol, untyped] | Input::BaseParameters params, ?Hash[String | Symbol, untyped] | Input::PollingOptions?) -> T def search_models: (String?, String?) -> Mindee::V2::Parsing::Search::SearchResponse