diff --git a/docs/serialization.md b/docs/serialization.md index 8c1b47c690..a7ba533566 100644 --- a/docs/serialization.md +++ b/docs/serialization.md @@ -106,6 +106,7 @@ The header is structured like the following table: | error* | errors | | varuint | number of warnings | | warning* | warnings | +| `1` | `1` if the source is continuable (incomplete but could become valid with more input), `0` otherwise | | `4` | content pool offset | | varuint | content pool size | diff --git a/ext/prism/extension.c b/ext/prism/extension.c index 7c90e48845..147434c975 100644 --- a/ext/prism/extension.c +++ b/ext/prism/extension.c @@ -641,10 +641,11 @@ parse_result_create(VALUE class, const pm_parser_t *parser, VALUE value, rb_enco parser_data_loc(parser, source, freeze), parser_errors(parser, encoding, source, freeze), parser_warnings(parser, encoding, source, freeze), + parser->continuable ? Qtrue : Qfalse, source }; - return rb_class_new_instance_freeze(7, result_argv, class, freeze); + return rb_class_new_instance_freeze(8, result_argv, class, freeze); } /******************************************************************************/ diff --git a/include/prism/parser.h b/include/prism/parser.h index ed4871197c..5ebace10c6 100644 --- a/include/prism/parser.h +++ b/include/prism/parser.h @@ -895,6 +895,14 @@ struct pm_parser { /** Whether or not we're currently recovering from a syntax error. */ bool recovering; + /** + * Whether or not the source being parsed could become valid if more input + * were appended. This is set to false when the parser encounters a token + * that is definitively wrong (e.g., a stray `end` or `]`) as opposed to + * merely incomplete. + */ + bool continuable; + /** * This is very specialized behavior for when you want to parse in a context * that does not respect encoding comments. Its main use case is translating diff --git a/java/org/ruby_lang/prism/ParseResult.java b/java/org/ruby_lang/prism/ParseResult.java index aad05a892d..7931327b9a 100644 --- a/java/org/ruby_lang/prism/ParseResult.java +++ b/java/org/ruby_lang/prism/ParseResult.java @@ -69,14 +69,16 @@ public Warning(Nodes.WarningType type, String message, Nodes.Location location, public final Nodes.Location dataLocation; public final Error[] errors; public final Warning[] warnings; + public final boolean continuable; public final Nodes.Source source; - public ParseResult(Nodes.Node value, MagicComment[] magicComments, Nodes.Location dataLocation, Error[] errors, Warning[] warnings, Nodes.Source source) { + public ParseResult(Nodes.Node value, MagicComment[] magicComments, Nodes.Location dataLocation, Error[] errors, Warning[] warnings, boolean continuable, Nodes.Source source) { this.value = value; this.magicComments = magicComments; this.dataLocation = dataLocation; this.errors = errors; this.warnings = warnings; + this.continuable = continuable; this.source = source; } } diff --git a/lib/prism/lex_compat.rb b/lib/prism/lex_compat.rb index 0bc56ec592..e1b04fc6ce 100644 --- a/lib/prism/lex_compat.rb +++ b/lib/prism/lex_compat.rb @@ -43,10 +43,10 @@ class Result < Prism::Result # Create a new lex compat result object with the given values. #-- - #: (Array[lex_compat_token] value, Array[Comment] comments, Array[MagicComment] magic_comments, Location? data_loc, Array[ParseError] errors, Array[ParseWarning] warnings, Source source) -> void - def initialize(value, comments, magic_comments, data_loc, errors, warnings, source) + #: (Array[lex_compat_token] value, Array[Comment] comments, Array[MagicComment] magic_comments, Location? data_loc, Array[ParseError] errors, Array[ParseWarning] warnings, bool continuable, Source source) -> void + def initialize(value, comments, magic_comments, data_loc, errors, warnings, continuable, source) @value = value - super(comments, magic_comments, data_loc, errors, warnings, source) + super(comments, magic_comments, data_loc, errors, warnings, continuable, source) end # Implement the hash pattern matching interface for Result. @@ -825,7 +825,7 @@ def result tokens = post_process_tokens(tokens, source, result.data_loc, bom, eof_token) - Result.new(tokens, result.comments, result.magic_comments, result.data_loc, result.errors, result.warnings, source) + Result.new(tokens, result.comments, result.magic_comments, result.data_loc, result.errors, result.warnings, result.continuable?, source) end private diff --git a/lib/prism/parse_result.rb b/lib/prism/parse_result.rb index 4898fdd435..5c4d4fcb8a 100644 --- a/lib/prism/parse_result.rb +++ b/lib/prism/parse_result.rb @@ -898,13 +898,14 @@ class Result # Create a new result object with the given values. #-- - #: (Array[Comment] comments, Array[MagicComment] magic_comments, Location? data_loc, Array[ParseError] errors, Array[ParseWarning] warnings, Source source) -> void - def initialize(comments, magic_comments, data_loc, errors, warnings, source) + #: (Array[Comment] comments, Array[MagicComment] magic_comments, Location? data_loc, Array[ParseError] errors, Array[ParseWarning] warnings, bool continuable, Source source) -> void + def initialize(comments, magic_comments, data_loc, errors, warnings, continuable, source) @comments = comments @magic_comments = magic_comments @data_loc = data_loc @errors = errors @warnings = warnings + @continuable = continuable @source = source end @@ -961,54 +962,8 @@ def failure? #-- #: () -> bool def continuable? - return false if errors.empty? - - offset = source.source.bytesize - errors.all? { |error| CONTINUABLE.include?(error.type) || error.location.start_offset == offset } - end - - # The set of error types whose location the parser places at the opening - # token of an unclosed construct rather than at the end of the source. These - # errors always indicate incomplete input regardless of their byte position, - # so they are checked by type rather than by location. - #-- - #: Array[Symbol] - CONTINUABLE = %i[ - begin_term - begin_upcase_term - block_param_pipe_term - block_term_brace - block_term_end - case_missing_conditions - case_term - class_term - conditional_term - conditional_term_else - def_term - embdoc_term - end_upcase_term - for_term - hash_term - heredoc_term - lambda_term_brace - lambda_term_end - list_i_lower_term - list_i_upper_term - list_w_lower_term - list_w_upper_term - module_term - regexp_term - rescue_term - string_interpolated_term - string_literal_eof - symbol_term_dynamic - symbol_term_interpolated - until_term - while_term - xstring_term - ].freeze - - private_constant :CONTINUABLE + @continuable + end # Create a code units cache for the given encoding. #-- @@ -1033,10 +988,10 @@ class ParseResult < Result # Create a new parse result object with the given values. #-- - #: (ProgramNode value, Array[Comment] comments, Array[MagicComment] magic_comments, Location? data_loc, Array[ParseError] errors, Array[ParseWarning] warnings, Source source) -> void - def initialize(value, comments, magic_comments, data_loc, errors, warnings, source) + #: (ProgramNode value, Array[Comment] comments, Array[MagicComment] magic_comments, Location? data_loc, Array[ParseError] errors, Array[ParseWarning] warnings, bool continuable, Source source) -> void + def initialize(value, comments, magic_comments, data_loc, errors, warnings, continuable, source) @value = value - super(comments, magic_comments, data_loc, errors, warnings, source) + super(comments, magic_comments, data_loc, errors, warnings, continuable, source) end # Implement the hash pattern matching interface for ParseResult. @@ -1077,10 +1032,10 @@ class LexResult < Result # Create a new lex result object with the given values. #-- - #: (Array[[Token, Integer]] value, Array[Comment] comments, Array[MagicComment] magic_comments, Location? data_loc, Array[ParseError] errors, Array[ParseWarning] warnings, Source source) -> void - def initialize(value, comments, magic_comments, data_loc, errors, warnings, source) + #: (Array[[Token, Integer]] value, Array[Comment] comments, Array[MagicComment] magic_comments, Location? data_loc, Array[ParseError] errors, Array[ParseWarning] warnings, bool continuable, Source source) -> void + def initialize(value, comments, magic_comments, data_loc, errors, warnings, continuable, source) @value = value - super(comments, magic_comments, data_loc, errors, warnings, source) + super(comments, magic_comments, data_loc, errors, warnings, continuable, source) end # Implement the hash pattern matching interface for LexResult. @@ -1099,10 +1054,10 @@ class ParseLexResult < Result # Create a new parse lex result object with the given values. #-- - #: ([ProgramNode, Array[[Token, Integer]]] value, Array[Comment] comments, Array[MagicComment] magic_comments, Location? data_loc, Array[ParseError] errors, Array[ParseWarning] warnings, Source source) -> void - def initialize(value, comments, magic_comments, data_loc, errors, warnings, source) + #: ([ProgramNode, Array[[Token, Integer]]] value, Array[Comment] comments, Array[MagicComment] magic_comments, Location? data_loc, Array[ParseError] errors, Array[ParseWarning] warnings, bool continuable, Source source) -> void + def initialize(value, comments, magic_comments, data_loc, errors, warnings, continuable, source) @value = value - super(comments, magic_comments, data_loc, errors, warnings, source) + super(comments, magic_comments, data_loc, errors, warnings, continuable, source) end # Implement the hash pattern matching interface for ParseLexResult. diff --git a/rbi/generated/prism/lex_compat.rbi b/rbi/generated/prism/lex_compat.rbi index 5ce03767c8..ca479b7225 100644 --- a/rbi/generated/prism/lex_compat.rbi +++ b/rbi/generated/prism/lex_compat.rbi @@ -36,8 +36,8 @@ module Prism attr_reader :value # Create a new lex compat result object with the given values. - sig { params(value: T::Array[[[Integer, Integer], Symbol, String, ::T.untyped]], comments: T::Array[Comment], magic_comments: T::Array[MagicComment], data_loc: ::T.nilable(Location), errors: T::Array[ParseError], warnings: T::Array[ParseWarning], source: Source).void } - def initialize(value, comments, magic_comments, data_loc, errors, warnings, source); end + sig { params(value: T::Array[[[Integer, Integer], Symbol, String, ::T.untyped]], comments: T::Array[Comment], magic_comments: T::Array[MagicComment], data_loc: ::T.nilable(Location), errors: T::Array[ParseError], warnings: T::Array[ParseWarning], continuable: T::Boolean, source: Source).void } + def initialize(value, comments, magic_comments, data_loc, errors, warnings, continuable, source); end # Implement the hash pattern matching interface for Result. sig { params(keys: ::T.nilable(T::Array[Symbol])).returns(T::Hash[Symbol, ::T.untyped]) } diff --git a/rbi/generated/prism/parse_result.rbi b/rbi/generated/prism/parse_result.rbi index 4a904e85e1..ddced69934 100644 --- a/rbi/generated/prism/parse_result.rbi +++ b/rbi/generated/prism/parse_result.rbi @@ -558,8 +558,8 @@ module Prism attr_reader :source # Create a new result object with the given values. - sig { params(comments: T::Array[Comment], magic_comments: T::Array[MagicComment], data_loc: ::T.nilable(Location), errors: T::Array[ParseError], warnings: T::Array[ParseWarning], source: Source).void } - def initialize(comments, magic_comments, data_loc, errors, warnings, source); end + sig { params(comments: T::Array[Comment], magic_comments: T::Array[MagicComment], data_loc: ::T.nilable(Location), errors: T::Array[ParseError], warnings: T::Array[ParseWarning], continuable: T::Boolean, source: Source).void } + def initialize(comments, magic_comments, data_loc, errors, warnings, continuable, source); end # Implement the hash pattern matching interface for Result. sig { params(keys: ::T.nilable(T::Array[Symbol])).returns(T::Hash[Symbol, ::T.untyped]) } @@ -602,12 +602,6 @@ module Prism sig { returns(T::Boolean) } def continuable?; end - # The set of error types whose location the parser places at the opening - # token of an unclosed construct rather than at the end of the source. These - # errors always indicate incomplete input regardless of their byte position, - # so they are checked by type rather than by location. - CONTINUABLE = T.let(nil, ::T.untyped) - # Create a code units cache for the given encoding. sig { params(encoding: Encoding).returns(::T.untyped) } def code_units_cache(encoding); end @@ -620,8 +614,8 @@ module Prism attr_reader :value # Create a new parse result object with the given values. - sig { params(value: ProgramNode, comments: T::Array[Comment], magic_comments: T::Array[MagicComment], data_loc: ::T.nilable(Location), errors: T::Array[ParseError], warnings: T::Array[ParseWarning], source: Source).void } - def initialize(value, comments, magic_comments, data_loc, errors, warnings, source); end + sig { params(value: ProgramNode, comments: T::Array[Comment], magic_comments: T::Array[MagicComment], data_loc: ::T.nilable(Location), errors: T::Array[ParseError], warnings: T::Array[ParseWarning], continuable: T::Boolean, source: Source).void } + def initialize(value, comments, magic_comments, data_loc, errors, warnings, continuable, source); end # Implement the hash pattern matching interface for ParseResult. sig { params(keys: ::T.nilable(T::Array[Symbol])).returns(T::Hash[Symbol, ::T.untyped]) } @@ -649,8 +643,8 @@ module Prism attr_reader :value # Create a new lex result object with the given values. - sig { params(value: T::Array[[Token, Integer]], comments: T::Array[Comment], magic_comments: T::Array[MagicComment], data_loc: ::T.nilable(Location), errors: T::Array[ParseError], warnings: T::Array[ParseWarning], source: Source).void } - def initialize(value, comments, magic_comments, data_loc, errors, warnings, source); end + sig { params(value: T::Array[[Token, Integer]], comments: T::Array[Comment], magic_comments: T::Array[MagicComment], data_loc: ::T.nilable(Location), errors: T::Array[ParseError], warnings: T::Array[ParseWarning], continuable: T::Boolean, source: Source).void } + def initialize(value, comments, magic_comments, data_loc, errors, warnings, continuable, source); end # Implement the hash pattern matching interface for LexResult. sig { params(keys: ::T.nilable(T::Array[Symbol])).returns(T::Hash[Symbol, ::T.untyped]) } @@ -665,8 +659,8 @@ module Prism attr_reader :value # Create a new parse lex result object with the given values. - sig { params(value: [ProgramNode, T::Array[[Token, Integer]]], comments: T::Array[Comment], magic_comments: T::Array[MagicComment], data_loc: ::T.nilable(Location), errors: T::Array[ParseError], warnings: T::Array[ParseWarning], source: Source).void } - def initialize(value, comments, magic_comments, data_loc, errors, warnings, source); end + sig { params(value: [ProgramNode, T::Array[[Token, Integer]]], comments: T::Array[Comment], magic_comments: T::Array[MagicComment], data_loc: ::T.nilable(Location), errors: T::Array[ParseError], warnings: T::Array[ParseWarning], continuable: T::Boolean, source: Source).void } + def initialize(value, comments, magic_comments, data_loc, errors, warnings, continuable, source); end # Implement the hash pattern matching interface for ParseLexResult. sig { params(keys: ::T.nilable(T::Array[Symbol])).returns(T::Hash[Symbol, ::T.untyped]) } diff --git a/rbi/generated/prism/serialize.rbi b/rbi/generated/prism/serialize.rbi index 80bf9be1da..57b4c61acb 100644 --- a/rbi/generated/prism/serialize.rbi +++ b/rbi/generated/prism/serialize.rbi @@ -123,6 +123,9 @@ module Prism sig { returns(Float) } def load_double; end + sig { returns(T::Boolean) } + def load_bool; end + sig { returns(Integer) } def load_uint32; end diff --git a/sig/generated/prism/lex_compat.rbs b/sig/generated/prism/lex_compat.rbs index d6bf985b0a..707a96b9a8 100644 --- a/sig/generated/prism/lex_compat.rbs +++ b/sig/generated/prism/lex_compat.rbs @@ -38,8 +38,8 @@ module Prism # Create a new lex compat result object with the given values. # -- - # : (Array[lex_compat_token] value, Array[Comment] comments, Array[MagicComment] magic_comments, Location? data_loc, Array[ParseError] errors, Array[ParseWarning] warnings, Source source) -> void - def initialize: (Array[lex_compat_token] value, Array[Comment] comments, Array[MagicComment] magic_comments, Location? data_loc, Array[ParseError] errors, Array[ParseWarning] warnings, Source source) -> void + # : (Array[lex_compat_token] value, Array[Comment] comments, Array[MagicComment] magic_comments, Location? data_loc, Array[ParseError] errors, Array[ParseWarning] warnings, bool continuable, Source source) -> void + def initialize: (Array[lex_compat_token] value, Array[Comment] comments, Array[MagicComment] magic_comments, Location? data_loc, Array[ParseError] errors, Array[ParseWarning] warnings, bool continuable, Source source) -> void # Implement the hash pattern matching interface for Result. # -- diff --git a/sig/generated/prism/parse_result.rbs b/sig/generated/prism/parse_result.rbs index dc56214867..d2b4035960 100644 --- a/sig/generated/prism/parse_result.rbs +++ b/sig/generated/prism/parse_result.rbs @@ -644,8 +644,8 @@ module Prism # Create a new result object with the given values. # -- - # : (Array[Comment] comments, Array[MagicComment] magic_comments, Location? data_loc, Array[ParseError] errors, Array[ParseWarning] warnings, Source source) -> void - def initialize: (Array[Comment] comments, Array[MagicComment] magic_comments, Location? data_loc, Array[ParseError] errors, Array[ParseWarning] warnings, Source source) -> void + # : (Array[Comment] comments, Array[MagicComment] magic_comments, Location? data_loc, Array[ParseError] errors, Array[ParseWarning] warnings, bool continuable, Source source) -> void + def initialize: (Array[Comment] comments, Array[MagicComment] magic_comments, Location? data_loc, Array[ParseError] errors, Array[ParseWarning] warnings, bool continuable, Source source) -> void # Implement the hash pattern matching interface for Result. # -- @@ -693,14 +693,6 @@ module Prism # : () -> bool def continuable?: () -> bool - # The set of error types whose location the parser places at the opening - # token of an unclosed construct rather than at the end of the source. These - # errors always indicate incomplete input regardless of their byte position, - # so they are checked by type rather than by location. - # -- - # : Array[Symbol] - CONTINUABLE: untyped - # Create a code units cache for the given encoding. # -- # : (Encoding encoding) -> _CodeUnitsCache @@ -714,8 +706,8 @@ module Prism # Create a new parse result object with the given values. # -- - # : (ProgramNode value, Array[Comment] comments, Array[MagicComment] magic_comments, Location? data_loc, Array[ParseError] errors, Array[ParseWarning] warnings, Source source) -> void - def initialize: (ProgramNode value, Array[Comment] comments, Array[MagicComment] magic_comments, Location? data_loc, Array[ParseError] errors, Array[ParseWarning] warnings, Source source) -> void + # : (ProgramNode value, Array[Comment] comments, Array[MagicComment] magic_comments, Location? data_loc, Array[ParseError] errors, Array[ParseWarning] warnings, bool continuable, Source source) -> void + def initialize: (ProgramNode value, Array[Comment] comments, Array[MagicComment] magic_comments, Location? data_loc, Array[ParseError] errors, Array[ParseWarning] warnings, bool continuable, Source source) -> void # Implement the hash pattern matching interface for ParseResult. # -- @@ -747,8 +739,8 @@ module Prism # Create a new lex result object with the given values. # -- - # : (Array[[Token, Integer]] value, Array[Comment] comments, Array[MagicComment] magic_comments, Location? data_loc, Array[ParseError] errors, Array[ParseWarning] warnings, Source source) -> void - def initialize: (Array[[ Token, Integer ]] value, Array[Comment] comments, Array[MagicComment] magic_comments, Location? data_loc, Array[ParseError] errors, Array[ParseWarning] warnings, Source source) -> void + # : (Array[[Token, Integer]] value, Array[Comment] comments, Array[MagicComment] magic_comments, Location? data_loc, Array[ParseError] errors, Array[ParseWarning] warnings, bool continuable, Source source) -> void + def initialize: (Array[[ Token, Integer ]] value, Array[Comment] comments, Array[MagicComment] magic_comments, Location? data_loc, Array[ParseError] errors, Array[ParseWarning] warnings, bool continuable, Source source) -> void # Implement the hash pattern matching interface for LexResult. # -- @@ -764,8 +756,8 @@ module Prism # Create a new parse lex result object with the given values. # -- - # : ([ProgramNode, Array[[Token, Integer]]] value, Array[Comment] comments, Array[MagicComment] magic_comments, Location? data_loc, Array[ParseError] errors, Array[ParseWarning] warnings, Source source) -> void - def initialize: ([ ProgramNode, Array[[ Token, Integer ]] ] value, Array[Comment] comments, Array[MagicComment] magic_comments, Location? data_loc, Array[ParseError] errors, Array[ParseWarning] warnings, Source source) -> void + # : ([ProgramNode, Array[[Token, Integer]]] value, Array[Comment] comments, Array[MagicComment] magic_comments, Location? data_loc, Array[ParseError] errors, Array[ParseWarning] warnings, bool continuable, Source source) -> void + def initialize: ([ ProgramNode, Array[[ Token, Integer ]] ] value, Array[Comment] comments, Array[MagicComment] magic_comments, Location? data_loc, Array[ParseError] errors, Array[ParseWarning] warnings, bool continuable, Source source) -> void # Implement the hash pattern matching interface for ParseLexResult. # -- diff --git a/sig/generated/prism/serialize.rbs b/sig/generated/prism/serialize.rbs index e0b944856b..a83dae70d8 100644 --- a/sig/generated/prism/serialize.rbs +++ b/sig/generated/prism/serialize.rbs @@ -132,6 +132,9 @@ module Prism # : () -> Float def load_double: () -> Float + # : () -> bool + def load_bool: () -> bool + # : () -> Integer def load_uint32: () -> Integer diff --git a/src/prism.c b/src/prism.c index 18aa841ec9..6edc67b627 100644 --- a/src/prism.c +++ b/src/prism.c @@ -22016,6 +22016,7 @@ pm_parser_init(pm_arena_t *arena, pm_parser_t *parser, const uint8_t *source, si .partial_script = false, .command_start = true, .recovering = false, + .continuable = true, .encoding_locked = false, .encoding_changed = false, .pattern_matching_newlines = false, @@ -22292,12 +22293,176 @@ pm_parser_free(pm_parser_t *parser) { } } +/** + * Returns true if the given diagnostic ID represents an error that cannot be + * fixed by appending more input. These are errors where the existing source + * contains definitively invalid syntax (as opposed to merely incomplete input). + */ +static bool +pm_parse_err_is_fatal(pm_diagnostic_id_t diag_id) { + switch (diag_id) { + case PM_ERR_ARRAY_EXPRESSION_AFTER_STAR: + case PM_ERR_BEGIN_UPCASE_BRACE: + case PM_ERR_CLASS_VARIABLE_BARE: + case PM_ERR_END_UPCASE_BRACE: + case PM_ERR_ESCAPE_INVALID_HEXADECIMAL: + case PM_ERR_ESCAPE_INVALID_UNICODE_LIST: + case PM_ERR_ESCAPE_INVALID_UNICODE_SHORT: + case PM_ERR_EXPRESSION_NOT_WRITABLE: + case PM_ERR_EXPRESSION_NOT_WRITABLE_SELF: + case PM_ERR_FLOAT_PARSE: + case PM_ERR_GLOBAL_VARIABLE_BARE: + case PM_ERR_HASH_KEY: + case PM_ERR_HEREDOC_IDENTIFIER: + case PM_ERR_INSTANCE_VARIABLE_BARE: + case PM_ERR_INVALID_BLOCK_EXIT: + case PM_ERR_INVALID_ENCODING_MAGIC_COMMENT: + case PM_ERR_INVALID_FLOAT_EXPONENT: + case PM_ERR_INVALID_NUMBER_BINARY: + case PM_ERR_INVALID_NUMBER_DECIMAL: + case PM_ERR_INVALID_NUMBER_HEXADECIMAL: + case PM_ERR_INVALID_NUMBER_OCTAL: + case PM_ERR_INVALID_NUMBER_UNDERSCORE_TRAILING: + case PM_ERR_NO_LOCAL_VARIABLE: + case PM_ERR_PARAMETER_ORDER: + case PM_ERR_STATEMENT_UNDEF: + case PM_ERR_VOID_EXPRESSION: + return true; + default: + return false; + } +} + +/** + * Determine whether the source parsed by the given parser could become valid if + * more input were appended. This is used by tools like IRB to decide whether to + * prompt for continuation or to display an error. + * + * The parser starts with continuable=true. This function scans all errors to + * detect two categories of non-continuable errors: + * + * 1. Fatal errors: errors like invalid number literals or bare global variables + * that indicate definitively invalid syntax. These are only considered fatal + * if they occur before EOF (at EOF they could be from truncated input, e.g. + * `"\x` is an incomplete hex escape). + * + * 2. Stray tokens: unexpected_token_ignore and unexpected_token_close_context + * errors indicate tokens that don't belong. A stray token is a cascade + * effect (and does not prevent continuability) if: + * + * a. A non-stray, non-fatal error appeared earlier in the error list at a + * strictly earlier source position (the stray was caused by a preceding + * parse failure, e.g. a truncated heredoc), OR + * b. The stray token is at EOF, starts after position 0 (there is valid + * code before it), and either is a single byte (likely a truncated + * token like `\`) or there are non-stray errors elsewhere. + * + * Closing delimiters (`)`, `]`, `}`) at EOF are always genuinely stray — + * they are complete tokens and cannot become part of a longer valid + * construct by appending more input. + * + * c. The stray token is `=` at the start of a line, which could be the + * beginning of `=begin` (an embedded document). The remaining bytes + * after `=` may parse as an identifier, so the error is not at EOF, + * but the construct is genuinely incomplete. + */ +static void +pm_parse_continuable(pm_parser_t *parser) { + // If there are no errors then there is nothing to continue. + if (parser->error_list.size == 0) { + parser->continuable = false; + return; + } + + if (!parser->continuable) return; + + size_t source_length = (size_t) (parser->end - parser->start); + + // First pass: check if there are any non-stray, non-fatal errors. + bool has_non_stray_error = false; + for (pm_diagnostic_t *error = (pm_diagnostic_t *) parser->error_list.head; error != NULL; error = (pm_diagnostic_t *) error->node.next) { + if (error->diag_id != PM_ERR_UNEXPECTED_TOKEN_IGNORE && error->diag_id != PM_ERR_UNEXPECTED_TOKEN_CLOSE_CONTEXT && !pm_parse_err_is_fatal(error->diag_id)) { + has_non_stray_error = true; + break; + } + } + + // Second pass: check each error. We track the minimum source position + // among non-stray, non-fatal errors seen so far in list order, which + // lets us detect cascade stray tokens. + size_t non_stray_min_start = SIZE_MAX; + + for (pm_diagnostic_t *error = (pm_diagnostic_t *) parser->error_list.head; error != NULL; error = (pm_diagnostic_t *) error->node.next) { + size_t error_start = (size_t) error->location.start; + size_t error_end = error_start + (size_t) error->location.length; + bool at_eof = error_end >= source_length; + + // Fatal errors are non-continuable unless they occur at EOF. + if (pm_parse_err_is_fatal(error->diag_id) && !at_eof) { + parser->continuable = false; + return; + } + + // Track non-stray, non-fatal error positions in list order. + if (error->diag_id != PM_ERR_UNEXPECTED_TOKEN_IGNORE && + error->diag_id != PM_ERR_UNEXPECTED_TOKEN_CLOSE_CONTEXT) { + if (error_start < non_stray_min_start) non_stray_min_start = error_start; + continue; + } + + // This is a stray token. Determine if it is a cascade effect + // of a preceding error or genuinely stray. + + // Rule (a): a non-stray error was seen earlier in the list at a + // strictly earlier position — this stray is a cascade effect. + if (non_stray_min_start < error_start) continue; + + // Rule (b): this stray is at EOF with valid code before it. + // Single-byte stray tokens at EOF (like `\` for line continuation) + // are likely truncated tokens. Multi-byte stray tokens (like the + // keyword `end`) need additional evidence that they are cascade + // effects (i.e. non-stray errors exist elsewhere). + if (at_eof && error_start > 0) { + // Exception: closing delimiters at EOF are genuinely stray. + if (error->location.length == 1) { + const uint8_t *byte = parser->start + error_start; + if (*byte == ')' || *byte == ']' || *byte == '}') { + parser->continuable = false; + return; + } + + // Single-byte non-delimiter stray at EOF: cascade. + continue; + } + + // Multi-byte stray at EOF: cascade only if there are + // non-stray errors (evidence of a preceding parse failure). + if (has_non_stray_error) continue; + } + + // Rule (c): a stray `=` at the start of a line could be the + // beginning of an embedded document (`=begin`). The remaining + // bytes after `=` parse as an identifier, so the error is not + // at EOF, but the construct is genuinely incomplete. + if (error->location.length == 1) { + const uint8_t *byte = parser->start + error_start; + if (*byte == '=' && (error_start == 0 || *(byte - 1) == '\n')) continue; + } + + // This stray token is genuinely non-continuable. + parser->continuable = false; + return; + } +} + /** * Parse the Ruby source associated with the given parser and return the tree. */ PRISM_EXPORTED_FUNCTION pm_node_t * pm_parse(pm_parser_t *parser) { - return parse_program(parser); + pm_node_t *node = parse_program(parser); + pm_parse_continuable(parser); + return node; } /** diff --git a/templates/java/org/ruby_lang/prism/Loader.java.erb b/templates/java/org/ruby_lang/prism/Loader.java.erb index 3c2a0cfd09..23ba5d8544 100644 --- a/templates/java/org/ruby_lang/prism/Loader.java.erb +++ b/templates/java/org/ruby_lang/prism/Loader.java.erb @@ -112,6 +112,7 @@ public class Loader { Nodes.Location dataLocation = loadOptionalLocation(); ParseResult.Error[] errors = loadErrors(); ParseResult.Warning[] warnings = loadWarnings(); + boolean continuable = buffer.get() != 0; int constantPoolBufferOffset = buffer.getInt(); int constantPoolLength = loadVarUInt(); @@ -133,7 +134,7 @@ public class Loader { node = null; } - return new ParseResult(node, magicComments, dataLocation, errors, warnings, source); + return new ParseResult(node, magicComments, dataLocation, errors, warnings, continuable, source); } private byte[] loadString() { diff --git a/templates/javascript/src/deserialize.js.erb b/templates/javascript/src/deserialize.js.erb index 2aeb142f8d..34ff1574da 100644 --- a/templates/javascript/src/deserialize.js.erb +++ b/templates/javascript/src/deserialize.js.erb @@ -334,10 +334,12 @@ export function deserialize(array) { level: warningLevels[buffer.readByte()] })); + const continuable = buffer.readByte() !== 0; + const constantPoolOffset = buffer.readUint32(); const constants = Array.from({ length: buffer.readVarInt() }, () => null); - return new ParseResult(readRequiredNode(), comments, magicComments, dataLoc, errors, warnings); + return new ParseResult(readRequiredNode(), comments, magicComments, dataLoc, errors, warnings, continuable); function readRequiredNode() { const type = buffer.readByte(); diff --git a/templates/lib/prism/serialize.rb.erb b/templates/lib/prism/serialize.rb.erb index 4e61c89a89..c272d84bb4 100644 --- a/templates/lib/prism/serialize.rb.erb +++ b/templates/lib/prism/serialize.rb.erb @@ -43,6 +43,7 @@ module Prism data_loc = loader.load_optional_location_object(freeze) errors = loader.load_errors(encoding, freeze) warnings = loader.load_warnings(encoding, freeze) + continuable = loader.load_bool cpool_base = loader.load_uint32 cpool_size = loader.load_varuint @@ -52,7 +53,7 @@ module Prism loader.load_constant_pool(constant_pool) raise unless loader.eof? - result = ParseResult.new(node, comments, magic_comments, data_loc, errors, warnings, source) + result = ParseResult.new(node, comments, magic_comments, data_loc, errors, warnings, continuable, source) result.freeze if freeze input.force_encoding(encoding) @@ -97,9 +98,10 @@ module Prism data_loc = loader.load_optional_location_object(freeze) errors = loader.load_errors(encoding, freeze) warnings = loader.load_warnings(encoding, freeze) + continuable = loader.load_bool raise unless loader.eof? - result = LexResult.new(tokens, comments, magic_comments, data_loc, errors, warnings, source) + result = LexResult.new(tokens, comments, magic_comments, data_loc, errors, warnings, continuable, source) tokens.each do |token| token[0].value.force_encoding(encoding) @@ -168,6 +170,7 @@ module Prism data_loc = loader.load_optional_location_object(freeze) errors = loader.load_errors(encoding, freeze) warnings = loader.load_warnings(encoding, freeze) + continuable = loader.load_bool cpool_base = loader.load_uint32 cpool_size = loader.load_varuint @@ -178,7 +181,7 @@ module Prism raise unless loader.eof? value = [node, tokens] #: [ProgramNode, Array[[Token, Integer]]] - result = ParseLexResult.new(value, comments, magic_comments, data_loc, errors, warnings, source) + result = ParseLexResult.new(value, comments, magic_comments, data_loc, errors, warnings, continuable, source) tokens.each do |token| token[0].value.force_encoding(encoding) @@ -488,6 +491,11 @@ module Prism (io.read(8) or raise).unpack1("D") #: Float end + #: () -> bool + def load_bool + (io.getbyte or raise) != 0 + end + #: () -> Integer def load_uint32 (io.read(4) or raise).unpack1("L") #: Integer diff --git a/templates/src/serialize.c.erb b/templates/src/serialize.c.erb index 4fe0cb88c1..1f90a2160e 100644 --- a/templates/src/serialize.c.erb +++ b/templates/src/serialize.c.erb @@ -247,6 +247,7 @@ pm_serialize_metadata(pm_parser_t *parser, pm_buffer_t *buffer) { pm_serialize_data_loc(parser, buffer); pm_serialize_diagnostic_list(&parser->error_list, buffer); pm_serialize_diagnostic_list(&parser->warning_list, buffer); + pm_buffer_append_byte(buffer, (uint8_t) parser->continuable); } #line <%= __LINE__ + 1 %> "prism/templates/src/<%= File.basename(__FILE__) %>" diff --git a/test/prism/errors_test.rb b/test/prism/errors_test.rb index c3362eaaf5..9a54203f74 100644 --- a/test/prism/errors_test.rb +++ b/test/prism/errors_test.rb @@ -105,41 +105,6 @@ def test_unclosed_heredoc_and_interpolation assert_nil(statement.parts[0].statements) end - def test_continuable - # Valid input is not continuable (nothing to continue). - refute_predicate Prism.parse("1 + 1"), :continuable? - refute_predicate Prism.parse(""), :continuable? - - # Stray closing tokens make input non-continuable regardless of what - # follows (matches the feature-request examples exactly). - refute_predicate Prism.parse("1 + ]"), :continuable? - refute_predicate Prism.parse("end.tap do"), :continuable? - - # Unclosed constructs are continuable. - assert_predicate Prism.parse("1 + ["), :continuable? - assert_predicate Prism.parse("tap do"), :continuable? - - # Unclosed keywords. - assert_predicate Prism.parse("def foo"), :continuable? - assert_predicate Prism.parse("class Foo"), :continuable? - assert_predicate Prism.parse("module Foo"), :continuable? - assert_predicate Prism.parse("if true"), :continuable? - assert_predicate Prism.parse("while true"), :continuable? - assert_predicate Prism.parse("begin"), :continuable? - assert_predicate Prism.parse("for x in [1]"), :continuable? - - # Unclosed delimiters. - assert_predicate Prism.parse("{"), :continuable? - assert_predicate Prism.parse("foo("), :continuable? - assert_predicate Prism.parse('"hello'), :continuable? - assert_predicate Prism.parse("'hello"), :continuable? - assert_predicate Prism.parse("<<~HEREDOC\nhello"), :continuable? - - # A mix: stray end plus an unclosed block is not continuable because the - # stray end cannot be fixed by appending more input. - refute_predicate Prism.parse("end\ntap do"), :continuable? - end - private def assert_errors(filepath, version) diff --git a/test/prism/result/continuable_test.rb b/test/prism/result/continuable_test.rb new file mode 100644 index 0000000000..3533552167 --- /dev/null +++ b/test/prism/result/continuable_test.rb @@ -0,0 +1,124 @@ +# frozen_string_literal: true + +require_relative "../test_helper" + +module Prism + class ContinuableTest < TestCase + def test_valid_input + # Valid input is not continuable (nothing to continue). + refute_predicate Prism.parse("1 + 1"), :continuable? + refute_predicate Prism.parse(""), :continuable? + end + + def test_stray_closing_tokens + # Stray closing tokens make input non-continuable regardless of what + # follows (matches the feature-request examples exactly). + refute_predicate Prism.parse("1 + ]"), :continuable? + refute_predicate Prism.parse("end.tap do"), :continuable? + + # A mix: stray end plus an unclosed block is not continuable because the + # stray end cannot be fixed by appending more input. + refute_predicate Prism.parse("end\ntap do"), :continuable? + end + + def test_unclosed_constructs + # Unclosed constructs are continuable. + assert_predicate Prism.parse("1 + ["), :continuable? + assert_predicate Prism.parse("tap do"), :continuable? + end + + def test_unclosed_keywords + assert_predicate Prism.parse("def foo"), :continuable? + assert_predicate Prism.parse("class Foo"), :continuable? + assert_predicate Prism.parse("module Foo"), :continuable? + assert_predicate Prism.parse("if true"), :continuable? + assert_predicate Prism.parse("while true"), :continuable? + assert_predicate Prism.parse("begin"), :continuable? + assert_predicate Prism.parse("for x in [1]"), :continuable? + end + + def test_unclosed_delimiters + assert_predicate Prism.parse("{"), :continuable? + assert_predicate Prism.parse("foo("), :continuable? + assert_predicate Prism.parse('"hello'), :continuable? + assert_predicate Prism.parse("'hello"), :continuable? + assert_predicate Prism.parse("<<~HEREDOC\nhello"), :continuable? + end + + def test_trailing_whitespace + # Trailing whitespace or newlines should not affect continuability. + assert_predicate Prism.parse("class A\n"), :continuable? + assert_predicate Prism.parse("def f "), :continuable? + assert_predicate Prism.parse("def f\n"), :continuable? + assert_predicate Prism.parse("def f\n "), :continuable? + assert_predicate Prism.parse("( "), :continuable? + assert_predicate Prism.parse("(\n"), :continuable? + assert_predicate Prism.parse("1 +\n"), :continuable? + end + + def test_incomplete_expressions + assert_predicate Prism.parse("-"), :continuable? + assert_predicate Prism.parse("[1,"), :continuable? + assert_predicate Prism.parse("f arg1,"), :continuable? + assert_predicate Prism.parse("def f ="), :continuable? + assert_predicate Prism.parse("def $a"), :continuable? + assert_predicate Prism.parse("a ="), :continuable? + assert_predicate Prism.parse("a,b"), :continuable? + end + + def test_modifier_keywords + assert_predicate Prism.parse("return if"), :continuable? + assert_predicate Prism.parse("return unless"), :continuable? + assert_predicate Prism.parse("while"), :continuable? + assert_predicate Prism.parse("until"), :continuable? + end + + def test_ternary_operator + assert_predicate Prism.parse("x ?"), :continuable? + assert_predicate Prism.parse("x ? y :"), :continuable? + end + + def test_class_with_superclass + assert_predicate Prism.parse("class Foo <"), :continuable? + end + + def test_keyword_expressions + assert_predicate Prism.parse("not"), :continuable? + assert_predicate Prism.parse("defined?"), :continuable? + assert_predicate Prism.parse("module"), :continuable? + end + + def test_for_loops + assert_predicate Prism.parse("for"), :continuable? + assert_predicate Prism.parse("for x in"), :continuable? + end + + def test_pattern_matching + assert_predicate Prism.parse("foo => ["), :continuable? + assert_predicate Prism.parse("case foo; when"), :continuable? + end + + def test_splat_and_block_pass + assert_predicate Prism.parse("[*"), :continuable? + assert_predicate Prism.parse("f(**"), :continuable? + assert_predicate Prism.parse("f(&"), :continuable? + end + + def test_default_parameter_value + assert_predicate Prism.parse("def f(x ="), :continuable? + end + + def test_line_continuation + assert_predicate Prism.parse("1 +\\"), :continuable? + assert_predicate Prism.parse("\"foo\" \\"), :continuable? + end + + def test_embedded_document + # Embedded document (=begin) truncated at various points. + assert_predicate Prism.parse("=b"), :continuable? + assert_predicate Prism.parse("=beg"), :continuable? + assert_predicate Prism.parse("=begin"), :continuable? + assert_predicate Prism.parse("foo\n=b"), :continuable? + end + end +end