27 changes: 27 additions & 0 deletions NEWS.md
@@ -2,6 +2,33 @@

## Lrama 0.7.1 (2025-xx-xx)

### Semantic Predicates

Support semantic predicates to conditionally enable grammar rules based on runtime conditions.
Predicates are evaluated at parse time, similar to ANTLR4's semantic predicates.

```yacc
rule : {expression}? TOKEN { action }
| TOKEN { action }
;
```

The predicate `{expression}?` is evaluated at parse time. If it returns true (non-zero), the alternative is enabled.

Example:

```yacc
widget
: {new_syntax}? WIDGET ID NEW_ARG
{ printf("New syntax\n"); }
| {!new_syntax}? WIDGET ID OLD_ARG
{ printf("Old syntax\n"); }
;
```

Predicates are compiled into static functions in the generated parser.
Leading predicates (at the start of a rule) affect prediction, while trailing predicates act as validation.
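
For the `{new_syntax}?` predicate above, the emitted helper looks roughly like this (a sketch based on the template in `lib/lrama/output.rb`; the numeric suffix comes from an internal predicate counter, so the exact function name depends on the grammar):

```c
/* Semantic predicate: {new_syntax}? */
static int
yypredicate_0 (void)
{
  return (new_syntax);
}
```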

### Syntax Diagrams

Lrama provides an API for generating HTML syntax diagrams. These diagrams are useful as a grammar development aid and also serve as automatically generated documentation of the grammar.
8 changes: 5 additions & 3 deletions lib/lrama/grammar.rb
@@ -16,6 +16,7 @@
require_relative "grammar/reference"
require_relative "grammar/rule"
require_relative "grammar/rule_builder"
require_relative "grammar/semantic_predicate"
require_relative "grammar/symbol"
require_relative "grammar/symbols"
require_relative "grammar/type"
@@ -106,9 +107,10 @@ class Grammar
:find_symbol_by_s_value!, :fill_symbol_number, :fill_nterm_type,
:fill_printer, :fill_destructor, :fill_error_token, :sort_by_number!

# @rbs (Counter rule_counter, bool locations, Hash[String, String] define) -> void
def initialize(rule_counter, locations, define = {})
# @rbs (Counter rule_counter, Counter predicate_counter, bool locations, Hash[String, String] define) -> void
def initialize(rule_counter, predicate_counter, locations, define = {})
@rule_counter = rule_counter
@predicate_counter = predicate_counter

# Code defined by "%code"
@percent_codes = []
@@ -139,7 +141,7 @@ def initialize(rule_counter, locations, define = {})

# @rbs (Counter rule_counter, Counter midrule_action_counter) -> RuleBuilder
def create_rule_builder(rule_counter, midrule_action_counter)
RuleBuilder.new(rule_counter, midrule_action_counter, @parameterized_resolver)
RuleBuilder.new(rule_counter, midrule_action_counter, @parameterized_resolver, @predicate_counter)
end

# @rbs (id: Lexer::Token::Base, code: Lexer::Token::UserCode) -> Array[PercentCode]
1 change: 1 addition & 0 deletions lib/lrama/grammar/inline/resolver.rb
@@ -33,6 +33,7 @@ def build_rule(rhs, token, index, rule)
@rule_builder.rule_counter,
@rule_builder.midrule_action_counter,
@rule_builder.parameterized_resolver,
@rule_builder.predicate_counter,
lhs_tag: @rule_builder.lhs_tag
)
resolve_rhs(builder, rhs, index, token, rule)
13 changes: 7 additions & 6 deletions lib/lrama/grammar/rule.rb
@@ -23,14 +23,15 @@ class Rule < Struct.new(:id, :_lhs, :lhs, :lhs_tag, :_rhs, :rhs, :token_code, :p
# attr_accessor nullable: bool
# attr_accessor precedence_sym: Grammar::Symbol?
# attr_accessor lineno: Integer?
#
# def initialize: (
# ?id: Integer, ?_lhs: Lexer::Token::Base?, ?lhs: Lexer::Token::Base, ?lhs_tag: Lexer::Token::Tag?, ?_rhs: Array[Lexer::Token::Base], ?rhs: Array[Grammar::Symbol],
# ?token_code: Lexer::Token::UserCode?, ?position_in_original_rule_rhs: Integer?, ?nullable: bool,
# ?precedence_sym: Grammar::Symbol?, ?lineno: Integer?
# ) -> void

attr_accessor :original_rule #: Rule
attr_accessor :predicates #: Array[Grammar::SemanticPredicate]

# @rbs (**untyped kwargs) -> void
def initialize(**kwargs)
super(**kwargs)
@predicates = []
end

# @rbs (Rule other) -> bool
def ==(other)
29 changes: 25 additions & 4 deletions lib/lrama/grammar/rule_builder.rb
@@ -17,22 +17,26 @@ class RuleBuilder
# @parameterized_rules: Array[Rule]
# @midrule_action_rules: Array[Rule]
# @replaced_rhs: Array[Lexer::Token::Base]?
# @predicates: Array[[Lexer::Token::SemanticPredicate, bool]]

attr_accessor :lhs #: Lexer::Token::Base?
attr_accessor :line #: Integer?
attr_reader :rule_counter #: Counter
attr_reader :midrule_action_counter #: Counter
attr_reader :parameterized_resolver #: Grammar::Parameterized::Resolver
attr_reader :predicate_counter #: Counter
attr_reader :lhs_tag #: Lexer::Token::Tag?
attr_reader :rhs #: Array[Lexer::Token::Base]
attr_reader :user_code #: Lexer::Token::UserCode?
attr_reader :precedence_sym #: Grammar::Symbol?
attr_reader :predicates #: Array[[Lexer::Token::SemanticPredicate, bool]]

# @rbs (Counter rule_counter, Counter midrule_action_counter, Grammar::Parameterized::Resolver parameterized_resolver, ?Integer position_in_original_rule_rhs, ?lhs_tag: Lexer::Token::Tag?, ?skip_preprocess_references: bool) -> void
def initialize(rule_counter, midrule_action_counter, parameterized_resolver, position_in_original_rule_rhs = nil, lhs_tag: nil, skip_preprocess_references: false)
# @rbs (Counter rule_counter, Counter midrule_action_counter, Grammar::Parameterized::Resolver parameterized_resolver, Counter? predicate_counter, ?Integer position_in_original_rule_rhs, ?lhs_tag: Lexer::Token::Tag?, ?skip_preprocess_references: bool) -> void
def initialize(rule_counter, midrule_action_counter, parameterized_resolver, predicate_counter = nil, position_in_original_rule_rhs = nil, lhs_tag: nil, skip_preprocess_references: false)
@rule_counter = rule_counter
@midrule_action_counter = midrule_action_counter
@parameterized_resolver = parameterized_resolver
@predicate_counter = predicate_counter || Counter.new(0)
@position_in_original_rule_rhs = position_in_original_rule_rhs
@skip_preprocess_references = skip_preprocess_references

@@ -41,6 +45,7 @@ def initialize(rule_counter, midrule_action_counter, parameterized_resolver, pos
@rhs = []
@user_code = nil
@precedence_sym = nil
@predicates = []
@line = nil
@rules = []
@rule_builders_for_parameterized = []
@@ -74,6 +79,14 @@ def precedence_sym=(precedence_sym)
@precedence_sym = precedence_sym
end

# @rbs (Lexer::Token::SemanticPredicate predicate) -> void
def add_predicate(predicate)
@line ||= predicate.line
flush_user_code
predicate_with_position = [predicate, @rhs.empty?]
@predicates << predicate_with_position
end

# @rbs () -> void
def complete_input
freeze_rhs
@@ -118,6 +131,14 @@ def build_rules
id: @rule_counter.increment, _lhs: lhs, _rhs: tokens, lhs_tag: lhs_tag, token_code: user_code,
position_in_original_rule_rhs: @position_in_original_rule_rhs, precedence_sym: precedence_sym, lineno: line
)

rule.predicates = @predicates.map do |(pred_token, is_leading)|
pred = Grammar::SemanticPredicate.new(pred_token)
pred.index = @predicate_counter.increment
pred.position = is_leading ? :leading : :trailing
pred
end

@rules = [rule]
@parameterized_rules = @rule_builders_for_parameterized.map do |rule_builder|
rule_builder.rules
@@ -158,7 +179,7 @@ def process_rhs
replaced_rhs << lhs_token
@parameterized_resolver.created_lhs_list << lhs_token
parameterized_rule.rhs.each do |r|
rule_builder = RuleBuilder.new(@rule_counter, @midrule_action_counter, @parameterized_resolver, lhs_tag: token.lhs_tag || parameterized_rule.tag)
rule_builder = RuleBuilder.new(@rule_counter, @midrule_action_counter, @parameterized_resolver, @predicate_counter, lhs_tag: token.lhs_tag || parameterized_rule.tag)
rule_builder.lhs = lhs_token
r.symbols.each { |sym| rule_builder.add_rhs(bindings.resolve_symbol(sym)) }
rule_builder.line = line
@@ -175,7 +196,7 @@ def process_rhs
new_token = Lrama::Lexer::Token::Ident.new(s_value: prefix + @midrule_action_counter.increment.to_s)
replaced_rhs << new_token

rule_builder = RuleBuilder.new(@rule_counter, @midrule_action_counter, @parameterized_resolver, i, lhs_tag: tag, skip_preprocess_references: true)
rule_builder = RuleBuilder.new(@rule_counter, @midrule_action_counter, @parameterized_resolver, @predicate_counter, i, lhs_tag: tag, skip_preprocess_references: true)
rule_builder.lhs = new_token
rule_builder.user_code = token
rule_builder.complete_input
50 changes: 50 additions & 0 deletions lib/lrama/grammar/semantic_predicate.rb
@@ -0,0 +1,50 @@
# rbs_inline: enabled
# frozen_string_literal: true

module Lrama
class Grammar
class SemanticPredicate
# @rbs!
# type position = :leading | :trailing | :middle | :unknown

attr_reader :token #: Lexer::Token::SemanticPredicate
attr_reader :code #: String
attr_accessor :position #: position
attr_accessor :index #: Integer?

# @rbs (Lexer::Token::SemanticPredicate token) -> void
def initialize(token)
@token = token
@code = token.code
@position = :unknown
@index = nil
end

# @rbs () -> bool
def visible?
@position == :leading
end

# @rbs () -> String
def function_name
raise "Predicate index not set" if @index.nil?
"yypredicate_#{@index}"
end

# @rbs () -> String
def error_message
"semantic predicate failed: {#{code}}?"
end

# @rbs () -> Lexer::Location
def location
@token.location
end

# @rbs () -> String
def to_s
"{#{code}}?"
end
end
end
end
74 changes: 73 additions & 1 deletion lib/lrama/lexer.rb
@@ -18,7 +18,8 @@ class Lexer
# [::Symbol, Token::Char] |
# [::Symbol, Token::Str] |
# [::Symbol, Token::Int] |
# [::Symbol, Token::Ident]
# [::Symbol, Token::Ident] |
# [::Symbol, Token::SemanticPredicate]
#
# type c_token = [:C_DECLARATION, Token::UserCode]

@@ -119,6 +120,13 @@ def lex_token
case
when @scanner.eos?
return
when @scanner.check(/{/)
if predicate_token = try_scan_semantic_predicate
return [:SEMANTIC_PREDICATE, predicate_token]
else
@scanner.scan(/{/)
return [@scanner.matched, Lrama::Lexer::Token::Token.new(s_value: @scanner.matched, location: location)]
end
when @scanner.scan(/#{SYMBOLS.join('|')}/)
return [@scanner.matched, Lrama::Lexer::Token::Token.new(s_value: @scanner.matched, location: location)]
when @scanner.scan(/#{PERCENT_TOKENS.join('|')}/)
@@ -191,6 +199,70 @@ def lex_c_code

private

# @rbs () -> Lrama::Lexer::Token::SemanticPredicate?
def try_scan_semantic_predicate
start_pos = @scanner.pos
start_line = @line
start_head = @head
return nil unless @scanner.scan(/{/)

code = +''
nested = 1
until @scanner.eos? do
case
when @scanner.scan(/{/)
code << @scanner.matched
nested += 1
when @scanner.scan(/}/)
if nested == 1
if @scanner.scan(/\?/)
return Lrama::Lexer::Token::SemanticPredicate.new(
s_value: "{#{code}}?",
code: code.strip,
location: location
)
else
@scanner.pos = start_pos
@line = start_line
@head = start_head
return nil
end
else
code << @scanner.matched
nested -= 1
end
when @scanner.scan(/\n/)
code << @scanner.matched
newline
when @scanner.scan(/"[^"]*"/)
code << @scanner.matched
@line += @scanner.matched.count("\n")
when @scanner.scan(/'[^']*'/)
code << @scanner.matched
when @scanner.scan(/\/\*/)
code << @scanner.matched
until @scanner.eos?
if @scanner.scan_until(/\*\//)
code << @scanner.matched
@scanner.matched.count("\n").times { newline }
break
end
end
when @scanner.scan(/\/\/[^\n]*/)
code << @scanner.matched
when @scanner.scan(/[^{}"'\n\/]+/)
code << @scanner.matched
else
code << @scanner.getch
end
end

@scanner.pos = start_pos
@line = start_line
@head = start_head
nil
end

# @rbs () -> void
def lex_comment
until @scanner.eos? do
1 change: 1 addition & 0 deletions lib/lrama/lexer/token.rb
@@ -7,6 +7,7 @@
require_relative 'token/ident'
require_relative 'token/instantiate_rule'
require_relative 'token/int'
require_relative 'token/semantic_predicate'
require_relative 'token/str'
require_relative 'token/tag'
require_relative 'token/token'
23 changes: 23 additions & 0 deletions lib/lrama/lexer/token/semantic_predicate.rb
@@ -0,0 +1,23 @@
# rbs_inline: enabled
# frozen_string_literal: true

module Lrama
class Lexer
module Token
class SemanticPredicate < Base
attr_reader :code #: String

# @rbs (s_value: String, code: String, ?location: Location) -> void
def initialize(s_value:, code:, location: nil)
super(s_value: s_value, location: location)
@code = code.freeze
end

# @rbs () -> String
def to_s
"semantic_predicate: `{#{code}}?`, location: #{location}"
end
end
end
end
end
20 changes: 20 additions & 0 deletions lib/lrama/output.rb
@@ -235,6 +235,26 @@ def symbol_actions_for_error_token
end.join
end

# Generate semantic predicate functions
def predicate_functions
all_predicates = @grammar.rules.flat_map(&:predicates).compact.uniq { |p| p.index }
return "" if all_predicates.empty?

functions = all_predicates.map do |predicate|
<<-STR
/* Semantic predicate: {#{predicate.code}}? */
static int
#{predicate.function_name} (void)
{
return (#{predicate.code});
}

STR
end

functions.join
end

# b4_user_actions
def user_actions
action = @context.states.rules.map do |rule|