lib/lrama/grammar/rule_builder.rb in lrama-0.6.0 vs lib/lrama/grammar/rule_builder.rb in lrama-0.6.1
- old
+ new
@@ -1,25 +1,26 @@
require 'lrama/grammar/parameterizing_rules/builder'
module Lrama
class Grammar
class RuleBuilder
- attr_accessor :lhs, :lhs_tag, :line
- attr_reader :rhs, :user_code, :precedence_sym
+ attr_accessor :lhs, :line
+ attr_reader :lhs_tag, :rhs, :user_code, :precedence_sym
- def initialize(rule_counter, midrule_action_counter, position_in_original_rule_rhs = nil, skip_preprocess_references: false)
+ def initialize(rule_counter, midrule_action_counter, position_in_original_rule_rhs = nil, lhs_tag: nil, skip_preprocess_references: false)
@rule_counter = rule_counter
@midrule_action_counter = midrule_action_counter
@position_in_original_rule_rhs = position_in_original_rule_rhs
@skip_preprocess_references = skip_preprocess_references
@lhs = nil
+ @lhs_tag = lhs_tag
@rhs = []
- @lhs_tag = nil
@user_code = nil
@precedence_sym = nil
@line = nil
+ @rule_builders_for_parameterizing_rules = []
@rule_builders_for_derived_rules = []
end
def add_rhs(rhs)
if !@line
@@ -31,11 +32,11 @@
@rhs << rhs
end
def user_code=(user_code)
if !@line
- @line = user_code.line
+ @line = user_code&.line
end
flush_user_code
@user_code = user_code
@@ -49,18 +50,18 @@
def complete_input
freeze_rhs
end
- def setup_rules(parameterizing_resolver)
+ def setup_rules(parameterizing_rule_resolver)
preprocess_references unless @skip_preprocess_references
- process_rhs(parameterizing_resolver)
+ process_rhs(parameterizing_rule_resolver)
build_rules
end
def rules
- @parameterizing_rules + @midrule_action_rules + @rules
+ @parameterizing_rules + @old_parameterizing_rules + @midrule_action_rules + @rules
end
private
def freeze_rhs
@@ -73,65 +74,100 @@
def build_rules
tokens = @replaced_rhs
rule = Rule.new(
- id: @rule_counter.increment, _lhs: lhs, _rhs: tokens, token_code: user_code,
+ id: @rule_counter.increment, _lhs: lhs, _rhs: tokens, lhs_tag: lhs_tag, token_code: user_code,
position_in_original_rule_rhs: @position_in_original_rule_rhs, precedence_sym: precedence_sym, lineno: line
)
@rules = [rule]
+ @parameterizing_rules = @rule_builders_for_parameterizing_rules.map do |rule_builder|
+ rule_builder.rules
+ end.flatten
@midrule_action_rules = @rule_builders_for_derived_rules.map do |rule_builder|
rule_builder.rules
end.flatten
@midrule_action_rules.each do |r|
r.original_rule = rule
end
end
# rhs is a mixture of several token types, such as `Ident`, `InstantiateRule`, `UserCode` and so on.
# `#process_rhs` replaces some of these tokens with `Ident` so that every element of `@replaced_rhs` is an `Ident` or a `Char`.
- def process_rhs(parameterizing_resolver)
+ def process_rhs(parameterizing_rule_resolver)
return if @replaced_rhs
@replaced_rhs = []
- @parameterizing_rules = []
+ @old_parameterizing_rules = []
rhs.each_with_index do |token, i|
case token
when Lrama::Lexer::Token::Char
@replaced_rhs << token
when Lrama::Lexer::Token::Ident
@replaced_rhs << token
when Lrama::Lexer::Token::InstantiateRule
- if parameterizing_resolver.defined?(token.rule_name)
- parameterizing = parameterizing_resolver.build_rules(token, @rule_counter, @lhs_tag, line)
- @parameterizing_rules = @parameterizing_rules + parameterizing.map(&:rules).flatten
- @replaced_rhs = @replaced_rhs + parameterizing.map(&:token).flatten.uniq
+ if parameterizing_rule_resolver.defined?(token)
+ parameterizing_rule = parameterizing_rule_resolver.find(token)
+ raise "Unexpected token. #{token}" unless parameterizing_rule
+
+ bindings = Binding.new(parameterizing_rule, token.args)
+ lhs_s_value = lhs_s_value(token, bindings)
+ if (created_lhs = parameterizing_rule_resolver.created_lhs(lhs_s_value))
+ @replaced_rhs << created_lhs
+ else
+ lhs_token = Lrama::Lexer::Token::Ident.new(s_value: lhs_s_value, location: token.location)
+ @replaced_rhs << lhs_token
+ parameterizing_rule_resolver.created_lhs_list << lhs_token
+ parameterizing_rule.rhs_list.each do |r|
+ rule_builder = RuleBuilder.new(@rule_counter, @midrule_action_counter, i, lhs_tag: token.lhs_tag, skip_preprocess_references: true)
+ rule_builder.lhs = lhs_token
+ r.symbols.each { |sym| rule_builder.add_rhs(bindings.resolve_symbol(sym)) }
+ rule_builder.line = line
+ rule_builder.user_code = r.user_code
+ rule_builder.precedence_sym = r.precedence_sym
+ rule_builder.complete_input
+ rule_builder.setup_rules(parameterizing_rule_resolver)
+ @rule_builders_for_parameterizing_rules << rule_builder
+ end
+ end
else
# TODO: Delete when the standard library is defined as a grammar file.
- parameterizing = ParameterizingRules::Builder.new(token, @rule_counter, @lhs_tag, user_code, precedence_sym, line)
- @parameterizing_rules = @parameterizing_rules + parameterizing.build
- @replaced_rhs << parameterizing.build_token
+ parameterizing_rule = ParameterizingRules::Builder.new(token, @rule_counter, token.lhs_tag, user_code, precedence_sym, line)
+ @old_parameterizing_rules = @old_parameterizing_rules + parameterizing_rule.build
+ @replaced_rhs << parameterizing_rule.build_token
end
when Lrama::Lexer::Token::UserCode
prefix = token.referred ? "@" : "$@"
new_token = Lrama::Lexer::Token::Ident.new(s_value: prefix + @midrule_action_counter.increment.to_s)
@replaced_rhs << new_token
- rule_builder = RuleBuilder.new(@rule_counter, @midrule_action_counter, i, skip_preprocess_references: true)
+ rule_builder = RuleBuilder.new(@rule_counter, @midrule_action_counter, i, lhs_tag: lhs_tag, skip_preprocess_references: true)
rule_builder.lhs = new_token
rule_builder.user_code = token
rule_builder.complete_input
- rule_builder.setup_rules(parameterizing_resolver)
+ rule_builder.setup_rules(parameterizing_rule_resolver)
@rule_builders_for_derived_rules << rule_builder
else
raise "Unexpected token. #{token}"
end
end
end
+ def lhs_s_value(token, bindings)
+ s_values = token.args.map do |arg|
+ resolved = bindings.resolve_symbol(arg)
+ if resolved.is_a?(Lexer::Token::InstantiateRule)
+ [resolved.s_value, resolved.args.map(&:s_value)]
+ else
+ resolved.s_value
+ end
+ end
+ "#{token.rule_name}_#{s_values.join('_')}"
+ end
+
def numberize_references
# Bison's n-th component is 1-origin
(rhs + [user_code]).compact.each.with_index(1) do |token, i|
next unless token.is_a?(Lrama::Lexer::Token::UserCode)
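The hunk above replaces the old `ParameterizingRules::Builder` path for known rules with explicit resolution: an `InstantiateRule` token is looked up in the resolver, its arguments are bound via `Binding`, and a synthetic lhs `Ident` is derived from `lhs_s_value`, with `created_lhs_list` ensuring repeated instantiations of the same rule with the same arguments reuse one generated nonterminal. The sketch below is illustrative only and not part of the diff or the gem; `Resolved` and `mangled_lhs` are stand-in names, and where the real code checks `is_a?(Lexer::Token::InstantiateRule)` this sketch simply checks for non-empty `args`.

# Illustrative sketch, not part of lrama: how lhs_s_value derives the
# generated lhs name for an instantiated parameterizing rule.
Resolved = Struct.new(:s_value, :args)

def mangled_lhs(rule_name, resolved_args)
  s_values = resolved_args.map do |resolved|
    if resolved.args && !resolved.args.empty? # stands in for InstantiateRule
      [resolved.s_value, resolved.args.map(&:s_value)]
    else
      resolved.s_value
    end
  end
  # Array#join flattens the nested array, so nested instantiations are
  # folded into a single underscore-separated name.
  "#{rule_name}_#{s_values.join('_')}"
end

mangled_lhs("option", [Resolved.new("number", [])])
# => "option_number"
mangled_lhs("option", [Resolved.new("list", [Resolved.new("number", [])])])
# => "option_list_number"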
@@ -142,17 +178,15 @@
ref.name = '$'
else
candidates = rhs.each_with_index.select {|token, i| token.referred_by?(ref_name) }
if candidates.size >= 2
- location = token.location.partial_location(ref.first_column, ref.last_column)
- raise location.generate_error_message("Referring symbol `#{ref_name}` is duplicated.")
+ token.invalid_ref(ref, "Referring symbol `#{ref_name}` is duplicated.")
end
unless (referring_symbol = candidates.first)
- location = token.location.partial_location(ref.first_column, ref.last_column)
- raise location.generate_error_message("Referring symbol `#{ref_name}` is not found.")
+ token.invalid_ref(ref, "Referring symbol `#{ref_name}` is not found.")
end
ref.index = referring_symbol[1] + 1
end
end
@@ -161,10 +195,10 @@
next if ref.type == :at
if ref.index
# TODO: Prohibit $0 even though Bison allows it?
# See: https://www.gnu.org/software/bison/manual/html_node/Actions.html
- raise "Can not refer following component. #{ref.index} >= #{i}. #{token}" if ref.index >= i
+ token.invalid_ref(ref, "Can not refer following component. #{ref.index} >= #{i}.") if ref.index >= i
rhs[ref.index - 1].referred = true
end
end
end
end
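The last two hunks route reference errors through `token.invalid_ref` instead of building a partial location and raising by hand; the resolution logic itself is unchanged: named references are matched against the rhs and numbered from 1. The stand-alone sketch below is illustrative only, using plain strings for rhs symbols rather than Lrama tokens, a hypothetical `index_of` helper, and a plain `raise` where the real code calls `invalid_ref`.

# Illustrative sketch, not part of lrama: resolving a named reference to a
# 1-origin component index, mirroring numberize_references above.
def index_of(rhs, ref_name)
  candidates = rhs.each_with_index.select { |name, _i| name == ref_name }
  raise "Referring symbol `#{ref_name}` is duplicated." if candidates.size >= 2
  raise "Referring symbol `#{ref_name}` is not found." unless (found = candidates.first)
  found[1] + 1 # Bison components are 1-origin
end

rhs = %w[keyword_class cname superclass bodystmt keyword_end] # hypothetical rule rhs
index_of(rhs, "cname")    # => 2
index_of(rhs, "bodystmt") # => 4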