lib/lrama/grammar.rb in lrama-0.6.2 vs lib/lrama/grammar.rb in lrama-0.6.3
- old
+ new
@@ -1,41 +1,52 @@
+require "forwardable"
require "lrama/grammar/auxiliary"
require "lrama/grammar/binding"
require "lrama/grammar/code"
require "lrama/grammar/counter"
require "lrama/grammar/error_token"
+require "lrama/grammar/parameterizing_rule"
require "lrama/grammar/percent_code"
require "lrama/grammar/precedence"
require "lrama/grammar/printer"
require "lrama/grammar/reference"
require "lrama/grammar/rule"
require "lrama/grammar/rule_builder"
-require "lrama/grammar/parameterizing_rule"
require "lrama/grammar/symbol"
+require "lrama/grammar/symbols"
require "lrama/grammar/type"
require "lrama/grammar/union"
require "lrama/lexer"
module Lrama
# Grammar is the result of parsing an input grammar file
class Grammar
+ extend Forwardable
+
# Read-only attributes first, then read/write attributes.
attr_reader :percent_codes, :eof_symbol, :error_symbol, :undef_symbol, :accept_symbol, :aux
attr_accessor :union, :expect,
:printers, :error_tokens,
:lex_param, :parse_param, :initial_action,
- :symbols, :types,
+ :after_shift, :before_reduce, :after_reduce, :after_shift_error_token, :after_pop_stack,
+ :symbols_resolver, :types,
:rules, :rule_builders,
:sym_to_rules, :no_stdlib
# The symbol-related methods named below are not implemented in this class:
# Forwardable#def_delegators forwards each of them to @symbols_resolver.
+ def_delegators "@symbols_resolver", :symbols, :nterms, :terms, :add_nterm, :add_term,
+ :find_symbol_by_number!, :find_symbol_by_id!, :token_to_symbol,
+ :find_symbol_by_s_value!, :fill_symbol_number, :fill_nterm_type,
+ :fill_printer, :fill_error_token, :sort_by_number!
+
+
def initialize(rule_counter)
@rule_counter = rule_counter
# Code defined by "%code"
@percent_codes = []
@printers = []
@error_tokens = []
- @symbols = []
+ @symbols_resolver = Grammar::Symbols::Resolver.new
@types = []
@rule_builders = []
@rules = []
@sym_to_rules = {}
@parameterizing_rule_resolver = ParameterizingRule::Resolver.new
@@ -60,48 +71,10 @@
# Builds an ErrorToken from the given keyword arguments and appends it to
# @error_tokens. All three arguments are passed through to ErrorToken.new
# unchanged.
def add_error_token(ident_or_tags:, token_code:, lineno:)
@error_tokens << ErrorToken.new(ident_or_tags: ident_or_tags, token_code: token_code, lineno: lineno)
end
- def add_term(id:, alias_name: nil, tag: nil, token_id: nil, replace: false)
- if token_id && (sym = @symbols.find {|s| s.token_id == token_id })
- if replace
- sym.id = id
- sym.alias_name = alias_name
- sym.tag = tag
- end
-
- return sym
- end
-
- if (sym = @symbols.find {|s| s.id == id })
- return sym
- end
-
- sym = Symbol.new(
- id: id, alias_name: alias_name, number: nil, tag: tag,
- term: true, token_id: token_id, nullable: false
- )
- @symbols << sym
- @terms = nil
-
- return sym
- end
-
- def add_nterm(id:, alias_name: nil, tag: nil)
- return if @symbols.find {|s| s.id == id }
-
- sym = Symbol.new(
- id: id, alias_name: alias_name, number: nil, tag: tag,
- term: false, token_id: nil, nullable: nil,
- )
- @symbols << sym
- @nterms = nil
-
- return sym
- end
-
# Builds a Type from id and tag and appends it to @types.
def add_type(id:, tag:)
@types << Type.new(id: id, tag: tag)
end
def add_nonassoc(sym, precedence)
@@ -163,83 +136,33 @@
# Runs the grammar preparation passes one after another, in the order
# written below.
#
# NOTE(review): the ordering looks significant (for example fill_sym_to_rules
# keys on rule.lhs.number, so lhs and symbol numbers presumably must be set
# before it runs) - confirm before reordering any pass.
def prepare
normalize_rules
collect_symbols
set_lhs_and_rhs
- fill_symbol_number
fill_default_precedence
+ fill_symbols
fill_sym_to_rules
- fill_nterm_type
- fill_symbol_printer
- fill_symbol_error_token
- @symbols.sort_by!(&:number)
compute_nullable
compute_first_set
end
# Runs the grammar validations in sequence: the symbol-level checks first,
# then validate_rule_lhs_is_nterm!.
#
# NOTE(review): presumably a failing check raises and stops the sequence -
# confirm against the individual validators.
#
# TODO: More validation methods
#
# * Validation for no_declared_type_reference
def validate!
- validate_symbol_number_uniqueness!
- validate_symbol_alias_name_uniqueness!
+ @symbols_resolver.validate!
validate_rule_lhs_is_nterm!
end
- def find_symbol_by_s_value(s_value)
- @symbols.find do |sym|
- sym.id.s_value == s_value
- end
- end
-
- def find_symbol_by_s_value!(s_value)
- find_symbol_by_s_value(s_value) || (raise "Symbol not found: #{s_value}")
- end
-
- def find_symbol_by_id(id)
- @symbols.find do |sym|
- sym.id == id || sym.alias_name == id.s_value
- end
- end
-
- def find_symbol_by_id!(id)
- find_symbol_by_id(id) || (raise "Symbol not found: #{id}")
- end
-
- def find_symbol_by_number!(number)
- sym = @symbols[number]
-
- raise "Symbol not found: #{number}" unless sym
- raise "[BUG] Symbol number mismatch. #{number}, #{sym}" if sym.number != number
-
- sym
- end
-
# Strict variant of find_rules_by_symbol: returns its result when that is
# truthy, otherwise raises a RuntimeError with the message
# "Rules for <sym> not found".
def find_rules_by_symbol!(sym)
find_rules_by_symbol(sym) || (raise "Rules for #{sym} not found")
end
# Returns the entry stored in @sym_to_rules under sym.number.
# NOTE(review): presumably nil when nothing is stored for that number
# (@sym_to_rules starts out as a plain {} in initialize) - confirm it is
# never replaced by a Hash with a default.
def find_rules_by_symbol(sym)
@sym_to_rules[sym.number]
end
- def terms_count
- terms.count
- end
-
- def terms
- @terms ||= @symbols.select(&:term?)
- end
-
- def nterms_count
- nterms.count
- end
-
- def nterms
- @nterms ||= @symbols.select(&:nterm?)
- end
-
private
def compute_nullable
@rules.each do |rule|
case
@@ -282,11 +205,11 @@
rules.select {|r| r.nullable.nil? }.each do |rule|
rule.nullable = false
end
- nterms.select {|r| r.nullable.nil? }.each do |nterm|
+ nterms.select {|e| e.nullable.nil? }.each do |nterm|
nterm.nullable = false
end
end
def compute_first_set
@@ -328,16 +251,10 @@
@rule_builders.each do |builder|
builder.setup_rules(@parameterizing_rule_resolver)
end
end
- def find_nterm_by_id!(id)
- nterms.find do |nterm|
- nterm.id == id
- end || (raise "Nterm not found: #{id}")
- end
-
def append_special_symbols
# YYEMPTY (token_id: -2, number: -2) is added when a template is evaluated
# term = add_term(id: Token.new(Token::Ident, "YYEMPTY"), token_id: -2)
# term.number = -2
# @empty_symbol = term
@@ -395,102 +312,20 @@
raise "Unknown class: #{s}"
end
end
end
- # Fill #number and #token_id
- def fill_symbol_number
- # Character literal in grammar file has
- # token id corresponding to ASCII code by default,
- # so start token_id from 256.
- token_id = 256
-
- # YYEMPTY = -2
- # YYEOF = 0
- # YYerror = 1
- # YYUNDEF = 2
- number = 3
-
- nterm_token_id = 0
- used_numbers = {}
-
- @symbols.map(&:number).each do |n|
- used_numbers[n] = true
- end
-
- (@symbols.select(&:term?) + @symbols.select(&:nterm?)).each do |sym|
- while used_numbers[number] do
- number += 1
- end
-
- if sym.number.nil?
- sym.number = number
- number += 1
- end
-
- # If id is Token::Char, it uses ASCII code
- if sym.term? && sym.token_id.nil?
- if sym.id.is_a?(Lrama::Lexer::Token::Char)
- # Ignore ' on the both sides
- case sym.id.s_value[1..-2]
- when "\\b"
- sym.token_id = 8
- when "\\f"
- sym.token_id = 12
- when "\\n"
- sym.token_id = 10
- when "\\r"
- sym.token_id = 13
- when "\\t"
- sym.token_id = 9
- when "\\v"
- sym.token_id = 11
- when "\""
- sym.token_id = 34
- when "'"
- sym.token_id = 39
- when "\\\\"
- sym.token_id = 92
- when /\A\\(\d+)\z/
- sym.token_id = Integer($1, 8)
- when /\A(.)\z/
- sym.token_id = $1.bytes.first
- else
- raise "Unknown Char s_value #{sym}"
- end
- else
- sym.token_id = token_id
- token_id += 1
- end
- end
-
- if sym.nterm? && sym.token_id.nil?
- sym.token_id = nterm_token_id
- nterm_token_id += 1
- end
- end
- end
-
# Converts each rule's raw tokens into symbols via token_to_symbol:
# rule.lhs is set from rule._lhs (only when _lhs is present) and rule.rhs
# is set to the converted rule._rhs tokens, in the same order.
def set_lhs_and_rhs
@rules.each do |rule|
rule.lhs = token_to_symbol(rule._lhs) if rule._lhs
rule.rhs = rule._rhs.map do |t|
token_to_symbol(t)
end
end
end
- def token_to_symbol(token)
- case token
- when Lrama::Lexer::Token
- find_symbol_by_id!(token)
- else
- raise "Unknown class: #{token}"
- end
- end
-
# Rule inherits precedence from the last term in RHS.
#
# https://www.gnu.org/software/bison/manual/html_node/How-Precedence.html
def fill_default_precedence
@rules.each do |rule|
@@ -504,77 +339,23 @@
rule.precedence_sym = precedence_sym
end
end
# Runs the symbol fill-in steps in order, handing @types, @printers and
# @error_tokens to the steps that take them, and finishes with
# sort_by_number!. Every step called here is forwarded to @symbols_resolver
# (see the def_delegators list at the top of the class).
+ def fill_symbols
+ fill_symbol_number
+ fill_nterm_type(@types)
+ fill_printer(@printers)
+ fill_error_token(@error_tokens)
+ sort_by_number!
+ end
+
# Groups @rules by the number of their LHS symbol: @sym_to_rules maps
# rule.lhs.number to an array of rules, appended in @rules order.
def fill_sym_to_rules
@rules.each do |rule|
key = rule.lhs.number
# Create the bucket on first use, then append.
@sym_to_rules[key] ||= []
@sym_to_rules[key] << rule
end
- end
-
- # Fill nterm's tag defined by %type decl
- def fill_nterm_type
- @types.each do |type|
- nterm = find_nterm_by_id!(type.id)
- nterm.tag = type.tag
- end
- end
-
- def fill_symbol_printer
- @symbols.each do |sym|
- @printers.each do |printer|
- printer.ident_or_tags.each do |ident_or_tag|
- case ident_or_tag
- when Lrama::Lexer::Token::Ident
- sym.printer = printer if sym.id == ident_or_tag
- when Lrama::Lexer::Token::Tag
- sym.printer = printer if sym.tag == ident_or_tag
- else
- raise "Unknown token type. #{printer}"
- end
- end
- end
- end
- end
-
- def fill_symbol_error_token
- @symbols.each do |sym|
- @error_tokens.each do |error_token|
- error_token.ident_or_tags.each do |ident_or_tag|
- case ident_or_tag
- when Lrama::Lexer::Token::Ident
- sym.error_token = error_token if sym.id == ident_or_tag
- when Lrama::Lexer::Token::Tag
- sym.error_token = error_token if sym.tag == ident_or_tag
- else
- raise "Unknown token type. #{error_token}"
- end
- end
- end
- end
- end
-
- def validate_symbol_number_uniqueness!
- invalid = @symbols.group_by(&:number).select do |number, syms|
- syms.count > 1
- end
-
- return if invalid.empty?
-
- raise "Symbol number is duplicated. #{invalid}"
- end
-
- def validate_symbol_alias_name_uniqueness!
- invalid = @symbols.select(&:alias_name).group_by(&:alias_name).select do |alias_name, syms|
- syms.count > 1
- end
-
- return if invalid.empty?
-
- raise "Symbol alias name is duplicated. #{invalid}"
end
def validate_rule_lhs_is_nterm!
errors = []