parser.y in lrama-0.5.9 vs parser.y in lrama-0.5.10
- old
+ new
@@ -1,7 +1,7 @@
class Lrama::Parser
- expect 7
+ expect 1
token C_DECLARATION CHARACTER IDENT_COLON IDENTIFIER INTEGER STRING TAG
rule
@@ -34,19 +34,17 @@
| "%require" STRING
| "%param" params
| "%lex-param" params
{
val[1].each {|token|
- token.references = []
- @grammar.lex_param = @grammar.build_code(:lex_param, token).token_code.s_value
+ @grammar.lex_param = Grammar::Code::NoReferenceCode.new(type: :lex_param, token_code: token).token_code.s_value
}
}
| "%parse-param" params
{
val[1].each {|token|
- token.references = []
- @grammar.parse_param = @grammar.build_code(:parse_param, token).token_code.s_value
+ @grammar.parse_param = Grammar::Code::NoReferenceCode.new(type: :parse_param, token_code: token).token_code.s_value
}
}
| "%code" IDENTIFIER "{"
{
begin_c_declaration("}")
@@ -67,11 +65,11 @@
{
end_c_declaration
}
"}"
{
- @grammar.initial_action = @grammar.build_code(:initial_action, val[3])
+ @grammar.initial_action = Grammar::Code::InitialActionCode.new(type: :initial_action, token_code: val[3])
}
| ";"
grammar_declaration: "%union" "{"
{
@@ -81,11 +79,14 @@
{
end_c_declaration
}
"}"
{
- @grammar.set_union(@grammar.build_code(:union, val[3]), val[3].line)
+ @grammar.set_union(
+ Grammar::Code::NoReferenceCode.new(type: :union, token_code: val[3]),
+ val[3].line
+ )
}
| symbol_declaration
| "%destructor" "{"
{
begin_c_declaration("}")
@@ -103,11 +104,15 @@
{
end_c_declaration
}
"}" generic_symlist
{
- @grammar.add_printer(ident_or_tags: val[6], code: @grammar.build_code(:printer, val[3]), lineno: val[3].line)
+ @grammar.add_printer(
+ ident_or_tags: val[6],
+ token_code: val[3],
+ lineno: val[3].line
+ )
}
| "%error-token" "{"
{
begin_c_declaration("}")
}
@@ -115,11 +120,15 @@
{
end_c_declaration
}
"}" generic_symlist
{
- @grammar.add_error_token(ident_or_tags: val[6], code: @grammar.build_code(:error_token, val[3]), lineno: val[3].line)
+ @grammar.add_error_token(
+ ident_or_tags: val[6],
+ token_code: val[3],
+ lineno: val[3].line
+ )
}
symbol_declaration: "%token" token_declarations
| "%type" symbol_declarations
{
@@ -250,22 +259,22 @@
}
| TAG token_declaration_list_for_precedence
{
result = [{tag: val[0], tokens: val[1]}]
}
- | token_declarations_for_precedence token_declaration_list_for_precedence
+ | token_declarations_for_precedence TAG token_declaration_list_for_precedence
{
- result = val[0].append({tag: nil, tokens: val[1]})
+ result = val[0].append({tag: val[1], tokens: val[2]})
}
# Left-recursive list of precedence token declarations.
# The first alternative starts a one-element array; the second appends each
# further declaration to the array built so far.
token_declaration_list_for_precedence: token_declaration_for_precedence { result = [val[0]] }
| token_declaration_list_for_precedence token_declaration_for_precedence { result = val[0].append(val[1]) }
# A single entry in a precedence declaration is just an `id`.
token_declaration_for_precedence: id
# `id` matches an IDENTIFIER or a CHARACTER token; either one is rejected
# when @prec_seen is set.
# Old side (-): raised a bare string via `raise`.
# New side (+): calls on_action_error with a message and the offending token (val[0]).
- id: IDENTIFIER { raise "Ident after %prec" if @prec_seen }
- | CHARACTER { raise "Char after %prec" if @prec_seen }
+ id: IDENTIFIER { on_action_error("ident after %prec", val[0]) if @prec_seen }
+ | CHARACTER { on_action_error("char after %prec", val[0]) if @prec_seen }
# One or more rules_or_grammar_declaration items (left-recursive).
grammar: rules_or_grammar_declaration
| grammar rules_or_grammar_declaration
rules_or_grammar_declaration: rules
@@ -273,60 +282,78 @@
# A rule group: the left-hand side (id_colon), an optional named reference,
# ":" and the list of right-hand-side alternatives (rhs_list).
rules: id_colon named_ref_opt ":" rhs_list
{
lhs = val[0]
# The named_ref_opt value becomes the alias of the left-hand side.
lhs.alias_name = val[1]
# Old side (-): each alternative is a hash with :rhs and :lineno keys that is
# passed to @grammar.add_rule together with the lhs.
- val[3].each {|hash|
- @grammar.add_rule(lhs: lhs, rhs: hash[:rhs], lineno: hash[:lineno])
- }
# New side (+): each alternative is a builder object; it is given the lhs,
# complete_input is called on it, and it is then handed to
# @grammar.add_rule_builder.
+ val[3].each do |builder|
+ builder.lhs = lhs
+ builder.complete_input
+ @grammar.add_rule_builder(builder)
+ end
}
# List of right-hand-side alternatives separated by "|"; a ";" after the list
# is also accepted (third alternative, which has no action of its own).
rhs_list: rhs
{
# Old side (-): wraps the rhs array in a hash; lineno is the line of the first
# rhs element, or @lexer.line - 1 when there is no first element.
- result = [{rhs: val[0], lineno: val[0].first&.line || @lexer.line - 1}]
# New side (+): val[0] is a builder; when it has no line yet, the line
# defaults to @lexer.line - 1. The result is a one-element array.
+ builder = val[0]
+ if !builder.line
+ builder.line = @lexer.line - 1
+ end
+ result = [builder]
}
| rhs_list "|" rhs
{
# Same line-defaulting as the first alternative, applied to the rhs after "|"
# (val[2]), which is then appended to the existing list (val[0]).
- result = val[0].append({rhs: val[2], lineno: val[2].first&.line || @lexer.line - 1})
+ builder = val[2]
+ if !builder.line
+ builder.line = @lexer.line - 1
+ end
+ result = val[0].append(builder)
}
| rhs_list ";"
rhs: /* empty */
{
reset_precs
- result = []
+ result = Grammar::RuleBuilder.new(@rule_counter, @midrule_action_counter)
}
| "%empty"
{
reset_precs
- result = []
+ result = Grammar::RuleBuilder.new(@rule_counter, @midrule_action_counter)
}
| rhs symbol named_ref_opt
{
token = val[1]
token.alias_name = val[2]
- result = val[0].append(token)
+ builder = val[0]
+ builder.add_rhs(token)
+ result = builder
}
- | rhs "?"
- {
- token = Lrama::Lexer::Token::Parameterizing.new(s_value: val[1])
- result = val[0].append(token)
- }
- | rhs "+"
- {
- token = Lrama::Lexer::Token::Parameterizing.new(s_value: val[1])
- result = val[0].append(token)
- }
- | rhs "*"
- {
- token = Lrama::Lexer::Token::Parameterizing.new(s_value: val[1])
- result = val[0].append(token)
- }
+ | rhs IDENTIFIER parameterizing_suffix
+ {
+ token = Lrama::Lexer::Token::Parameterizing.new(s_value: val[2], location: @lexer.location, args: [val[1]])
+ builder = val[0]
+ builder.add_rhs(token)
+ result = builder
+ }
+ | rhs IDENTIFIER "(" symbol ")"
+ {
+ token = Lrama::Lexer::Token::Parameterizing.new(s_value: val[1].s_value, location: @lexer.location, args: [val[3]])
+ builder = val[0]
+ builder.add_rhs(token)
+ result = builder
+ }
+ | rhs IDENTIFIER "(" symbol "," symbol ")"
+ {
+ token = Lrama::Lexer::Token::Parameterizing.new(s_value: val[1].s_value, location: @lexer.location, args: [val[3], val[5]])
+ builder = val[0]
+ builder.add_rhs(token)
+ result = builder
+ }
| rhs "{"
{
if @prec_seen
- raise "Multiple User_code after %prec" if @code_after_prec
+ on_action_error("multiple User_code after %prec", val[0]) if @code_after_prec
@code_after_prec = true
end
begin_c_declaration("}")
}
C_DECLARATION
@@ -335,19 +362,27 @@
}
"}" named_ref_opt
{
token = val[3]
token.alias_name = val[6]
- result = val[0].append(token)
+ builder = val[0]
+ builder.user_code = token
+ result = builder
}
| rhs "%prec" symbol
{
sym = @grammar.find_symbol_by_id!(val[2])
- result = val[0].append(sym)
@prec_seen = true
+ builder = val[0]
+ builder.precedence_sym = sym
+ result = builder
}
# New side (+) only: the suffix tokens "?", "+" and "*" that may follow an
# IDENTIFIER in the `rhs IDENTIFIER parameterizing_suffix` alternative.
+ parameterizing_suffix: "?"
+ | "+"
+ | "*"
+
# Optional named reference written as `[IDENTIFIER]`; when present, the
# result is the identifier's s_value.
named_ref_opt: # empty
| '[' IDENTIFIER ']' { result = val[1].s_value }
id_colon: IDENT_COLON
@@ -385,47 +420,69 @@
# Stores the grammar source text, its path, and the debug flag (kept in @yydebug).
# text  - grammar source; later passed to Lrama::Lexer.new and split into lines for error messages.
# path  - used as the location prefix in error messages.
# debug - defaults to false.
def initialize(text, path, debug = false)
@text = text
@path = path
@yydebug = debug
# New side (+): two Lrama::Grammar::Counter objects, constructed with 0 and 1.
# They are passed to Grammar.new and to Grammar::RuleBuilder.new elsewhere in this file.
+ @rule_counter = Lrama::Grammar::Counter.new(0)
+ @midrule_action_counter = Lrama::Grammar::Counter.new(1)
end
# Parses @text and builds @grammar; the whole run is wrapped in report_duration(:parse).
# The block's last expression is @grammar (presumably report_duration returns
# the block's value -- confirm against its definition).
def parse
report_duration(:parse) do
@lexer = Lrama::Lexer.new(@text)
# Old side (-) built the grammar without arguments; new side (+) passes @rule_counter.
- @grammar = Lrama::Grammar.new
+ @grammar = Lrama::Grammar.new(@rule_counter)
@precedence_number = 0
reset_precs
do_parse
@grammar.prepare
# Old side (-) called compute_nullable and compute_first_set here; the new side
# no longer calls them from the parser.
- @grammar.compute_nullable
- @grammar.compute_first_set
@grammar.validate!
@grammar
end
end
# Returns the next token from @lexer.
# NOTE(review): presumably the token-source hook used by do_parse -- confirm against racc's runtime.
def next_token
@lexer.next_token
end
# Raises ParseError describing a parse failure. The message contains
# "path:line:column", the offending value, the token name from token_to_str
# (or '?'), the source line taken from @text, and a caret marker line.
# value_stack is not used in the visible body.
def on_error(error_token_id, error_value, value_stack)
# Old side (-): duck-typed check via respond_to?(:line) / respond_to?(:column).
- if error_value.respond_to?(:line) && error_value.respond_to?(:column)
- line = error_value.line
- first_column = error_value.column
# New side (+): explicit Lrama::Lexer::Token check; also records last_column
# and formats the value as the token's s_value in single quotes.
+ if error_value.is_a?(Lrama::Lexer::Token)
+ line = error_value.first_line
+ first_column = error_value.first_column
+ last_column = error_value.last_column
+ value = "'#{error_value.s_value}'"
else
# Fallback: take the position from the lexer's current state.
line = @lexer.line
first_column = @lexer.head_column
+ last_column = @lexer.column
+ value = error_value.inspect
end
# `line - 1` indexes into the array of source lines (line is treated as 1-based here).
raise ParseError, <<~ERROR
- #{@path}:#{line}:#{first_column}: parse error on value #{error_value.inspect} (#{token_to_str(error_token_id) || '?'})
+ #{@path}:#{line}:#{first_column}: parse error on value #{value} (#{token_to_str(error_token_id) || '?'})
#{@text.split("\n")[line - 1]}
- #{carrets(first_column)}
+ #{carrets(first_column, last_column)}
ERROR
end
# New side (+) only. Raises ParseError for an error detected inside a grammar
# action. The message contains "path:line: error_message", the source line
# taken from @text, and a caret marker line.
# error_message - text placed after the "path:line:" prefix.
# error_value   - when it is a Lrama::Lexer::Token, its first_line/first_column/
#                 last_column locate the error; otherwise the lexer's current
#                 line/head_column/column are used.
# This definition appears before the `private` marker in this file.
+def on_action_error(error_message, error_value)
+ if error_value.is_a?(Lrama::Lexer::Token)
+ line = error_value.first_line
+ first_column = error_value.first_column
+ last_column = error_value.last_column
+ else
+ line = @lexer.line
+ first_column = @lexer.head_column
+ last_column = @lexer.column
+ end
+
+ raise ParseError, <<~ERROR
+ #{@path}:#{line}: #{error_message}
+ #{@text.split("\n")[line - 1]}
+ #{carrets(first_column, last_column)}
+ ERROR
+end
+
private
def reset_precs
@prec_seen = false
@code_after_prec = false
@@ -439,8 +496,8 @@
# Puts the lexer back into the :initial status and clears its end symbol.
def end_c_declaration
@lexer.status = :initial
@lexer.end_symbol = nil
end
# Builds the marker line for error messages: (first_column + 1) spaces
# followed by a run of '^' characters.
# Old side (-): the caret run extends to @lexer.column.
# New side (+): the caret run extends to the last_column supplied by the caller.
# NOTE(review): String#* raises ArgumentError for a negative count, so a
# last_column smaller than first_column would raise here -- confirm callers
# never pass such a pair.
-def carrets(first_column)
- ' ' * (first_column + 1) + '^' * (@lexer.column - first_column)
+def carrets(first_column, last_column)
+ ' ' * (first_column + 1) + '^' * (last_column - first_column)
end