examples/general/SRL/lib/ast_builder.rb in rley-0.5.10 vs examples/general/SRL/lib/ast_builder.rb in rley-0.5.11

- old
+ new

@@ -1,5 +1,6 @@ +require 'stringio' require_relative 'ast_building' require_relative 'regex_repr' # The purpose of an ASTBuilder is to build piece by piece an AST # (Abstract Syntax Tree) from a sequence of input tokens and @@ -33,57 +34,132 @@ # @param aRange [Range] Range of tokens matched by the rule # @param theTokens [Array] The input tokens # @param theChildren [Array] Children nodes (one per rhs symbol) def new_parent_node(aProduction, aRange, theTokens, theChildren) node = case aProduction.name - when 'srl_0' # rule 'srl' => 'term' + when 'srl_0' # rule 'srl' => 'pattern' return_first_child(aRange, theTokens, theChildren) + + when 'pattern_0' # rule 'pattern' => %w[pattern COMMA quantifiable] + reduce_pattern_0(aProduction, aRange, theTokens, theChildren) + when 'pattern_1' # rule 'pattern' => %w[pattern quantifiable] + reduce_pattern_1(aProduction, aRange, theTokens, theChildren) + + when 'pattern_2' # rule 'pattern' => 'quantifiable' + return_first_child(aRange, theTokens, theChildren) + + when 'quantifiable_0' # rule 'quantifiable' => 'term' + return_first_child(aRange, theTokens, theChildren) + + when 'quantifiable_1' # rule 'quantifiable' => %w[term quantifier] + reduce_quantifiable_1(aProduction, aRange, theTokens, theChildren) + when 'term_0' # rule 'term' => 'atom' return_first_child(aRange, theTokens, theChildren) - when 'term_1' # rule 'term' => %w[atom quantifier] - reduce_term_1(aProduction, aRange, theTokens, theChildren) + when 'term_1' # rule 'term' => 'alternation' + return_first_child(aRange, theTokens, theChildren) + + when 'term_2' # rule 'term' => 'grouping' + return_first_child(aRange, theTokens, theChildren) when 'atom_0' # rule 'atom' => 'letter_range' return_first_child(aRange, theTokens, theChildren) - + when 'atom_1' # rule 'atom' => 'digit_range' return_first_child(aRange, theTokens, theChildren) - + + when 'atom_2' # rule 'atom' => 'character_class' + return_first_child(aRange, theTokens, theChildren) + + when 'atom_3' # rule 
'atom' => 'special_char' + return_first_child(aRange, theTokens, theChildren) + + when 'atom_4' # rule 'atom' => 'literal' + return_first_child(aRange, theTokens, theChildren) + # rule 'letter_range' => %w[LETTER FROM LETTER_LIT TO LETTER_LIT] - when 'letter_range_0' + when 'letter_range_0' reduce_letter_range_0(aProduction, aRange, theTokens, theChildren) - #rule 'letter_range' => %w[UPPERCASE LETTER FROM LETTER_LIT TO LETTER_LIT] - when 'letter_range_1' + #rule 'letter_range' => %w[UPPERCASE LETTER FROM LETTER_LIT TO LETTER_LIT] + when 'letter_range_1' reduce_letter_range_1(aProduction, aRange, theTokens, theChildren) when 'letter_range_2' # rule 'letter_range' => 'LETTER' reduce_letter_range_2(aProduction, aRange, theTokens, theChildren) when 'letter_range_3' # rule 'letter_range' => %w[UPPERCASE LETTER] reduce_letter_range_3(aProduction, aRange, theTokens, theChildren) # rule 'digit_range' => %w[digit_or_number FROM DIGIT_LIT TO DIGIT_LIT] - when 'digit_range_0' + when 'digit_range_0' reduce_digit_range_0(aProduction, aRange, theTokens, theChildren) - when 'digit_range_1' #rule 'digit_range' => 'digit_or_number' + when 'digit_range_1' # rule 'digit_range' => 'digit_or_number' reduce_digit_range_1(aProduction, aRange, theTokens, theChildren) + when 'character_class_0' # rule 'character_class' => %w[ANY CHARACTER] + reduce_character_class_0(aProduction, aRange, theTokens, theChildren) + + when 'character_class_1' # rule 'character_class' => %w[NO CHARACTER] + reduce_character_class_1(aProduction, aRange, theTokens, theChildren) + + when 'character_class_2' # rule 'character_class' => 'WHITESPACE' + reduce_character_class_2(aProduction, aRange, theTokens, theChildren) + + when 'character_class_3' # rule 'character_class' => %w[NO WHITESPACE] + reduce_character_class_3(aProduction, aRange, theTokens, theChildren) + + when 'character_class_4' # rule 'character_class' => 'ANYTHING' + reduce_character_class_4(aProduction, aRange, theTokens, theChildren) + + when 
'character_class_5' # rule 'character_class' => %w[ONE OF STRING_LIT] + reduce_character_class_5(aProduction, aRange, theTokens, theChildren) + + when 'special_char_0' # rule 'special_char' => 'TAB' + reduce_special_char_0(aProduction, aRange, theTokens, theChildren) + + when 'special_char_1' # rule 'special_char' => 'BACKSLASH' + reduce_special_char_1(aProduction, aRange, theTokens, theChildren) + + when 'special_char_2' # rule 'special_char' => %w[NEW LINE] + reduce_special_char_2(aProduction, aRange, theTokens, theChildren) + + when 'literal_0' # rule 'literal' => %[LITERALLY STRING_LIT] + reduce_literal_0(aProduction, aRange, theTokens, theChildren) + + # rule 'alternation' => %w[ANY OF LPAREN alternatives RPAREN] + when 'alternation_0' + reduce_alternation_0(aProduction, aRange, theTokens, theChildren) + + # rule 'alternatives' => %w[alternatives COMMA quantifiable] + when 'alternatives_0' + reduce_alternatives_0(aProduction, aRange, theTokens, theChildren) + + # rule 'alternatives' => %w[alternatives quantifiable] + when 'alternatives_1' + reduce_alternatives_1(aProduction, aRange, theTokens, theChildren) + + when 'alternatives_2' # rule 'alternatives' => 'quantifiable' + reduce_alternatives_2(aProduction, aRange, theTokens, theChildren) + + when 'grouping' # rule 'grouping' => %w[LPAREN pattern RPAREN] + reduce_grouping_0(aProduction, aRange, theTokens, theChildren) + when 'quantifier_0' # rule 'quantifier' => 'ONCE' multiplicity(1, 1) when 'quantifier_1' # rule 'quantifier' => 'TWICE' multiplicity(2, 2) when 'quantifier_2' # rule 'quantifier' => %w[EXACTLY count TIMES] reduce_quantifier_2(aProduction, aRange, theTokens, theChildren) # rule 'quantifier' => %w[BETWEEN count AND count times_suffix] - when 'quantifier_3' + when 'quantifier_3' reduce_quantifier_3(aProduction, aRange, theTokens, theChildren) when 'quantifier_4' # rule 'quantifier' => 'OPTIONAL' multiplicity(0, 1) @@ -93,14 +169,14 @@ when 'quantifier_6' # rule 'quantifier' => %w[NEVER OR MORE] 
multiplicity(0, :more) when 'quantifier_7' # rule 'quantifier' => %w[AT LEAST count TIMES] reduce_quantifier_7(aProduction, aRange, theTokens, theChildren) - + # rule 'digit_or_number' => 'DIGIT' # rule 'digit_or_number' => 'NUMBER' - when 'digit_or_number_0', 'digit_or_number_1' + when 'digit_or_number_0', 'digit_or_number_1' return_first_child(aRange, theTokens, theChildren) when 'count_0', 'count_1' return_first_child(aRange, theTokens, theChildren) @@ -115,10 +191,32 @@ def multiplicity(lowerBound, upperBound) return SRL::Regex::Multiplicity.new(lowerBound, upperBound, :greedy) end + def string_literal(aString, to_escape = true) + if aString.size > 1 + chars = [] + aString.each_char do |ch| + if to_escape && Regex::Character::MetaChars.include?(ch) + chars << Regex::Character.new("\\") + end + chars << Regex::Character.new(ch) + end + result = Regex::Concatenation.new(*chars) + else + if to_escape && Regex::Character::MetaChars.include?(aString) + result = Regex::Concatenation.new(Regex::Character.new("\\"), + Regex::Character.new(aString)) + else + result = Regex::Character.new(aString) + end + end + + return result + end + def char_range(lowerBound, upperBound) # TODO fix module nesting lower = Regex::Character.new(lowerBound) upper = Regex::Character.new(upperBound) return Regex::CharRange.new(lower, upper) @@ -126,19 +224,37 @@ def char_class(toNegate, *theChildren) Regex::CharClass.new(toNegate, *theChildren) end + def char_shorthand(shortName) + Regex::CharShorthand.new(shortName) + end + + def wildcard() + Regex::Wildcard.new + end + def repetition(expressionToRepeat, aMultiplicity) return Regex::Repetition.new(expressionToRepeat, aMultiplicity) end + + # rule 'pattern' => %w[pattern COMMA quantifiable] + def reduce_pattern_0(aProduction, aRange, theTokens, theChildren) + return Regex::Concatenation.new(theChildren[0], theChildren[2]) + end - # rule 'term' => %w[atom quantifier] - def reduce_term_1(aProduction, aRange, theTokens, theChildren) + # rule 
'pattern' => %w[pattern quantifiable] + def reduce_pattern_1(aProduction, aRange, theTokens, theChildren) + return Regex::Concatenation.new(theChildren[0], theChildren[1]) + end + + # rule 'quantifiable' => %w[term quantifier] + def reduce_quantifiable_1(aProduction, aRange, theTokens, theChildren) quantifier = theChildren.last - atom = theChildren.first - repetition(atom, quantifier) + term = theChildren.first + repetition(term, quantifier) end # rule 'letter_range' => %w[LETTER FROM LETTER_LIT TO LETTER_LIT] def reduce_letter_range_0(aProduction, aRange, theTokens, theChildren) lower = theChildren[2].token.lexeme @@ -164,26 +280,106 @@ #rule 'letter_range' => %w[UPPERCASE LETTER] def reduce_letter_range_3(aProduction, aRange, theTokens, theChildren) ch_range = char_range('A', 'Z') char_class(false, ch_range) end - + # rule 'digit_range' => %w[digit_or_number FROM DIGIT_LIT TO DIGIT_LIT] def reduce_digit_range_0(aProduction, aRange, theTokens, theChildren) reduce_letter_range_0(aProduction, aRange, theTokens, theChildren) end # rule 'digit_range' => 'digit_or_number' def reduce_digit_range_1(aProduction, aRange, theTokens, theChildren) - ch_range = char_range('0', '9') - char_class(false, ch_range) + char_shorthand('d') end + # rule 'character_class' => %w[ANY CHARACTER] + def reduce_character_class_0(aProduction, aRange, theTokens, theChildren) + char_shorthand('w') + end + + # rule 'character_class' => %w[NO CHARACTER] + def reduce_character_class_1(aProduction, aRange, theTokens, theChildren) + char_shorthand('W') + end + + # rule 'character_class' => 'WHITESPACE' + def reduce_character_class_2(aProduction, aRange, theTokens, theChildren) + char_shorthand('s') + end + + # rule 'character_class' => %w[NO WHITESPACE] + def reduce_character_class_3(aProduction, aRange, theTokens, theChildren) + char_shorthand('S') + end + + # rule 'character_class' => 'ANYTHING' + def reduce_character_class_4(aProduction, aRange, theTokens, theChildren) + wildcard + end + + # rule 
'character_class' => %w[ONE OF STRING_LIT] + def reduce_character_class_5(aProduction, aRange, theTokens, theChildren) + raw_literal = theChildren[-1].token.lexeme.dup + alternatives = raw_literal.chars.map { |ch| Regex::Character.new(ch) } + return Regex::CharClass.new(false, *alternatives) # TODO check other implementations + end + + # rule 'special_char' => 'TAB' + def reduce_special_char_0(aProduction, aRange, theTokens, theChildren) + Regex::Character.new('\t') + end + + # rule 'special_char' => 'BACKSLASH' + def reduce_special_char_1(aProduction, aRange, theTokens, theChildren) + Regex::Character.new('\\') + end + + # rule 'special_char' => %w[NEW LINE] + def reduce_special_char_2(aProduction, aRange, theTokens, theChildren) + # TODO: control portability + Regex::Character.new('\n') + end + + # rule 'literal' => %[LITERALLY STRING_LIT] + def reduce_literal_0(aProduction, aRange, theTokens, theChildren) + # What if literal is empty?... + + raw_literal = theChildren[-1].token.lexeme.dup + return string_literal(raw_literal) + end + + # rule 'alternation' => %w[ANY OF LPAREN alternatives RPAREN] + def reduce_alternation_0(aProduction, aRange, theTokens, theChildren) + return Regex::Alternation.new(*theChildren[3]) + end + + # rule 'alternatives' => %w[alternatives COMMA quantifiable] + def reduce_alternatives_0(aProduction, aRange, theTokens, theChildren) + return theChildren[0] << theChildren[-1] + end + + # rule 'alternatives' => %w[alternatives quantifiable] + def reduce_alternatives_1(aProduction, aRange, theTokens, theChildren) + return theChildren[0] << theChildren[-1] + end + + # rule 'alternatives' => 'quantifiable' + def reduce_alternatives_2(aProduction, aRange, theTokens, theChildren) + return [theChildren.last] + end + + # rule 'grouping' => %w[LPAREN pattern RPAREN] + def reduce_grouping_0(aProduction, aRange, theTokens, theChildren) + return Regex::NonCapturingGroup.new(theChildren[1]) + end + # rule 'quantifier' => %w[EXACTLY count TIMES] def 
reduce_quantifier_2(aProduction, aRange, theTokens, theChildren) count = theChildren[1].token.lexeme.to_i multiplicity(count, count) - end + end # rule 'quantifier' => %w[BETWEEN count AND count times_suffix] def reduce_quantifier_3(aProduction, aRange, theTokens, theChildren) lower = theChildren[1].token.lexeme.to_i upper = theChildren[3].token.lexeme.to_i