lib/packrat/grammar.rb in rockit-0.7.1 vs lib/packrat/grammar.rb in rockit-0.7.2

- old
+ new

@@ -55,15 +55,16 @@ # A StringLiteral works like a RegexpLiteral. The only reason we use # a special class for it is so that we can inspect it in a more natural way # (as a string instead of a Regexp). class Packrat::StringLiteral < Packrat::RegexpLiteral + attr_reader :string def initialize(str) super(Regexp.new(Regexp.escape(str))) - @str = str + @string = str end - def inspect; @str.inspect; end + def inspect; @string.inspect; end end class Packrat::RuleRef < Packrat::GrammarElement attr_reader :rule_name def initialize(ruleName) @@ -97,24 +98,27 @@ # A grammar Production is sequence of rhs elements describing how the lhs # symbol should be parsed. class Packrat::Production attr_accessor :grammar - attr_reader :name, :rhs, :result_modifier + attr_reader :name, :rhs, :result_producer def initialize(name, rhs) @name, @rhs = name, rhs - if Packrat::ResultModifier === rhs.last - @result_modifier = @rhs.pop + if Packrat::ResultProducer === rhs.last + @result_producer = @rhs.pop else - # Default modifier is to create a Sexpr with the production name + # Default producer is to create a Sexpr with the production name # as the head of the returned array. - @result_modifier = Packrat::SexprModifier.new(@name) + @result_producer = Packrat::SexprProducer.new(@name) end @rhs.map! {|e| e.to_packrat_grammar_element} end + def finalize! + @result_producer.production = self + end def inspect(withLhs = true) - rhs = @rhs.map {|e| e.inspect}.join(' ') + rhs = @rhs.map {|e| e.hidden ? nil : e.inspect}.compact.join(' ') withLhs ? "#{name.to_s} -> " + rhs : rhs end end # Report results of parsing a prod or grammar element @@ -122,29 +126,30 @@ def initialize(sub) @sub = sub end def parse(parser) res = @sub.parse(parser) - if res == false + if false == res lputs "\t\t\t FAIL #{@sub.inspect}" puts "" else - lputs " Match #{@sub.inspect}" + lputs " #{parser.pos}: Match #{@sub.inspect}" puts "" end res end + def inspect; @sub.inspect; end def method_missing(method, *args) @sub.send(method, *args) end end module Packrat::GrammarBuild attr_reader :start def start_symbol(name); @start = name; end def rules; @rules ||= (Hash.new {|h,k| h[k] = Packrat::Rule.new(k)}); end - def rule(name, rhss) + def rule(name, *rhss) rhss.each {|rhs| prod(name, rhs)} end def prod(name, rhs) pr = Packrat::Production.new(name, rhs) pr = Packrat::ErrorReporter.new(pr) if $DEBUG @@ -158,24 +163,20 @@ e end # Finalize the building of the grammar by conducting postprocessing. def finalize! postprocess_set_grammar_on_rules - postprocess_create_ast_classes + each_prod {|p| p.finalize!} end def postprocess_set_grammar_on_rules - rules.values.each {|r| r.grammar = self} + each_prod {|r| r.grammar = self} end - def postprocess_create_ast_classes - each_prod do |p| - if Packrat::ASTBuilder === p.result_modifier - ast_class(p.result_modifier.name, p) - end - end + def each_rule + rules.values.each {|r| yield(r)} end def each_prod - rules.values.each {|r| r.prods.each {|p| yield(p)}} + each_rule {|r| r.prods.each {|p| yield(p)}} end end class Packrat::Grammar extend Packrat::GrammarBuild @@ -223,55 +224,64 @@ def initialize(sub) @sub = sub.to_packrat_grammar_element end def parse(parser) res = @sub.parse(parser) - return res if res - return nil + false == res ? nil : res end def inspect "(#{@sub.inspect})?" end end module Packrat::GrammarBuild def maybe(element); Packrat::Maybe.new(element); end end -# The last element of a prod can be a result modifier that modifies -# the result to be returned by the prod when parsing. -class Packrat::ResultModifier +# The last element of a prod can be a result producer that produces +# the result to be returned by the prod in case of a successfull parse. +class Packrat::ResultProducer + # Before any results are produced we need to know the prod we are in + def production=(prod); @prod = prod; end + + # A ResultProducer returns a result which it then updates. This is needed + # since multiple results can be in production at the same time. + def new_result; end + def update_result(res, subres, elem, index); end + def finalize_result(res); res; end end # Create a Sexpr based on the name of the matched production and the # result-array. -class Packrat::SexprModifier < Packrat::ResultModifier +class Packrat::SexprProducer < Packrat::ResultProducer def initialize(name) @name = name end - def modify_result(prod, result) - # Add the production name in the front - result.unshift @name - result - end + def new_result; [@name]; end + def update_result(res, subres, elem, index, nhi); res << subres; end end # Lift one of the sub-results as the result from parsing a production. -class Packrat::LiftModifier < Packrat::ResultModifier +# Optionally a block can be given. If so the block will get called with +# the lifted result and can modify it. +class Packrat::LiftOneResultProducer < Packrat::ResultProducer def initialize(valueIndex, &block) @value_index = valueIndex @block = block end - def modify_result(prod, result) - extracted_result = result[@value_index] - @block? @block.call(extracted_result) : extracted_result + def new_result; nil; end + def update_result(res, subres, elem, index, nonhiddenIndex) + index == @value_index ? subres : res end + def finalize_result(res) + @block ? @block.call(res) : res + end end module Packrat::GrammarBuild - def sexpr(name); Packrat::SexprModifier.new(name); end - def lift(index, &b); Packrat::LiftModifier.new(index, &b); end + def sexpr(name); Packrat::SexprProducer.new(name); end + def lift(index, &b); Packrat::LiftOneResultProducer.new(index, &b); end end module Packrat::GrammarBuild # any() can be implemented in many ways but if all the sub-elements are # strings we simply create a regexp matching any of them. If they are not @@ -280,11 +290,11 @@ if subs.all? {|e| String === e} re_string = subs.map {|s| "(" + Regexp.escape(s) + ")"}.join("|") Packrat::RegexpLiteral.new(Regexp.new(re_string)) else name = internal_rule_name() - rule(name, subs.map {|s| [s, lift(0)]}) + rule(name, *subs.map {|s| [s, lift(0)]}) Packrat::RuleRef.new(name) end end def next_internal_rule_num @@ -306,54 +316,68 @@ module Packrat::GrammarBuild def eos(); hidden(Packrat::EOS.new); end end -class Packrat::ASTBuilder < Packrat::ResultModifier +# Build AST tree as result of parsing a Production. +class Packrat::ASTBuilder < Packrat::ResultProducer attr_reader :name - def initialize(nodeName) - @name = nodeName + def initialize(nodeName, nameMap = {}) + @name, @name_map = nodeName, nameMap end - def modify_result(prod, result) - astklass = prod.grammar.ast_class(@name, prod) - astklass.new(*result) + def production=(prod) + super + @ast_class = prod.grammar.ast_class(@name, prod, @name_map) end + + def new_result; Array.new; end + def update_result(res, subres, elem, index, nhIndex) + res << subres unless @ast_class.constant_elem_at?(nhIndex) + res + end + def finalize_result(res) + @ast_class.new(res, {:only_nonconstant => true}) + end end module Packrat::GrammarBuild - def ast(name) - Packrat::ASTBuilder.new(name) + def ast(name, options = {}) + Packrat::ASTBuilder.new(name, options) end # Return the ast class with the given <nodeName> for the given <production>. # If not previously created we create it and add it to the Tree module. - def ast_class(name, prod) + def ast_class(name, prod, nameMap) acn = ast_class_name(name) begin const_get("ASTs").const_get(acn) rescue - const_get("ASTs").const_set(acn, make_ast_class(acn, prod)) + const_get("ASTs").const_set(acn, make_ast_class(acn, prod, nameMap)) end end def ast_class_name(name) s = name.to_s s[0,1].upcase + s[1..-1] end - def make_ast_class(klassName, production) - Packrat::AST.new_subclass(klassName, production) + def make_ast_class(klassName, production, nameMap) + Packrat::AST.new_subclass(klassName, production, nameMap) end end # Node in AST trees. class Packrat::AST class <<self attr_accessor :sig - def new_subclass(name, production) + # Create a new AST subclass. The <nameMap> hash can specify names + # for certain element indices (such explicitly specified names + # will override the default naming scheme which is to use a downcase + # version of the production name). + def new_subclass(name, production, nameMap = {}) klass = Class.new(self) - klass.sig = extract_sig(production) + klass.sig = extract_sig(production, nameMap) # Add accessor methods for all symbols in the sig num_strings = 0 klass.sig.each_with_index do |sn, i| if Symbol === sn # We should subtract the num_strings in the index below @@ -372,45 +396,72 @@ end # Return a sig for the given <production>. The sig has strings in the # positions where the production rhs has a String or StringLiteral, # has symbols in the positions where a rhs element refer to another - # production, and has nil in other positions. - def extract_sig(production) - production.rhs.map do |e| - case e - when String - e - when Packrat::StringLiteral - e.inspect # gives us the string itself - when Packrat::RuleRef - sub_element_name(e.rule_name) - else - nil # Expand this so that names are lifted out of Maybe, and "s" is added when plus and mult etc + # production, and has nil in other positions. The <nameMap> can contain + # explicit names for certing indices (indices as key and name as symbol + # value). + def extract_sig(production, nameMap = {}) + sig = [] + production.rhs.each_with_index do |e, i| + unless e.hidden + case e + when String + sig << e + when Packrat::StringLiteral + sig << e.string + when Packrat::RuleRef + sig << sub_element_name(e.rule_name) + else + sig << nil # Expand this so that names are lifted out of Maybe, and "s" is added when plus and mult etc + end end end + number_multioccurences(sig).map {|n| nameMap[n] || n} end - + + def number_multioccurences(sig) + num_sigs = sig.inject(Hash.new(0)) {|h, s| h[s] += 1 if Symbol === s; h} + counters = Hash.new(0) + sig.map do |s| + (num_sigs[s] > 1) ? (s.to_s + (counters[s] += 1).to_s).intern : s + end + end + def sub_element_name(name) parts = name.to_s.split(/([A-Z][a-z0-9]*)/).select {|e| e.length > 0} parts.map {|p| p.downcase}.join("_").intern end - def [](*args); new(*args); end + def constant_elem_at?(index) + self.sig[index].kind_of?(String) + end + + def [](*args); new(args); end end - - def initialize(*children) - @children = children + + DefaultOptions = {:only_nonconstant => true} + + def initialize(children, options = {}) + options = DefaultOptions.clone.update(options) + if options[:only_nonconstant] + @children = self.class.sig.map do |n| + n.kind_of?(String) ? n : children.shift + end + else + @children = children + end end attr_reader :children def [](index); @children[index]; end def ==(other) self.class == other.class && @children == other.children end - + def inspect self.class.inspect.split("::").last + "[" + @children.map {|c| c.inspect}.join(", ") + "]" end end @@ -421,14 +472,14 @@ def new_subclass(grammar) klass = Class.new(self) klass.grammar = grammar klass end - def parse_string(str) + def parse_string(str, startSymbol = nil) # We always add a whitespace since StringScanner cannot match /\s*/ # (typically used as whitespace) at EOS - new(str + " ").parse_string + new(str + " ").parse_string(startSymbol) end end attr_reader :results, :grammar @@ -436,12 +487,13 @@ @str = string @s = StringScanner.new(string) @grammar = self.class.grammar end - def parse_string - @grammar.start_rule.parse(self) + def parse_string(startSymbol = nil) + startSymbol ||= @grammar.start + @grammar[startSymbol].parse(self) end # Get and Set current position in string. def pos; @s.pos; end def pos=(p); @s.pos = p; end @@ -482,29 +534,31 @@ end end class Packrat::Production def parse(parser) - result = [] - @rhs.each do |e| - res = e.parse(parser) - if res == false - return false - else - result << res unless e.hidden + res = @result_producer.new_result + nonhidden_index = 0 + @rhs.each_with_index do |e, i| + subres = e.parse(parser) + return false if false == subres + unless e.hidden + res = @result_producer.update_result(res, subres, e, + i, nonhidden_index) + nonhidden_index += 1 end end - return @result_modifier.modify_result(self, result) + return @result_producer.finalize_result(res) end end class Packrat::Rule def parse(parser) + oldpos = parser.pos prods.each do |prod| - oldpos = parser.pos res = prod.parse(parser) - return res if res + return res unless false == res parser.pos = oldpos end return false end end @@ -517,9 +571,10 @@ class Packrat::Repeat def parse(parser) result_list = [] oldpos = parser.pos + # XXX: Should we take only amx number of results here if max != false? while (res = @sub.parse(parser)) result_list << res end if valid_result?(result_list) return result_list