require 'strscan'

module Packrat; end

# A version of puts that limits output to 80 columns width.
#
# A tab is counted as 8 columns, every other character as 1. When +str+ is
# wider than 80 columns, the longest leading part that fits in 77 columns is
# printed followed by "..."; otherwise +str+ is printed unchanged.
#
# The cut point is found by walking the string and accumulating column
# widths, so a string containing many tabs is truncated correctly instead of
# producing a negative slice length (which made String#[] return nil and the
# following "+" raise).
def lputs(str)
  max_columns = 80
  ellipsis = "..."
  budget = max_columns - ellipsis.length

  columns = 0
  keep = nil # number of leading characters that fit within +budget+ columns
  (0...str.length).each do |i|
    char_width = str[i, 1] == "\t" ? 8 : 1
    keep = i if keep.nil? && columns + char_width > budget
    columns += char_width
  end

  # columns > max_columns implies the budget was exceeded, so keep is set.
  puts(columns > max_columns ? str[0, keep] + ellipsis : str)
end

# A Regexp used in a production rhs becomes a RegexpLiteral.
class Regexp
  def to_packrat_grammar_element
    Packrat::RegexpLiteral.new(self)
  end
end

# A Symbol used in a production rhs becomes a reference to the rule with
# that name.
class Symbol
  def to_packrat_grammar_element
    Packrat::RuleRef.new(self)
  end
end

# A String used in a production rhs becomes a StringLiteral.
class String
  def to_packrat_grammar_element
    Packrat::StringLiteral.new(self)
  end
end

# Base class of everything that can appear on the rhs of a production.
class Packrat::GrammarElement
  # Grammar elements convert to themselves.
  def to_packrat_grammar_element; self; end

  # A GrammarElement is hidden if it does not produce a result that should
  # be used in any way. This is mostly used for whitespace.
  attr_accessor :hidden
end

# Grammar element wrapping a Regexp.
class Packrat::RegexpLiteral < Packrat::GrammarElement
  def initialize(re)
    @re = re
  end

  def inspect; @re.inspect; end
end

# A StringLiteral works like a RegexpLiteral. The only reason we use
# a special class for it is so that we can inspect it in a more natural way
# (as a string instead of a Regexp).
class Packrat::StringLiteral < Packrat::RegexpLiteral
  def initialize(str)
    # Escape the string so that it only matches itself literally.
    super(Regexp.new(Regexp.escape(str)))
    @str = str
  end

  def inspect; @str.inspect; end
end

# Grammar element that refers to another rule by name.
class Packrat::RuleRef < Packrat::GrammarElement
  attr_reader :rule_name

  def initialize(ruleName)
    @rule_name = ruleName
  end

  def inspect; @rule_name.inspect; end
end

# A grammar Rule is a set of one or more Productions for the same
# (lhs) nonterminal. It makes an ordered choice between its productions
# by trying to parse with them in order.
class Packrat::Rule
  attr_reader :name, :prods, :grammar

  # name  - the lhs nonterminal this rule defines.
  # prods - the productions to try, in order.
  def initialize(name, prods = [])
    @name, @prods = name, prods
  end

  # Set the grammar on this rule and on all of its productions.
  def grammar=(grammar)
    @grammar = grammar
    @prods.each { |p| p.grammar = grammar }
  end

  # Add a production as the last alternative of this rule.
  def <<(prod)
    @prods << prod
  end

  # One line per production, with the alternatives aligned under the arrow.
  def inspect
    s = "#{name.to_s} ->"
    "\n" + s + " " +
      @prods.map { |p| p.inspect(false) }.join("\n" + " " * (s.length - 1) + "| ")
  end
end

# A grammar Production is a sequence of rhs elements describing how the lhs
# symbol should be parsed.
class Packrat::Production
  attr_accessor :grammar
  attr_reader :name, :rhs, :result_modifier

  # name - the lhs nonterminal.
  # rhs  - array of elements; a trailing Packrat::ResultModifier, if any, is
  #        taken as the result modifier instead of being parsed.
  def initialize(name, rhs)
    @name = name
    # Work on a copy: the pop and map! below must not alter the caller's array.
    @rhs = rhs.dup
    if Packrat::ResultModifier === @rhs.last
      @result_modifier = @rhs.pop
    else
      # Default modifier is to create a Sexpr with the production name
      # as the head of the returned array.
      @result_modifier = Packrat::SexprModifier.new(@name)
    end
    @rhs.map! { |e| e.to_packrat_grammar_element }
  end

  # withLhs - when false only the rhs is shown.
  def inspect(withLhs = true)
    rhs = @rhs.map { |e| e.inspect }.join(' ')
    withLhs ? "#{name.to_s} -> " + rhs : rhs
  end
end

# Report results of parsing a prod or grammar element
class Packrat::ErrorReporter < Packrat::GrammarElement
  def initialize(sub)
    @sub = sub
  end

  # Parse with the wrapped element, print whether it matched, and return
  # the wrapped element's result unchanged.
  def parse(parser)
    res = @sub.parse(parser)
    if res == false
      lputs "\t\t\t FAIL #{@sub.inspect}"
      puts ""
    else
      lputs " Match #{@sub.inspect}"
      puts ""
    end
    res
  end

  # Object#inspect is always defined, so method_missing never sees inspect.
  # Delegate explicitly so that calls such as inspect(false), made by
  # Packrat::Rule#inspect, reach the wrapped element.
  def inspect(*args)
    @sub.inspect(*args)
  end

  # Everything else is forwarded to the wrapped element.
  def method_missing(method, *args, &block)
    @sub.send(method, *args, &block)
  end

  def respond_to_missing?(method, include_private = false)
    @sub.respond_to?(method, include_private) || super
  end
end

# Class-level DSL for building a grammar; Packrat::Grammar extends it.
module Packrat::GrammarBuild
  attr_reader :start

  # Declare the name of the start rule.
  def start_symbol(name)
    @start = name
  end

  # Rules by name. Looking up an unknown name creates an empty Rule for it.
  def rules
    @rules ||= Hash.new { |h, k| h[k] = Packrat::Rule.new(k) }
  end

  # Add one production to the rule +name+ for each rhs in +rhss+.
  def rule(name, rhss)
    rhss.each { |rhs| prod(name, rhs) }
  end

  # Add a single production; it is wrapped in an ErrorReporter when $DEBUG
  # is set.
  def prod(name, rhs)
    pr = Packrat::Production.new(name, rhs)
    pr = Packrat::ErrorReporter.new(pr) if $DEBUG
    rules[name] << pr
  end

  # Look up a rule by name. Goes through #rules so that it also works
  # before the first rule has been added (when @rules is still nil).
  def [](name)
    rules[name]
  end

  def start_rule
    self[self.start]
  end

  # Convert +elem+ to a grammar element and mark it as hidden.
  def hidden(elem)
    e = elem.to_packrat_grammar_element
    e.hidden = true
    e
  end

  # Finalize the building of the grammar by conducting postprocessing.
  def finalize!
    postprocess_set_grammar_on_rules
    postprocess_create_ast_classes
  end

  def postprocess_set_grammar_on_rules
    rules.values.each { |r| r.grammar = self }
  end

  # Create an AST class for every production whose result modifier is an
  # ASTBuilder.
  def postprocess_create_ast_classes
    each_prod do |p|
      if Packrat::ASTBuilder === p.result_modifier
        ast_class(p.result_modifier.name, p)
      end
    end
  end

  # Yield every production of every rule.
  def each_prod
    rules.values.each { |r| r.prods.each { |p| yield(p) } }
  end
end

class Packrat::Grammar
  extend Packrat::GrammarBuild

  class << self
    # NOTE(review): the original text between "class <" and "for the given"
    # was lost when this file was extracted; "class << self" and the first
    # sentence below are reconstructed - confirm against the original file.
    #
    # Return the AST class with the given name for the given production.
    # If not previously created we create it and add it to the ASTs module.
    def ast_class(name, prod)
      acn = ast_class_name(name)
      begin
        const_get("ASTs").const_get(acn)
      rescue NameError
        # No such constant yet: create the class and register it.
        const_get("ASTs").const_set(acn, make_ast_class(acn, prod))
      end
    end

    # Upcase the first character of +name+, leave the rest as is.
    def ast_class_name(name)
      s = name.to_s
      s[0, 1].upcase + s[1..-1]
    end

    def make_ast_class(klassName, production)
      Packrat::AST.new_subclass(klassName, production)
    end
  end
end

# Node in AST trees.
class Packrat::AST
  class << self
    # NOTE(review): the original text between "class <" and ". The sig has"
    # was lost when this file was extracted. new_subclass, which
    # Packrat::Grammar.make_ast_class calls, is not visible here and was
    # presumably defined in the lost part - restore it from the original.
    #
    # Extract the sig from the given production. The sig has strings in the
    # positions where the production rhs has a String or StringLiteral,
    # has symbols in the positions where a rhs element refers to another
    # production, and has nil in other positions.
    def extract_sig(production)
      production.rhs.map do |e|
        case e
        when String
          e
        when Packrat::StringLiteral
          # StringLiteral#inspect is the inspect of the original string.
          e.inspect
        when Packrat::RuleRef
          sub_element_name(e.rule_name)
        else
          # Expand this so that names are lifted out of Maybe, and "s" is
          # added when plus and mult etc
          nil
        end
      end
    end

    # Split +name+ at capitalized words, downcase the parts and join them
    # with underscores; returned as a symbol.
    def sub_element_name(name)
      parts = name.to_s.split(/([A-Z][a-z0-9]*)/).select { |e| e.length > 0 }
      parts.map { |p| p.downcase }.join("_").intern
    end

    # Shorthand for new.
    def [](*args); new(*args); end
  end

  def initialize(*children)
    @children = children
  end

  attr_reader :children

  def [](index); @children[index]; end

  # Two nodes are equal when they have the same class and equal children.
  def ==(other)
    self.class == other.class && @children == other.children
  end

  # Class name without its namespace, followed by the children in brackets.
  def inspect
    self.class.inspect.split("::").last +
      "[" + @children.map { |c| c.inspect }.join(", ") + "]"
  end
end

class Packrat::InterpretingParser
  class << self
    # NOTE(review): the body of this singleton class, and everything after
    # it up to the comment on lexeme, was lost when this file was extracted.
    # The code that sets @str and @s and that provides pos, pos= and grammar
    # (all used elsewhere in this file) is not visible here - restore it
    # from the original file.
  end

  # Extract a lexeme of length +len+ starting at +pos+ in the string.
  def lexeme(pos, len)
    @str[pos, len]
  end

  # Skip using +re+ at the current position in the string. Returns nil
  # if the re did not match or the length of the match if it matched.
  def skip(re)
    @s.skip(re)
  end
end

# Parser that prints every skip attempt and its outcome.
class Packrat::ErrorLoggingInterpretingParser < Packrat::InterpretingParser
  def skip(re)
    oldpos = pos
    r = super
    if r
      # Index of the last matched character (same as pos for an empty match).
      endp = pos - ((r > 0) ? 1 : 0)
      puts "#{oldpos.to_s.rjust(3)} - #{endp.to_s.ljust(3)} #{lexeme(oldpos,r).inspect} #{re.inspect}"
    else
      puts "\t\t\tNOT #{re.inspect}"
    end
    r
  end
end

class Packrat::RegexpLiteral
  # Returns the matched lexeme, or false if the regexp does not match at
  # the current position.
  def parse(parser)
    oldpos = parser.pos
    len = parser.skip(@re)
    len ? parser.lexeme(oldpos, len) : false
  end
end

class Packrat::Production
  # Parse the rhs elements in sequence. Returns false as soon as one of
  # them fails, otherwise the result modifier's view of the collected
  # results (results of hidden elements are left out).
  def parse(parser)
    result = []
    @rhs.each do |e|
      res = e.parse(parser)
      return false if res == false
      result << res unless e.hidden
    end
    @result_modifier.modify_result(self, result)
  end
end

class Packrat::Rule
  # Ordered choice: return the result of the first production that
  # succeeds; the position is reset before the next alternative is tried.
  def parse(parser)
    prods.each do |prod|
      oldpos = parser.pos
      res = prod.parse(parser)
      return res if res
      parser.pos = oldpos
    end
    false
  end
end

class Packrat::RuleRef
  # Parse with the rule this reference names in the parser's grammar.
  def parse(parser)
    parser.grammar[@rule_name].parse(parser)
  end
end

class Packrat::Repeat
  # Apply the sub-element repeatedly and collect its results. Returns the
  # list of results, or false (with the position reset) when the number of
  # matches is outside the allowed range.
  def parse(parser)
    result_list = []
    oldpos = parser.pos
    loop do
      before = parser.pos
      res = @sub.parse(parser)
      break unless res
      result_list << res
      # A match that consumed nothing would match forever; stop after it.
      break if parser.pos == before
    end
    return result_list if valid_result?(result_list)

    parser.pos = oldpos
    false
  end

  # True when the number of matches is within @min and @max; a bound that
  # is not set is not checked.
  def valid_result?(list)
    return false if @min && list.length < @min
    return false if @max && list.length > @max
    true
  end
end