=begin See README TODO-FEATURE: :pre_delimiter option TODO-FEATURE: The "expecting" feature is so good I wonder if we should add the ability to automatically repair the parse! This would need: a) default values for regex termainals (string terminals are their own default values) default values for regex should be verified to match the regex b) an interactive prompter if there is more than one option TODO-IMPROVEMENT: "Expecting" should show line numbers instead of char numbers, but it should only calculated on demand. This means we need a smarter formatter for our possible-error-logging. TODO-IMPROVEMENT: "Expecting" code lines dump should show line numbers TODO-BUG: "Expecting" doesn't do the right thing of a "dont" clause matched Should say "something other than #{the don't clause}" Ideally, we would continue matching and list all the possible next clauses that would allow us to continue IDEA: could use the "-" prefix operator to mean "dont": -"this" -:that -match(:foo) -many(:foo) TODO-OPTIMIZATION: add memoizing (caching / dynamic-programming) to guarantee linear time parsing http://en.wikipedia.org/wiki/Parsing_expression_grammar#Implementing_parsers_from_parsing_expression_grammars =end require File.dirname(__FILE__) + "/nodes.rb" class String def camelize self.split("_").collect {|a| a.capitalize}.join end def first_lines(n) lines=self.split("\n",-1) lines.length<=n ? self : lines[0..n-1].join("\n") end def last_lines(n) lines=self.split("\n",-1) lines.length<=n ? self : lines[-n..-1].join("\n") end def line_col(offset) lines=self[0..offset-1].split("\n") return lines.length, lines[-1].length end end module BabelBridge VERSION = "0.1.1" # hash which can be used declaratively class PatternElementHash < Hash def method_missing(method_name, *args) #method_name is a symbol return self if args.length==1 && !args[0] # if nil is provided, don't set anything self[method_name]=args[0] || true # on the other hand, if no args are provided, assume true self end end # PatternElement provides optimized parsing for each Element of a pattern # PatternElement provides all the logic for parsing: # :many # :optional class PatternElement attr_accessor :parser,:optional,:negative,:name,:terminal,:could_match attr_accessor :match,:rule_variant #match can be: # true, Hash, Symbol, String, Regexp def initialize(match,rule_variant) self.rule_variant=rule_variant init(match) raise "pattern element cannot be both :dont and :optional" if negative && optional end def to_s match.inspect end def parse(parent_node) # run element parser match=parser.call(parent_node) # Negative patterns (PEG: !element) match=match ? nil : EmptyNode.new(parent_node) if negative # Optional patterns (PEG: element?) match=EmptyNode.new(parent_node) if !match && optional # Could-match patterns (PEG: &element) match.match_length=0 if match && could_match # return match match end private def init(match) self.match=match case match when TrueClass then init_true when Hash then init_hash match when Symbol then init_rule match when String then init_string match when Regexp then init_regex match else raise "invalid pattern type: #{match.inspect}" end end def init_rule(rule_name) rule_name.to_s[/^([^?!]*)([?!])?$/] rule_name=$1.to_sym option=$2 match_rule=rule_variant.rule.parser.rules[rule_name] raise "no rule for #{rule_name}" unless match_rule self.parser =lambda {|parent_node| match_rule.parse(parent_node)} self.name = rule_name case option when "?" then self.optional=true when "!" then self.negative=true end end def init_hash(hash) if hash[:parser] self.parser=hash[:parser] elsif hash[:many] init hash[:many] #generate parser for poly delimiter_pattern_element= PatternElement.new(hash[:delimiter]||true,rule_variant) post_delimiter_element=case hash[:post_delimiter] when TrueClass then delimiter_pattern_element when nil then nil else PatternElement.new(hash[:post_delimiter],rule_variant) end # convert the single element parser into a poly-parser single_parser=parser self.parser= lambda do |parent_node| last_match=single_parser.call(parent_node) many_node=ManyNode.new(parent_node) while last_match many_node<0 # pop the post delimiter matched with delimiter_pattern_element many_node.delimiter_matches.pop if many_node.length==many_node.delimiter_matches.length # If post_delimiter is requested, many_node and delimiter_matches must be the same length if post_delimiter_element post_delimiter_match=post_delimiter_element.parse(many_node) # fail if post_delimiter didn't match return nil unless post_delimiter_match many_node.delimiter_matches< or a :match=> set" end self.name = hash[:as] || self.name self.optional ||= hash[:optional] || hash[:optionally] self.could_match ||= hash[:could] self.negative ||= hash[:dont] end # "true" parser always matches the empty string def init_true self.parser=lambda {|parent_node| EmptyNode.new(parent_node)} end # parser that matches exactly the string specified def init_string(string) self.parser=lambda {|parent_node| parent_node.src[parent_node.next,string.length]==string && TerminalNode.new(parent_node,string.length,string)} self.terminal=true end # parser that matches the given regex def init_regex(regex) self.parser=lambda {|parent_node| offset=parent_node.next;parent_node.src.index(regex,offset)==offset && (o=$~.offset(0)) && TerminalNode.new(parent_node,o[1]-o[0],regex)} self.terminal=true end end # Each Rule has one or more RuleVariant # Rules attempt to match each of their Variants in order. The first one to succeed returns true and the Rule succeeds. class RuleVariant attr_accessor :pattern,:rule,:node_class def initialize(pattern,rule,node_class=nil) self.pattern=pattern self.rule=rule self.node_class=node_class end def inspect pattern.collect{|a|a.inspect}.join(', ') end def to_s "variant_class: #{node_class}, pattern: #{inspect}" end # convert the pattern into a set of lamba functions def pattern_elements @pattern_elements||=pattern.collect { |match| PatternElement.new match, self } end # returns a Node object if it matches, nil otherwise def parse(parent_node) #return parse_nongreedy_optional(src,offset,parent_node) # nongreedy optionals break standard PEG node=node_class.new(parent_node) pattern_elements.each do |pe| match=pe.parse(node) # if parse failed if !match if pe.terminal # log failures on Terminal patterns for debug output if overall parse fails node.parser.log_parsing_failure(node.next,:pattern=>pe.match,:node=>node) end return nil end # parse succeeded, add to node and continue node.add_match(match,pe.name) end node end end # Rules define one or more patterns (RuleVariants) to match for a given non-terminal class Rule attr_accessor :name,:variants,:parser,:node_class def initialize(name,parser) self.name=name self.variants=[] self.parser=parser class_name = "#{parser.module_name}_#{name}_node".camelize self.node_class = parser.const_set(class_name,Class.new(NodeNT)) end def add_variant(pattern, &block) rule_variant_class_name = "#{name}_node#{self.variants.length+1}".camelize rule_variant_class = parser.const_set(rule_variant_class_name,Class.new(node_class)) self.variants << RuleVariant.new(pattern,self,rule_variant_class) rule_variant_class.class_eval &block if block rule_variant_class end def parse(node) if cached=node.parser.cached(name,node.next) return cached==:no_match ? nil : cached # return nil if cached==:no_matched end variants.each do |v| match=v.parse(node) if match node.parser.cache_match(name,match) return match end end node.parser.cache_no_match(name,node.next) nil end # inspect returns a string which approximates the syntax for generating the rule and all its variants def inspect variants.collect do |v| "rule #{name.inspect}, #{v.inspect}" end.join("\n") end # returns a more human-readable explanation of the rule def to_s "rule #{name.inspect}, node_class: #{node_class}\n\t"+ "#{variants.collect {|v|v.to_s}.join("\n\t")}" end end # primary object used by the client # Used to generate the grammer with .rule methods # Used to parse with .parse class Parser # Parser sub-class grammaer definition # These methods are used in the creation of a Parser Sub-Class to define # its grammar class <failure_index key=expecting[:pattern] @expecting_list={key=>expecting} @failure_index = index elsif index == failure_index key=expecting[:pattern] self.expecting_list[key]=expecting else # ignored end end def parse(src,offset=0,rule=nil) reset_parser_tracking @start_time=Time.now self.src=src root_node=RootNode.new(self) ret=self.class[rule||self.class.root_rule].parse(root_node) unless rule if ret if ret.next ") end def parser_failure_info return unless src bracketing_lines=5 line,col=src.line_col(failure_index) ret=<<-ENDTXT Parsing error at line #{line} column #{col} offset #{failure_index} Source: ... #{(failure_index==0 ? "" : src[0..(failure_index-1)]).last_lines(bracketing_lines)}#{src[(failure_index)..-1].first_lines(bracketing_lines)} ... ENDTXT if @parsing_did_not_match_entire_input ret+="\nParser did not match entire input." else common_root=nil expecting_list.values.each do |e| node=e[:node] pl=node.parent_list if common_root common_root.each_index do |i| if pl[i]!=common_root[i] common_root=common_root[0..i-1] break end end else common_root=node.parent_list end end ret+=<1 ? ' one of' : ''}: #{expecting_list.values.collect do |a| list=node_list_string(a[:node].parent_list,common_root) [list,"#{a[:pattern].inspect} (#{list})"] end.sort.map{|i|i[1]}.join("\n ")} ENDTXT end ret end end end