lib/citrus.rb in citrus-1.7.0 vs lib/citrus.rb in citrus-1.8.0

- old
+ new

@@ -2,21 +2,24 @@ # elegance and expressiveness of the language with the simplicity and power of # parsing expressions. # # http://mjijackson.com/citrus module Citrus - VERSION = [1, 7, 0] - - Infinity = 1.0 / 0 - autoload :File, 'citrus/file' + VERSION = [1, 8, 0] + # Returns the current version of Citrus as a string. def self.version VERSION.join('.') end + # A pattern to match any character, including \n. + DOT = /./m + + Infinity = 1.0 / 0 + F = ::File # Loads the grammar from the given +file+ into the global scope using #eval. def self.load(file) file << '.citrus' unless F.file?(file) @@ -24,11 +27,12 @@ raise "Cannot read file #{file}" unless F.readable?(file) self.eval(F.read(file)) end # Evaluates the given Citrus parsing expression grammar +code+ in the global - # scope. Returns an array of any grammar modules that were created. + # scope. The +code+ may contain the definition of any number of modules. + # Returns an array of any grammar modules that are created. def self.eval(code) File.parse(code).value end # This error is raised whenever a parse fails. @@ -175,10 +179,16 @@ @root = name.to_sym if name # The first rule in a grammar is the default root. @root || rule_names.first end + # Creates a new rule that will match any single character. A block may be + # provided to specify semantic behavior (via #ext). + def dot(&block) + ext(Rule.new(DOT), block) + end + # Creates a new Super for the rule currently being defined in the grammar. A # block may be provided to specify semantic behavior (via #ext). def sup(&block) ext(Super.new, block) end @@ -193,21 +203,29 @@ # to specify semantic behavior (via #ext). def notp(rule, &block) ext(NotPredicate.new(rule), block) end + # Creates a new ButPredicate using the given +rule+. A block may be provided + # to specify semantic behavior (via #ext). + def but(rule, &block) + ext(ButPredicate.new(rule), block) + end + + alias butp but # For consistency with #andp and #notp. + # Creates a new Label using the given +rule+ and +label+. A block may be # provided to specify semantic behavior (via #ext). def label(rule, label, &block) - ext(Label.new(label, rule), block) + ext(Label.new(rule, label), block) end # Creates a new Repeat using the given +rule+. +min+ and +max+ specify the # minimum and maximum number of times the rule must match. A block may be # provided to specify semantic behavior (via #ext). def rep(rule, min=1, max=Infinity, &block) - ext(Repeat.new(min, max, rule), block) + ext(Repeat.new(rule, min, max), block) end # An alias for #rep. def one_or_more(rule, &block) rep(rule, &block) @@ -254,11 +272,17 @@ root_rule = rule(opts[:root]) raise 'No rule named "%s"' % root unless root_rule input = Input.new(string, opts[:memoize]) - input.match(root_rule, opts[:offset]) or raise ParseError.new(input) + match = input.match(root_rule, opts[:offset]) + + if match.nil? || (opts[:consume] && input.length != match.length) + raise ParseError.new(input) + end + + match end # The default set of options that is used in #parse. The options hash may # have any of the following keys: # @@ -268,14 +292,18 @@ # memoize:: If this is +true+ the matches generated during a parse are # memoized. This technique (also known as Packrat parsing) # guarantees parsers will operate in linear time but costs # significantly more in terms of time and memory required. # Defaults to +false+. + # consume:: If this is +true+ a ParseError will be raised during a parse + # unless the entire input string is consumed. Defaults to + # +false+. def default_parse_options { :offset => 0, :root => root, - :memoize => false + :memoize => false, + :consume => false } end end # This class represents the core of the parsing algorithm. It wraps the input @@ -339,10 +367,18 @@ end # A Rule is an object that is used by a grammar to create matches on the # Input during parsing. module Rule + # Evaluates the given expression and creates a new rule object from it. + # + # Citrus::Rule.eval('"a" | "b"') + # + def self.eval(expr) + File.parse(expr, :root => :rule_body).value + end + # Returns a new Rule object depending on the type of object given. def self.new(obj) case obj when Rule then obj when Symbol then Alias.new(obj) @@ -354,18 +390,10 @@ else raise ArgumentError, "Invalid rule object: #{obj.inspect}" end end - # Creates a new rule object from the given expression. - # - # Citrus::Rule.create('"a" | "b"') - # - def self.create(expr) - File.parse(expr, :root => :rule_body).value - end - @unique_id = 0 # Generates a new rule id. def self.new_id @unique_id += 1 @@ -389,11 +417,22 @@ # Specifies a module that will be used to extend all Match objects that # result from this rule. If +mod+ is a Proc, it is used to create an # anonymous module. def extension=(mod) - mod = Module.new(&mod) if Proc === mod + if Proc === mod + begin + tmp = Module.new(&mod) + raise ArgumentError unless tmp.instance_methods.any? + mod = tmp + rescue ArgumentError, NameError, NoMethodError + mod = Module.new { define_method(:value, &mod) } + end + end + + raise ArgumentError unless Module === mod + @extension = mod end # The module this rule uses to extend new matches. attr_reader :extension @@ -420,11 +459,11 @@ end private def extend_match(match, name) - match.extensions << extension if extension + match.extend(extension) if extension match.names << name if name match end def create_match(data, offset) @@ -552,11 +591,11 @@ end # Returns the Match for this rule on +input+ at the given +offset+, +nil+ if # no match can be made. def match(input, offset=0) - create_match(rule.dup, offset) if rule == input[offset, rule.length] + create_match(rule.dup, offset) if input[offset, rule.length] == rule end end # An Expression is a Terminal that has the same semantics as a regular # expression in Ruby. The expression must match at the beginning of the input @@ -610,11 +649,11 @@ # A Predicate is a Nonterminal that contains one other rule. module Predicate include Nonterminal def initialize(rule='') - super([ rule ]) + super([rule]) end # Returns the Rule object this rule uses to match. def rule rules[0] @@ -661,44 +700,76 @@ def to_s '!' + rule.embed end end + # A ButPredicate is a Predicate that consumes all characters until its rule + # matches. It must match at least one character in order to succeed. The + # Citrus notation is any expression preceded by a tilde, e.g.: + # + # ~expr + # + class ButPredicate + include Predicate + + DOT_RULE = Rule.new(DOT) + + # Returns the Match for this rule on +input+ at the given +offset+, +nil+ if + # no match can be made. + def match(input, offset=0) + matches = [] + os = offset + while input.match(rule, os).nil? + m = input.match(DOT_RULE, os) + break unless m + matches << m + os += m.length + end + # Create a single match from the aggregate text value of all submatches. + create_match(matches.join, offset) if matches.any? + end + + # Returns the Citrus notation of this rule as a string. + def to_s + '~' + rule.embed + end + end + # A Label is a Predicate that applies a new name to any matches made by its # rule. The Citrus notation is any sequence of word characters (i.e. # <tt>[a-zA-Z0-9_]</tt>) followed by a colon, followed by any other # expression, e.g.: # # label:expr # class Label include Predicate - def initialize(label_name='<label>', rule='') + def initialize(rule='', label='<label>') super(rule) - self.label_name = label_name + self.label = label end # Sets the name of this label. - def label_name=(label_name) - @label_name = label_name.to_sym + def label=(label) + @label = label.to_sym end - # The name this rule adds to all its matches. - attr_reader :label_name + # The label this rule adds to all its matches. + attr_reader :label # Returns the Match for this rule on +input+ at the given +offset+, +nil+ if # no match can be made. When a Label makes a match, it re-names the match to # the value of its label. def match(input, offset=0) - m = rule.match(input, offset) - extend_match(m, label_name) if m + m = input.match(rule, offset) + extend_match(m, label) if m end # Returns the Citrus notation of this rule as a string. def to_s - label_name.to_s + ':' + rule.embed + label.to_s + ':' + rule.embed end end # A Repeat is a Predicate that specifies a minimum and maximum number of times # its rule must match. The Citrus notation is an integer, +N+, followed by an @@ -720,14 +791,14 @@ # expr? # class Repeat include Predicate - def initialize(min=1, max=Infinity, rule='') + def initialize(rule='', min=1, max=Infinity) + super(rule) raise ArgumentError, "Min cannot be greater than max" if min > max @range = Range.new(min, max) - super(rule) end # Returns the Match for this rule on +input+ at the given +offset+, +nil+ if # no match can be made. def match(input, offset=0) @@ -833,19 +904,20 @@ end # The base class for all matches. Matches are organized into a tree where any # match may contain any number of other matches. This class provides several # convenient tree traversal methods that help when examining parse results. - class Match + class Match < String def initialize(data, offset=0) case data when String - @text = data + super(data) when MatchData - @text = data[0] + super(data[0]) @captures = data.captures when Array + super(data.join) @matches = data end @offset = offset end @@ -868,15 +940,10 @@ # Returns +true+ if this match has the given +name+. def has_name?(name) names.include?(name) end - # An array of all extension modules of this match. - def extensions - @extensions ||= [] - end - # An array of all sub-matches of this match. def matches @matches ||= [] end @@ -884,35 +951,17 @@ # created by an Expression. def captures @captures ||= [] end - # Returns the raw text value of this match, which may simply be an - # aggregate of the text of all sub-matches if this match is not #terminal?. - def text - @text ||= matches.inject('') {|s, m| s << m.text } - end - - alias to_s text - - # Returns the length of this match's #text value as an Integer. - def length - text.length - end - - # Passes all arguments to the #text of this match. - def [](*args) - text.__send__(:[], *args) - end - # Returns an array of all sub-matches with the given +name+. If +deep+ is # +false+, returns only sub-matches that are immediate descendants of this # match. def find(name, deep=true) sym = name.to_sym ms = matches.select {|m| m.has_name?(sym) } - ms.concat(matches.map {|m| m.find(name, deep) }.flatten) if deep + matches.each {|m| ms.concat(m.find(name, deep)) } if deep ms end # A shortcut for retrieving the first immediate sub-match of this match. If # +name+ is given, attempts to retrieve the first immediate sub-match named @@ -925,44 +974,24 @@ # Terminal). def terminal? matches.length == 0 end - # Checks equality by comparing this match's #text value to +obj+. - def ==(obj) - obj == text + # Creates a new String object from the contents of this match. + def to_s + String.new(self) end - alias eql? == - - private - - def redefine_method_missing! # :nodoc: - instance_eval(<<-RUBY, __FILE__, __LINE__ + 1) - def method_missing(sym, *args) - if sym == :to_ary - original_method_missing(sym, *args) - else - m = first(sym) - return m if m - raise 'No match named "%s" in %s (%s)' % [sym, self, name] - end - end - RUBY - end - - alias original_method_missing method_missing - - public - # Allows sub-matches of this match to be retrieved by name as instance # methods. def method_missing(sym, *args) - # Extend this object only when needed and immediately redefine - # #method_missing so that the new version is used on all future calls. - extensions.each {|e| extend(e) } if @extensions - redefine_method_missing! - __send__(sym, *args) + m = first(sym) + return m if m + raise 'No match named "%s" in %s (%s)' % [sym, self, name] + end + + def to_ary + # This method intentionally left blank to work around a bug in Ruby 1.9. end end end class Object