lib/citrus.rb in citrus-2.1.2 vs lib/citrus.rb in citrus-2.2.0
- old
+ new
@@ -6,11 +6,11 @@
#
# http://mjijackson.com/citrus
module Citrus
autoload :File, 'citrus/file'
- VERSION = [2, 1, 2]
+ VERSION = [2, 2, 0]
# The Citrus version number rendered as a dot-separated string.
def self.version
  VERSION * '.'
end
@@ -20,10 +20,12 @@
Infinity = 1.0 / 0
F = ::File
+ CLOSE = -1
+
# Loads the grammar from the given +file+ into the global scope using #eval.
def self.load(file)
file << '.citrus' unless F.file?(file)
raise "Cannot find file #{file}" unless F.file?(file)
raise "Cannot read file #{file}" unless F.readable?(file)
@@ -38,30 +40,16 @@
end
# Parses the given Citrus +code+ using the given +options+. Returns the
# generated match tree. Any error raised by File.parse propagates to the
# caller unchanged (presumably a +ParseError+ on bad syntax -- verify against
# File.parse).
def self.parse(code, options={})
- begin
- File.parse(code, options)
- rescue ParseError => e
- raise SyntaxError.new(e)
- end
+ File.parse(code, options)
end
# A standard error class that all Citrus errors extend.
class Error < RuntimeError; end
- # Raised when there is an error parsing Citrus code.
- class SyntaxError < Error
- # The +error+ given here should be a +ParseError+ object.
- def initialize(error)
- msg = "Syntax error on line %d at offset %d\n%s" %
- [error.line_number, error.line_offset, error.detail]
- super(msg)
- end
- end
-
# Raised when a match cannot be found.
class NoMatchError < Error; end
# Raised when a parse fails.
class ParseError < Error
@@ -69,12 +57,12 @@
def initialize(input)
@offset = input.max_offset
@line_offset = input.line_offset(offset)
@line_number = input.line_number(offset)
@line = input.line(offset)
- msg = "Failed to parse input at offset %d\n" % offset
- msg << detail
+ msg = "Failed to parse input on line %d at offset %d\n%s" %
+ [line_number, line_offset, detail]
super(msg)
end
# The 0-based offset at which the error occurred in the input, i.e. the
# maximum offset in the input that was successfully parsed before the error
@@ -104,25 +92,25 @@
def initialize(string)
super(string)
@max_offset = 0
end
- # The maximum offset that has been achieved during a parse.
+ # The maximum offset in the input that was successfully parsed.
attr_reader :max_offset
# A nested hash of rule ids to offsets and their respective matches. Only
# present if memoization is enabled.
attr_reader :cache
# The number of times the cache was hit. Only present if memoization is enabled.
attr_reader :cache_hits
- # Resets all internal variables so that this object may be used in
- # another parse.
- def reset
- super
+ # Resets all internal variables so that this object may be used in another
+ # parse.
+ def reset # :nodoc:
@max_offset = 0
+ super
end
# Returns the length of this input.
def length
string.length
@@ -151,11 +139,11 @@
end
# Returns the 0-based number of the line that contains the character at the
# given +pos+. +pos+ defaults to the current pointer position.
def line_index(pos=pos)
- p, n = 0, 0
+ p = n = 0
each_line do |line|
p += line.length
return n if p >= pos
n += 1
end
@@ -174,26 +162,35 @@
# +pos+. +pos+ defaults to the current pointer position.
def line(pos=pos())
  # The default calls the #pos reader explicitly: since Ruby 2.2 a bare +pos+
  # inside its own default refers to the (still nil) parameter instead of the
  # method, which made the no-argument form fail.
  lines[line_index(pos)]
end
- # Returns the match for the given +rule+ at the current pointer position,
- # which is +nil+ if no match can be made.
- def match(rule)
- offset = pos
- match = rule.match(self)
+ # Returns an array of events for the given +rule+ at the current pointer
+ # position. Objects in this array may be one of three types: a rule id,
+ # Citrus::CLOSE, or a length.
+ #
+ # The rule appends its events to +events+ (a new array by default). If it
+ # appended anything, the last event is taken as the match length and the
+ # pointer is moved to the end of the match; otherwise the pointer is put
+ # back where it started. The same +events+ array is returned either way.
+ def exec(rule, events=[])
+ start = pos
+ # Remember the size so we can tell afterwards whether the rule matched.
+ index = events.size
- if match
+ rule.exec(self, events)
+
+ if index < events.size
+ self.pos = start + events[-1]
@max_offset = pos if pos > @max_offset
else
- # Reset the position for the next attempt at a match.
- self.pos = offset unless match
+ # No events were added: reset the position for the next attempt.
+ self.pos = start
end
- match
+ events
end
+ # Returns the length of a match for the given +rule+ at the current pointer
+ # position, +nil+ if none can be made.
+ def test(rule)
+ rule.exec(self)[-1]
+ end
+
# Tells whether match results are being cached (see #memoize!). Always
# returns exactly +true+ or +false+.
def memoized?
  @cache ? true : false
end
@@ -203,33 +200,35 @@
# required to perform a parse. For more information, please read the paper
# on Packrat parsing at http://pdos.csail.mit.edu/~baford/packrat/icfp02/.
def memoize!
return if memoized?
+ @cache = {}
+ @cache_hits = 0
+
# Using +instance_eval+ here preserves access to +super+ within the
# methods we define inside the block.
instance_eval do
- def match(rule) # :nodoc:
+ def exec(rule, events=[]) # :nodoc:
c = @cache[rule.id] ||= {}
- if c.key?(pos)
+ e = if c[pos]
@cache_hits += 1
c[pos]
else
- c[pos] = super
+ c[pos] = super(rule)
end
+
+ events.concat(e)
end
def reset # :nodoc:
- super
- @cache = {}
+ @cache.clear
@cache_hits = 0
+ super
end
end
-
- @cache = {}
- @cache_hits = 0
end
end
# Inclusion of this module into another extends the receiver with the grammar
# helper methods in GrammarMethods. Although this module does not actually
@@ -264,10 +263,20 @@
def self.extend_object(obj)
raise ArgumentError, "Grammars must be Modules" unless Module === obj
super
end
+ # Parses the given +string+ using this grammar's root rule. Optionally, the
+ # name of a different rule may be given here as the value of the +:root+
+ # option. Otherwise, all options are the same as in Rule#parse.
+ def parse(string, options={})
+ rule_name = options.delete(:root) || root
+ rule = rule(rule_name)
+ raise 'No rule named "%s"' % rule_name unless rule
+ rule.parse(string, options)
+ end
+
# Returns the name of this grammar as a string.
def name
super.to_s
end
@@ -308,13 +317,13 @@
# Searches the inheritance hierarchy of this grammar for a rule named +name+
# and returns it on success. Returns +nil+ on failure.
def super_rule(name)
sym = name.to_sym
- included_grammars.each do |g|
- r = g.rule(sym)
- return r if r
+ included_grammars.each do |grammar|
+ rule = grammar.rule(sym)
+ return rule if rule
end
nil
end
# Gets/sets the rule with the given +name+. If +obj+ is given the rule
@@ -431,52 +440,10 @@
rule = Rule.new(rule)
mod = block if block
rule.extension = mod if mod
rule
end
-
- # Parses the given input +string+ using the given +options+. If no match can
- # be made, a ParseError is raised. See #default_parse_options for a detailed
- # description of available parse options.
- def parse(string, options={})
- opts = default_parse_options.merge(options)
- raise 'No root rule specified' unless opts[:root]
-
- root_rule = rule(opts[:root])
- raise 'No rule named "%s"' % root unless root_rule
-
- input = Input.new(string)
- input.memoize! if opts[:memoize]
- input.pos = opts[:offset] if opts[:offset] > 0
-
- match = input.match(root_rule)
- if match.nil? || (opts[:consume] && input.length != match.length)
- raise ParseError.new(input)
- end
-
- match
- end
-
- # The default set of options that is used in #parse. The options hash may
- # have any of the following keys:
- #
- # offset:: The offset at which the parse should start. Defaults to 0.
- # root:: The name of the root rule to use for the parse. Defaults
- # to the name supplied by calling #root.
- # memoize:: If this is +true+ the matches generated during a parse are
- # memoized. See Input#memoize! for more information. Defaults to
- # +false+.
- # consume:: If this is +true+ a ParseError will be raised during a parse
- # unless the entire input string is consumed. Defaults to
- # +false+.
- def default_parse_options
- { :offset => 0,
- :root => root,
- :memoize => false,
- :consume => false
- }
- end
end
# A Rule is an object that is used by a grammar to create matches on the
# Input during parsing.
module Rule
@@ -489,54 +456,73 @@
end
# Returns a new Rule object depending on the type of object given.
def self.new(obj)
case obj
- when Rule then obj
- when Symbol then Alias.new(obj)
- when String, Regexp then Terminal.new(obj)
- when Array then Sequence.new(obj)
- when Range then Choice.new(obj.to_a)
- when Numeric then Terminal.new(obj.to_s)
+ when Rule then obj
+ when Symbol then Alias.new(obj)
+ when String then StringTerminal.new(obj)
+ when Regexp then Terminal.new(obj)
+ when Array then Sequence.new(obj)
+ when Range then Choice.new(obj.to_a)
+ when Numeric then StringTerminal.new(obj.to_s)
else
raise ArgumentError, "Invalid rule object: %s" % obj.inspect
end
end
@unique_id = 0
- # Generates a new rule id.
- def self.new_id
- @unique_id += 1
+ # A global registry for Rule objects. Keyed by rule id.
+ @rules = {}
+
+ # Adds the given +rule+ to the global registry and gives it an id.
+ def self.<<(rule) # :nodoc:
+ rule.id = (@unique_id += 1)
+ @rules[rule.id] = rule
end
- # The grammar this rule belongs to.
- attr_accessor :grammar
+ # Returns the Rule object with the given +id+.
+ def self.[](id)
+ @rules[id]
+ end
- # An integer id that is unique to this rule.
- def id
- @id ||= Rule.new_id
+ def initialize(*args) # :nodoc:
+ Rule << self
end
+ # An integer id that is unique to this rule.
+ attr_accessor :id
+
+ # The grammar this rule belongs to.
+ attr_accessor :grammar
+
# Sets the name of this rule.
def name=(name)
@name = name.to_sym
end
- # The name of this rule.
- attr_reader :name
+ # Returns the name of this rule.
+ def name
+ @name || '<anonymous>'
+ end
+ # Returns +true+ if this rule has a name, +false+ otherwise.
+ def named?
+ !! @name
+ end
+
# Specifies a module that will be used to extend all Match objects that
# result from this rule. If +mod+ is a Proc, it is used to create an
# anonymous module.
def extension=(mod)
if Proc === mod
begin
tmp = Module.new(&mod)
- raise ArgumentError unless tmp.instance_methods.any?
+ raise ArgumentError if tmp.instance_methods.empty?
mod = tmp
- rescue ArgumentError, NameError, NoMethodError
+ rescue NoMethodError, ArgumentError, NameError
mod = Module.new { define_method(:value, &mod) }
end
end
raise ArgumentError unless Module === mod
@@ -545,52 +531,179 @@
end
# The module this rule uses to extend new matches.
attr_reader :extension
+ # Attempts to parse the given +string+ and return a Match if any can be
+ # made. Raises a ParseError if this rule does not match at the starting
+ # offset, or if +:consume+ is set and the match stops short of the end of
+ # the input. The +options+ may contain any of the following keys:
+ #
+ # offset:: The offset in +string+ at which to start the parse. Defaults
+ # to 0.
+ # memoize:: If this is +true+ the matches generated during a parse are
+ # memoized. See Input#memoize! for more information. Defaults to
+ # +false+.
+ # consume:: If this is +true+ a ParseError will be raised during a parse
+ # unless the entire input string is consumed. Defaults to
+ # +false+.
+ def parse(string, options={})
+ opts = default_parse_options.merge(options)
+
+ input = Input.new(string)
+ input.memoize! if opts[:memoize]
+ input.pos = opts[:offset] if opts[:offset] > 0
+
+ start = input.pos
+ events = input.exec(self)
+ # The last event of a successful match is its total length; an empty
+ # events array (no match) yields nil here.
+ length = events[-1]
+
+ # With :consume the match must cover everything from the offset onwards.
+ if !length || (opts[:consume] && length < (input.length - opts[:offset]))
+ raise ParseError.new(input)
+ end
+
+ Match.new(string.slice(start, length), events)
+ end
+
+ # The default set of options to use when parsing.
+ def default_parse_options # :nodoc:
+ { :offset => 0,
+ :memoize => false,
+ :consume => false
+ }
+ end
+
+ # Tests whether or not this rule matches on the given +string+. Returns the
+ # length of the match if any can be made, +nil+ otherwise.
+ def test(string)
+ input = Input.new(string)
+ input.test(self)
+ end
+
# Tells whether this rule is a Terminal (or an instance of a subclass).
def terminal?
  Terminal === self
end
+ # Returns +true+ if this rule is able to propagate extensions from child
+ # rules to the scope of the parent, +false+ otherwise. In general, this will
+ # return +false+ for any rule whose match value is derived from an arbitrary
+ # number of child rules, such as a Repeat or a Sequence. Note that this is
+ # not true for Choice objects because they rely on exactly 1 rule to match,
+ # as do Proxy objects.
+ def propagates_extensions?
+ case self
+ when AndPredicate, NotPredicate, ButPredicate, Repeat, Sequence
+ false
+ else
+ true
+ end
+ end
+
# Returns +true+ if this rule needs to be surrounded by parentheses when
# using #embed.
def paren?
false
end
# Returns a string version of this rule that is suitable to be used in the
# string representation of another rule.
def embed
- name ? name.to_s : (paren? ? '(%s)' % to_s : to_s)
+ named? ? name.to_s : (paren? ? '(%s)' % to_s : to_s)
end
def inspect # :nodoc:
to_s
end
+ end
- private
+ # A Terminal is a Rule that matches directly on the input stream and may not
+ # contain any other rule. Terminals are essentially wrappers for regular
+ # expressions. As such, the Citrus notation is identical to Ruby's regular
+ # expression notation, e.g.:
+ #
+ # /expr/
+ #
+ # Character classes and the dot symbol may also be used in Citrus notation for
+ # compatibility with other parsing expression implementations, e.g.:
+ #
+ # [a-zA-Z]
+ # .
+ #
+ class Terminal
+ include Rule
- def extend_match(match, name)
- match.extend(extension) if extension
- match.names << name if name
- match
+ def initialize(rule=/^/)
+ super
+ @rule = rule
end
- def create_match(data)
- extend_match(Match.new(data), name)
+ # The actual Regexp object this rule uses to match.
+ attr_reader :rule
+
+ # Returns an array of events for this rule on the given +input+.
+ def exec(input, events=[])
+ length = input.scan_full(rule, false, false)
+ if length
+ events << id
+ events << CLOSE
+ events << length
+ end
+ events
end
+
+ # Returns +true+ if this rule is case sensitive.
+ def case_sensitive?
+ !rule.casefold?
+ end
+
+ # Returns the Citrus notation of this rule as a string.
+ def to_s
+ rule.inspect
+ end
end
+ # A StringTerminal is a Terminal that may be instantiated from a String
+ # object. The Citrus notation is any sequence of characters enclosed in either
+ # single or double quotes, e.g.:
+ #
+ # 'expr'
+ # "expr"
+ #
+ # This notation works the same as it does in Ruby; i.e. strings in double
+ # quotes may contain escape sequences while strings in single quotes may not.
+ # In order to specify that a string should ignore case when matching, enclose
+ # it in backticks instead of single or double quotes, e.g.:
+ #
+ # `expr`
+ #
+ # Besides case sensitivity, case-insensitive strings have the same semantics
+ # as double-quoted strings.
+ class StringTerminal < Terminal
+ # The +flags+ will be passed directly to Regexp#new.
+ def initialize(rule='', flags=0)
+ super(Regexp.new(Regexp.escape(rule), flags))
+ @string = rule
+ end
+
+ # Returns the Citrus notation of this rule as a string.
+ def to_s
+ if case_sensitive?
+ @string.inspect
+ else
+ @string.inspect.gsub(/^"|"$/, '`')
+ end
+ end
+ end
+
# A Proxy is a Rule that is a placeholder for another rule. It stores the
# name of some other rule in the grammar internally and resolves it to the
# actual Rule object at runtime. This lazy evaluation permits us to create
# Proxy objects for rules that we may not know the definition of yet.
module Proxy
include Rule
def initialize(rule_name='<proxy>')
+ super
self.rule_name = rule_name
end
# Sets the name of the rule this rule is proxy for.
def rule_name=(rule_name)
@@ -603,14 +716,13 @@
# Returns the underlying Rule for this proxy.
def rule
@rule ||= resolve!
end
- # Returns the Match for this rule on +input+, +nil+ if no match can be made.
- def match(input)
- m = input.match(rule)
- extend_match(m, name) if m
+ # Returns an array of events for this rule on the given +input+.
+ def exec(input, events=[])
+ input.exec(rule, events)
end
end
# An Alias is a Proxy for a rule in the same grammar. It is used in rule
# definitions when a rule calls some other rule by name. The Citrus notation
@@ -629,14 +741,12 @@
private
# Searches this proxy's grammar and any included grammars for a rule with
# this proxy's #rule_name. Raises an error if one cannot be found.
def resolve!
- rule = grammar.rule(rule_name)
- raise RuntimeError, 'No rule named "%s" in grammar %s' %
- [rule_name, grammar.name] unless rule
- rule
+ grammar.rule(rule_name) or raise RuntimeError,
+ 'No rule named "%s" in grammar %s' % [rule_name, grammar.name]
end
end
# A Super is a Proxy for a rule of the same name that was defined previously
# in the grammar's inheritance chain. Thus, Supers work like Ruby's +super+,
@@ -656,84 +766,33 @@
private
# Searches this proxy's included grammars for a rule with this proxy's
# #rule_name. Raises an error if one cannot be found.
def resolve!
- rule = grammar.super_rule(rule_name)
- raise RuntimeError, 'No rule named "%s" in hierarchy of grammar %s' %
- [rule_name, grammar.name] unless rule
- rule
+ grammar.super_rule(rule_name) or raise RuntimeError,
+ 'No rule named "%s" in hierarchy of grammar %s' % [rule_name, grammar.name]
end
end
- # A Terminal is a Rule that matches directly on the input stream and may not
- # contain any other rule. Terminals may be created from either a String or a
- # Regexp object. When created from strings, the Citrus notation is any
- # sequence of characters enclosed in either single or double quotes, e.g.:
- #
- # 'expr'
- # "expr"
- #
- # When created from a regular expression, the Citrus notation is identical to
- # Ruby's regular expression notation, e.g.:
- #
- # /expr/
- #
- # Character classes and the dot symbol may also be used in Citrus notation for
- # compatibility with other parsing expression implementations, e.g.:
- #
- # [a-zA-Z]
- # .
- #
- class Terminal
- include Rule
-
- def initialize(rule='')
- case rule
- when String
- @string = rule
- @rule = Regexp.new(Regexp.escape(rule))
- when Regexp
- @rule = rule
- else
- raise ArgumentError, "Cannot create terminal from object: %s" %
- rule.inspect
- end
- end
-
- # The actual Regexp object this rule uses to match.
- attr_reader :rule
-
- # Returns the Match for this rule on +input+, +nil+ if no match can be made.
- def match(input)
- m = input.scan(rule)
- create_match(m) if m
- end
-
- # Returns the Citrus notation of this rule as a string.
- def to_s
- (@string || @rule).inspect
- end
- end
-
# A Nonterminal is a Rule that augments the matching behavior of one or more
# other rules. Nonterminals may not match directly on the input, but instead
# invoke the rule(s) they contain to determine if a match can be made from
# the collective result.
module Nonterminal
include Rule
def initialize(rules=[])
+ super
@rules = rules.map {|r| Rule.new(r) }
end
# An array of the actual Rule objects this rule uses to match.
attr_reader :rules
- def grammar=(grammar)
- @rules.each {|r| r.grammar = grammar }
+ def grammar=(grammar) # :nodoc:
super
+ @rules.each {|r| r.grammar = grammar }
end
end
# A Predicate is a Nonterminal that contains one other rule.
module Predicate
@@ -756,13 +815,18 @@
# &expr
#
class AndPredicate
include Predicate
- # Returns the Match for this rule on +input+, +nil+ if no match can be made.
- def match(input)
- create_match('') if input.match(rule)
+ # Returns an array of events for this rule on the given +input+.
+ def exec(input, events=[])
+ if input.test(rule)
+ events << id
+ events << CLOSE
+ events << 0
+ end
+ events
end
# Returns the Citrus notation of this rule as a string.
def to_s
'&' + rule.embed
@@ -776,13 +840,18 @@
# !expr
#
class NotPredicate
include Predicate
- # Returns the Match for this rule on +input+, +nil+ if no match can be made.
- def match(input)
- create_match('') unless input.match(rule)
+ # Returns an array of events for this rule on the given +input+.
+ def exec(input, events=[])
+ unless input.test(rule)
+ events << id
+ events << CLOSE
+ events << 0
+ end
+ events
end
# Returns the Citrus notation of this rule as a string.
def to_s
'!' + rule.embed
@@ -798,20 +867,24 @@
class ButPredicate
include Predicate
DOT_RULE = Rule.new(DOT)
- # Returns the Match for this rule on +input+, +nil+ if no match can be made.
- def match(input)
- matches = []
- while input.match(rule).nil?
- m = input.match(DOT_RULE)
- break unless m
- matches << m
+ # Returns an array of events for this rule on the given +input+.
+ def exec(input, events=[])
+ length = 0
+ until input.test(rule)
+ len = input.exec(DOT_RULE)[-1]
+ break unless len
+ length += len
end
- # Create a single match from the aggregate text value of all submatches.
- create_match(matches.join) if matches.any?
+ if length > 0
+ events << id
+ events << CLOSE
+ events << length
+ end
+ events
end
# Returns the Citrus notation of this rule as a string.
def to_s
'~' + rule.embed
@@ -839,16 +912,13 @@
end
# The label this rule adds to all its matches.
attr_reader :label
- # Returns the Match for this rule on +input+, +nil+ if no match can be made.
- # When a Label makes a match, it re-names the match to the value of its
- # #label.
- def match(input)
- m = input.match(rule)
- extend_match(m, label) if m
+ # Returns an array of events for this rule on the given +input+.
+ def exec(input, events=[])
+ input.exec(rule, events)
end
# Returns the Citrus notation of this rule as a string.
def to_s
label.to_s + ':' + rule.embed
@@ -876,24 +946,36 @@
#
class Repeat
include Predicate
def initialize(rule='', min=1, max=Infinity)
- super(rule)
raise ArgumentError, "Min cannot be greater than max" if min > max
+ super(rule)
@range = Range.new(min, max)
end
- # Returns the Match for this rule on +input+, +nil+ if no match can be made.
- def match(input)
- matches = []
- while matches.length < @range.end
- m = input.match(rule)
- break unless m
- matches << m
+ # Returns an array of events for this rule on the given +input+.
+ def exec(input, events=[])
+ events << id
+
+ index = events.size
+ start = index - 1
+ length = n = 0
+ while n < max && input.exec(rule, events).size > index
+ index = events.size
+ length += events[-1]
+ n += 1
end
- create_match(matches) if @range.include?(matches.length)
+
+ if n >= min
+ events << CLOSE
+ events << length
+ else
+ events.slice!(start, events.size)
+ end
+
+ events
end
# The minimum number of times this rule must match.
def min
@range.begin
@@ -939,17 +1021,29 @@
# expr | expr
#
class Choice
include List
- # Returns the Match for this rule on +input+, +nil+ if no match can be made.
- def match(input)
- rules.each do |rule|
- m = input.match(rule)
- return extend_match(m, name) if m
+ # Returns an array of events for this rule on the given +input+.
+ def exec(input, events=[])
+ events << id
+
+ index = events.size
+ start = index - 1
+ n = 0
+ while n < rules.length && input.exec(rules[n], events).size == index
+ n += 1
end
- nil
+
+ if index < events.size
+ events << CLOSE
+ events << events[-2]
+ else
+ events.slice!(start, events.size)
+ end
+
+ events
end
# Returns the Citrus notation of this rule as a string.
def to_s
rules.map {|r| r.embed }.join(' | ')
@@ -962,19 +1056,31 @@
# expr expr
#
class Sequence
include List
- # Returns the Match for this rule on +input+, +nil+ if no match can be made.
- def match(input)
- matches = []
- rules.each do |rule|
- m = input.match(rule)
- break unless m
- matches << m
+ # Returns an array of events for this rule on the given +input+.
+ def exec(input, events=[])
+ events << id
+
+ index = events.size
+ start = index - 1
+ length = n = 0
+ while n < rules.length && input.exec(rules[n], events).size > index
+ index = events.size
+ length += events[-1]
+ n += 1
end
- create_match(matches) if matches.length == rules.length
+
+ if n == rules.length
+ events << CLOSE
+ events << length
+ else
+ events.slice!(start, events.size)
+ end
+
+ events
end
# Returns the Citrus notation of this rule as a string.
def to_s
rules.map {|r| r.embed }.join(' ')
@@ -983,23 +1089,23 @@
# The base class for all matches. Matches are organized into a tree where any
# match may contain any number of other matches. This class provides several
# convenient tree traversal methods that help when examining parse results.
class Match < String
- def initialize(data)
- case data
- when String
- super(data)
- when Array
- super(data.join)
- @matches = data
- else
- raise ArgumentError, "Cannot create match from object: %s" %
- data.inspect
- end
+ def initialize(string, events=[])
+ raise ArgumentError, "Invalid events for match length %d" %
+ string.length if events[-1] && string.length != events[-1]
+
+ super(string)
+ @events = events
+
+ extend!
end
+ # The array of events that was passed to the constructor.
+ attr_reader :events
+
# An array of all names of this match. A name is added to a match object
# for each rule that returns that object when matching. These names can then
# be used to determine which rules were satisfied by a given match.
def names
@names ||= []
@@ -1010,56 +1116,113 @@
names.first
end
# Returns +true+ if this match has the given +name+.
def has_name?(name)
- names.include?(name)
+ names.include?(name.to_sym)
end
- # An array of all sub-matches of this match.
+ # Returns an array of all Rule objects that extend this match.
+ def extenders
+ @extenders ||= begin
+ extenders = []
+ @events.each do |event|
+ break if event == CLOSE
+ rule = Rule[event]
+ extenders.unshift(rule)
+ break unless rule.propagates_extensions?
+ end
+ extenders
+ end
+ end
+
+ # Returns a reference to the Rule object that first created this match.
+ def creator
+ extenders.first
+ end
+
+ # Returns an array of Match objects that are submatches of this match in the
+ # order they appeared in the input. The result is computed once and cached.
+ #
+ # The flat #events array is walked a single time. Every event that is
+ # neither Citrus::CLOSE nor the length that follows a CLOSE is treated as
+ # the start of a rule and its index is pushed on a stack. When a rule
+ # closes, its start index is popped; if the remaining stack depth equals the
+ # number of #extenders, the closed rule is taken to be an immediate
+ # submatch and a new Match is built from its part of the text and events.
def matches
- @matches ||= []
+ @matches ||= begin
+ matches = []
+ stack = []
+ offset = 0
+ close = false
+ index = 0
+
+ while index < @events.size
+ event = @events[index]
+ if close
+ # The event right after a CLOSE is the length of the rule just closed.
+ start = stack.pop
+ if stack.size == extenders.size
+ matches << Match.new(slice(offset, event), @events[start..index])
+ # Submatches are consecutive, so the next one starts where this ends.
+ offset += event
+ end
+ close = false
+ elsif event == CLOSE
+ close = true
+ else
+ stack << index
+ end
+ index += 1
+ end
+
+ matches
+ end
end
# Returns an array of all sub-matches with the given +name+. If +deep+ is
# +false+, returns only sub-matches that are immediate descendants of this
# match.
def find(name, deep=true)
- sym = name.to_sym
- ms = matches.select {|m| m.has_name?(sym) }
+ ms = matches.select {|m| m.has_name?(name) }
matches.each {|m| ms.concat(m.find(name, deep)) } if deep
ms
end
# A shortcut for retrieving the first immediate sub-match of this match. If
# +name+ is given, attempts to retrieve the first immediate sub-match named
# +name+.
def first(name=nil)
- name.nil? ? matches.first : find(name, false).first
+ name ? find(name, false).first : matches.first
end
- # Returns +true+ if this match has no descendants (was created from a
- # Terminal).
- def terminal?
- matches.length == 0
+ # Allows sub-matches of this match to be retrieved by name as instance
+ # methods.
+ def method_missing(sym, *args)
+ if sym == :to_ary
+ # This is a workaround for a bug in Ruby 1.9 with classes that
+ # extend String.
+ super
+ else
+ first(sym) or raise NoMatchError, 'No match named "%s" in %s (%s)' %
+ [sym, self, name]
+ end
end
- # Creates a new String object from the contents of this match.
- def to_s
- String.new(self)
+ # Returns a string representation of this match that displays the entire
+ # match tree for easy viewing in the console.
+ def dump
+ dump_lines.join("\n")
end
- # Allows sub-matches of this match to be retrieved by name as instance
- # methods.
- def method_missing(sym, *args)
- m = first(sym)
- return m if m
- raise NoMatchError, 'No match named "%s" in %s (%s)' %
- [sym, self, name || '<anonymous>']
+ def dump_lines(indent=' ') # :nodoc:
+ line = to_s.inspect
+ line << ' (%s)' % names.join(',') unless names.empty?
+ matches.inject([line]) do |lines, m|
+ lines.concat(m.dump_lines(indent).map {|line| indent + line })
+ end
end
- def to_ary
- # This method intentionally left blank to work around a bug in Ruby 1.9.
+ private
+
+ # Applies every rule in #extenders to this match: the rule's name is added
+ # to #names when the rule is named, and the rule's extension module is mixed
+ # into this object when the rule has one.
+ def extend! # :nodoc:
+ extenders.each do |rule|
+ self.names << rule.name if rule.named?
+ extend(rule.extension) if rule.extension
+ end
+ end
end
end
class Object