lib/citrus.rb in citrus-2.3.1 vs lib/citrus.rb in citrus-2.3.2
- old
+ new
@@ -6,27 +6,23 @@
#
# http://mjijackson.com/citrus
module Citrus
autoload :File, 'citrus/file'
- VERSION = [2, 3, 1]
+ # The current version of Citrus as [major, minor, patch].
+ VERSION = [2, 3, 2]
- # Returns the current version of Citrus as a string.
- def self.version
- VERSION.join('.')
- end
-
- # A pattern to match any character, including \\n.
+ # A pattern to match any character, including newline.
DOT = /./m
# Floating-point positive infinity, produced by dividing 1.0 by zero.
Infinity = 1.0 / 0
# Marker pushed onto an event stream just before a match length when a rule
# closes; it is one of the three kinds of object an events array may hold
# (a Rule, CLOSE, or an integer length).
CLOSE = -1
- # Parses the given Citrus +code+ using +options+.
- def self.parse(code, options={})
- File.parse(code, options)
+ # Returns the current version of Citrus as a string.
+ def self.version
+ VERSION.join('.')
end
# Evaluates the given Citrus parsing expression grammar +code+ in the global
# scope. Returns an array of any grammar modules that are created.
#
@@ -35,29 +31,35 @@
# rule abc
# "abc"
# end
# end
# CITRUS
+ # # => [MyGrammar]
#
- def self.eval(code)
- parse(code).value
+ def self.eval(code, options={})
+ File.parse(code, options).value
end
# Evaluates the given expression and creates a new Rule object from it.
#
# Citrus.rule('"a" | "b"')
+ # # => #<Citrus::Rule: ... >
#
- def self.rule(expr)
- parse(expr, :root => :rule_body).value
+ def self.rule(expr, options={})
+ File.parse(expr, options.merge(:root => :rule_body)).value
end
# Loads the grammar from the given +file+ into the global scope using #eval.
- def self.load(file)
+ #
+ # Citrus.load('mygrammar')
+ # # => [MyGrammar]
+ #
+ def self.load(file, options={})
file << '.citrus' unless ::File.file?(file)
- raise "Cannot find file #{file}" unless ::File.file?(file)
- raise "Cannot read file #{file}" unless ::File.readable?(file)
- eval(::File.read(file))
+ raise ArgumentError, "Cannot find file #{file}" unless ::File.file?(file)
+ raise ArgumentError, "Cannot read file #{file}" unless ::File.readable?(file)
+ eval(::File.read(file), options)
end
# A standard error class that all Citrus errors extend.
class Error < RuntimeError; end
@@ -155,18 +157,23 @@
# +pos+. +pos+ defaults to the current pointer position.
def line(pos = pos())
  # The default is written as `pos()` with explicit parentheses on purpose.
  # A bare `pos` here would refer to the parameter being defined (a circular
  # argument reference that evaluates to nil on Ruby 1.9+) instead of
  # calling the #pos reader, so the "current pointer position" default
  # would silently become nil.
  lines[line_index(pos)]
end
+ # Returns +true+ when using memoization to cache match results.
+ def memoized?
+ false
+ end
+
# Returns an array of events for the given +rule+ at the current pointer
# position. Objects in this array may be one of three types: a Rule,
# Citrus::CLOSE, or a length (integer).
def exec(rule, events=[])
- index = events.size
position = pos
+ index = events.size
- if rule.exec(self, events).size > index
+ if apply_rule(rule, position, events).size > index
position += events[-1]
@max_offset = position if position > @max_offset
end
self.pos = position
@@ -175,29 +182,31 @@
end
# Returns the length of a match for the given +rule+ at the current pointer
# position, +nil+ if none can be made.
def test(rule)
- start = pos
- events = rule.exec(self)
- self.pos = start
+ position = pos
+ events = apply_rule(rule, position, [])
+ self.pos = position
events[-1]
end
- # Returns +true+ when using memoization to cache match results.
- def memoized?
- false
+ private
+
+ # Appends all events for +rule+ at the given +position+ to +events+.
+ def apply_rule(rule, position, events)
+ rule.exec(self, events)
end
end
- # A MemoizingInput is an Input that caches segments of the event stream for
+ # A MemoizedInput is an Input that caches segments of the event stream for
# particular rules in a parse. This technique (also known as "Packrat"
# parsing) guarantees parsers will operate in linear time but costs
# significantly more in terms of time and memory required to perform a parse.
# For more information, please read the paper on Packrat parsing at
# http://pdos.csail.mit.edu/~baford/packrat/icfp02/.
- class MemoizingInput < Input
+ class MemoizedInput < Input
# Builds the input over +string+ via the superclass initializer and starts
# with an empty memo cache and a zeroed cache-hit counter.
def initialize(string)
  super
  @cache_hits = 0
  @cache = {}
end
@@ -212,35 +221,34 @@
@cache.clear
@cache_hits = 0
super
end
- def exec(rule, events=[]) # :nodoc:
- position = pos
+ # Returns +true+ when using memoization to cache match results.
+ def memoized?
+ true
+ end
+
+ private
+
+ def apply_rule(rule, position, events) # :nodoc:
memo = @cache[rule] ||= {}
if memo[position]
@cache_hits += 1
+ events.concat(memo[position])
else
- memo[position] = rule.exec(self)
- end
+ index = events.size
+ rule.exec(self, events)
- if memo[position].size > 0
- events.concat(memo[position])
- position += events[-1]
- @max_offset = position if position > @max_offset
+ # Memoize the result so we can use it next time this same rule is
+ # executed at this position.
+ memo[position] = events.slice(index, events.size)
end
- self.pos = position
-
events
end
-
- # Returns +true+ when using memoization to cache match results.
- def memoized?
- true
- end
end
# Inclusion of this module into another extends the receiver with the grammar
# helper methods in GrammarMethods. Although this module does not actually
# provide any methods, constants, or variables to modules that include it, the
@@ -279,12 +287,13 @@
# Parses the given +string+ using this grammar's root rule. Optionally, the
# name of a different rule may be given here as the value of the +:root+
# option. Otherwise, all options are the same as in Rule#parse.
def parse(string, options={})
rule_name = options.delete(:root) || root
+ raise Error, "No root rule specified" unless rule_name
rule = rule(rule_name)
- raise 'No rule named "%s"' % rule_name unless rule
+ raise Error, "No rule named \"#{rule_name}\"" unless rule
rule.parse(string, options)
end
# Returns the name of this grammar as a string.
def name
@@ -509,34 +518,41 @@
def extension=(mod)
if Proc === mod
mod = Module.new { define_method(:value, &mod) }
end
- raise ArgumentError unless Module === mod
+ raise ArgumentError, "Extension must be a Module" unless Module === mod
@extension = mod
end
# The module this rule uses to extend new matches.
attr_reader :extension
+ # The default set of options to use when calling #parse or #test.
+ def default_options # :nodoc:
+ { :consume => true,
+ :memoize => false,
+ :offset => 0
+ }
+ end
+
# Attempts to parse the given +string+ and return a Match if any can be
- # made. The +options+ may contain any of the following keys:
+ # made. +options+ may contain any of the following keys:
#
- # offset:: The offset in +string+ at which to start the parse. Defaults
- # to 0.
+ # consume:: If this is +true+ a ParseError will be raised unless the
+ # entire input string is consumed. Defaults to +true+.
# memoize:: If this is +true+ the matches generated during a parse are
- # memoized. See Input#memoize! for more information. Defaults to
+ # memoized. See MemoizedInput for more information. Defaults to
# +false+.
- # consume:: If this is +true+ a ParseError will be raised during a parse
- # unless the entire input string is consumed. Defaults to
- # +true+.
+ # offset:: The offset in +string+ at which to start parsing. Defaults
+ # to 0.
def parse(string, options={})
- opts = default_parse_options.merge(options)
+ opts = default_options.merge(options)
input = if opts[:memoize]
- MemoizingInput.new(string)
+ MemoizedInput.new(string)
else
Input.new(string)
end
input.pos = opts[:offset] if opts[:offset] > 0
@@ -549,22 +565,17 @@
end
Match.new(string.slice(opts[:offset], length), events)
end
- # The default set of options to use when parsing.
- def default_parse_options # :nodoc:
- { :offset => 0,
- :memoize => false,
- :consume => true
- }
- end
-
# Tests whether or not this rule matches on the given +string+. Returns the
- # length of the match if any can be made, +nil+ otherwise.
- def test(string)
- Input.new(string).test(self)
+ # length of the match if any can be made, +nil+ otherwise. Accepts the same
+ # +options+ as #parse.
+ def test(string, options={})
+ parse(string, options).length
+ rescue ParseError
+ nil
end
# Returns +true+ if this rule is a Terminal.
def terminal?
false
@@ -1025,11 +1036,11 @@
if n >= min
events << CLOSE
events << length
else
- events.slice!(start, events.size)
+ events.slice!(start, index)
end
events
end
@@ -1086,11 +1097,11 @@
if n == m
events << CLOSE
events << length
else
- events.slice!(start, events.size)
+ events.slice!(start, index)
end
events
end
@@ -1378,8 +1389,8 @@
#
def grammar(name, &block)
namespace = respond_to?(:const_set) ? self : Object
namespace.const_set(name, Citrus::Grammar.new(&block))
rescue NameError
- raise ArgumentError, 'Invalid grammar name: %s' % name
+ raise ArgumentError, "Invalid grammar name: #{name}"
end
end