lib/citrus.rb in citrus-2.3.1 vs lib/citrus.rb in citrus-2.3.2

- old
+ new

@@ -6,27 +6,23 @@ # # http://mjijackson.com/citrus module Citrus autoload :File, 'citrus/file' - VERSION = [2, 3, 1] + # The current version of Citrus as [major, minor, patch]. + VERSION = [2, 3, 2] - # Returns the current version of Citrus as a string. - def self.version - VERSION.join('.') - end - - # A pattern to match any character, including \\n. + # A pattern to match any character, including newline. DOT = /./m Infinity = 1.0 / 0 CLOSE = -1 - # Parses the given Citrus +code+ using +options+. - def self.parse(code, options={}) - File.parse(code, options) + # Returns the current version of Citrus as a string. + def self.version + VERSION.join('.') end # Evaluates the given Citrus parsing expression grammar +code+ in the global # scope. Returns an array of any grammar modules that are created. # @@ -35,29 +31,35 @@ # rule abc # "abc" # end # end # CITRUS + # # => [MyGrammar] # - def self.eval(code) - parse(code).value + def self.eval(code, options={}) + File.parse(code, options).value end # Evaluates the given expression and creates a new Rule object from it. # # Citrus.rule('"a" | "b"') + # # => #<Citrus::Rule: ... > # - def self.rule(expr) - parse(expr, :root => :rule_body).value + def self.rule(expr, options={}) + File.parse(expr, options.merge(:root => :rule_body)).value end # Loads the grammar from the given +file+ into the global scope using #eval. - def self.load(file) + # + # Citrus.load('mygrammar') + # # => [MyGrammar] + # + def self.load(file, options={}) file << '.citrus' unless ::File.file?(file) - raise "Cannot find file #{file}" unless ::File.file?(file) - raise "Cannot read file #{file}" unless ::File.readable?(file) - eval(::File.read(file)) + raise ArgumentError, "Cannot find file #{file}" unless ::File.file?(file) + raise ArgumentError, "Cannot read file #{file}" unless ::File.readable?(file) + eval(::File.read(file), options) end # A standard error class that all Citrus errors extend. class Error < RuntimeError; end @@ -155,18 +157,23 @@ # +pos+. +pos+ defaults to the current pointer position. def line(pos=pos) lines[line_index(pos)] end + # Returns +true+ when using memoization to cache match results. + def memoized? + false + end + # Returns an array of events for the given +rule+ at the current pointer # position. Objects in this array may be one of three types: a Rule, # Citrus::CLOSE, or a length (integer). def exec(rule, events=[]) - index = events.size position = pos + index = events.size - if rule.exec(self, events).size > index + if apply_rule(rule, position, events).size > index position += events[-1] @max_offset = position if position > @max_offset end self.pos = position @@ -175,29 +182,31 @@ end # Returns the length of a match for the given +rule+ at the current pointer # position, +nil+ if none can be made. def test(rule) - start = pos - events = rule.exec(self) - self.pos = start + position = pos + events = apply_rule(rule, position, []) + self.pos = position events[-1] end - # Returns +true+ when using memoization to cache match results. - def memoized? - false + private + + # Appends all events for +rule+ at the given +position+ to +events+. + def apply_rule(rule, position, events) + rule.exec(self, events) end end - # A MemoizingInput is an Input that caches segments of the event stream for + # A MemoizedInput is an Input that caches segments of the event stream for # particular rules in a parse. This technique (also known as "Packrat" # parsing) guarantees parsers will operate in linear time but costs # significantly more in terms of time and memory required to perform a parse. # For more information, please read the paper on Packrat parsing at # http://pdos.csail.mit.edu/~baford/packrat/icfp02/. - class MemoizingInput < Input + class MemoizedInput < Input def initialize(string) super(string) @cache = {} @cache_hits = 0 end @@ -212,35 +221,34 @@ @cache.clear @cache_hits = 0 super end - def exec(rule, events=[]) # :nodoc: - position = pos + # Returns +true+ when using memoization to cache match results. + def memoized? + true + end + + private + + def apply_rule(rule, position, events) # :nodoc: memo = @cache[rule] ||= {} if memo[position] @cache_hits += 1 + events.concat(memo[position]) else - memo[position] = rule.exec(self) - end + index = events.size + rule.exec(self, events) - if memo[position].size > 0 - events.concat(memo[position]) - position += events[-1] - @max_offset = position if position > @max_offset + # Memoize the result so we can use it next time this same rule is + # executed at this position. + memo[position] = events.slice(index, events.size) end - self.pos = position - events end - - # Returns +true+ when using memoization to cache match results. - def memoized? - true - end end # Inclusion of this module into another extends the receiver with the grammar # helper methods in GrammarMethods. Although this module does not actually # provide any methods, constants, or variables to modules that include it, the @@ -279,12 +287,13 @@ # Parses the given +string+ using this grammar's root rule. Optionally, the # name of a different rule may be given here as the value of the +:root+ # option. Otherwise, all options are the same as in Rule#parse. def parse(string, options={}) rule_name = options.delete(:root) || root + raise Error, "No root rule specified" unless rule_name rule = rule(rule_name) - raise 'No rule named "%s"' % rule_name unless rule + raise Error, "No rule named \"#{rule_name}\"" unless rule rule.parse(string, options) end # Returns the name of this grammar as a string. def name @@ -509,34 +518,41 @@ def extension=(mod) if Proc === mod mod = Module.new { define_method(:value, &mod) } end - raise ArgumentError unless Module === mod + raise ArgumentError, "Extension must be a Module" unless Module === mod @extension = mod end # The module this rule uses to extend new matches. attr_reader :extension + # The default set of options to use when calling #parse or #test. + def default_options # :nodoc: + { :consume => true, + :memoize => false, + :offset => 0 + } + end + # Attempts to parse the given +string+ and return a Match if any can be - # made. The +options+ may contain any of the following keys: + # made. +options+ may contain any of the following keys: # - # offset:: The offset in +string+ at which to start the parse. Defaults - # to 0. + # consume:: If this is +true+ a ParseError will be raised unless the + # entire input string is consumed. Defaults to +true+. # memoize:: If this is +true+ the matches generated during a parse are - # memoized. See Input#memoize! for more information. Defaults to + # memoized. See MemoizedInput for more information. Defaults to # +false+. - # consume:: If this is +true+ a ParseError will be raised during a parse - # unless the entire input string is consumed. Defaults to - # +true+. + # offset:: The offset in +string+ at which to start parsing. Defaults + # to 0. def parse(string, options={}) - opts = default_parse_options.merge(options) + opts = default_options.merge(options) input = if opts[:memoize] - MemoizingInput.new(string) + MemoizedInput.new(string) else Input.new(string) end input.pos = opts[:offset] if opts[:offset] > 0 @@ -549,22 +565,17 @@ end Match.new(string.slice(opts[:offset], length), events) end - # The default set of options to use when parsing. - def default_parse_options # :nodoc: - { :offset => 0, - :memoize => false, - :consume => true - } - end - # Tests whether or not this rule matches on the given +string+. Returns the - # length of the match if any can be made, +nil+ otherwise. - def test(string) - Input.new(string).test(self) + # length of the match if any can be made, +nil+ otherwise. Accepts the same + # +options+ as #parse. + def test(string, options={}) + parse(string, options).length + rescue ParseError + nil end # Returns +true+ if this rule is a Terminal. def terminal? false @@ -1025,11 +1036,11 @@ if n >= min events << CLOSE events << length else - events.slice!(start, events.size) + events.slice!(start, index) end events end @@ -1086,11 +1097,11 @@ if n == m events << CLOSE events << length else - events.slice!(start, events.size) + events.slice!(start, index) end events end @@ -1378,8 +1389,8 @@ # def grammar(name, &block) namespace = respond_to?(:const_set) ? self : Object namespace.const_set(name, Citrus::Grammar.new(&block)) rescue NameError - raise ArgumentError, 'Invalid grammar name: %s' % name + raise ArgumentError, "Invalid grammar name: #{name}" end end