lib/citrus.rb in citrus-2.0.1 vs lib/citrus.rb in citrus-2.1.1

- old
+ new

@@ -6,11 +6,11 @@ # # http://mjijackson.com/citrus module Citrus autoload :File, 'citrus/file' - VERSION = [2, 0, 1] + VERSION = [2, 1, 1] # Returns the current version of Citrus as a string. def self.version VERSION.join('.') end @@ -25,96 +25,208 @@ # Loads the grammar from the given +file+ into the global scope using #eval. def self.load(file) file << '.citrus' unless F.file?(file) raise "Cannot find file #{file}" unless F.file?(file) raise "Cannot read file #{file}" unless F.readable?(file) - self.eval(F.read(file)) + eval(F.read(file)) end # Evaluates the given Citrus parsing expression grammar +code+ in the global - # scope. The +code+ may contain the definition of any number of modules. - # Returns an array of any grammar modules that are created. + # scope. Returns an array of any grammar modules that are created. Implicitly + # raises +SyntaxError+ on a failed parse. def self.eval(code) - File.parse(code).value + parse(code, :consume => true).value end - # This error is raised whenever a parse fails. - class ParseError < Exception - def initialize(input) - @input = input - msg = "Failed to parse input at offset %d\n" % offset - msg << detail - super(msg) + # Parses the given Citrus +code+ using the given +options+. Returns the + # generated match tree. Raises a +SyntaxError+ if the parse fails. + def self.parse(code, options={}) + begin + File.parse(code, options) + rescue ParseError => e + raise SyntaxError.new(e) end + end - # The Input object that was used for the parse. - attr_reader :input + # A standard error class that all Citrus errors extend. + class Error < RuntimeError; end - # Returns the 0-based offset at which the error occurred in the input, i.e. - # the maximum offset in the input that was successfully parsed before the - # error occurred. - def offset - input.max_offset + # Raised when there is an error parsing Citrus code. + class SyntaxError < Error + # The +error+ given here should be a +ParseError+ object. + def initialize(error) + msg = "Syntax error on line %d at offset %d\n%s" % + [error.line_number, error.line_offset, error.detail] + super(msg) end + end - # Returns the text of the line on which the error occurred. - def line - lines[line_index] + # Raised when a match cannot be found. + class NoMatchError < Error; end + + # Raised when a parse fails. + class ParseError < Error + # The +input+ given here is an instance of Citrus::Input. + def initialize(input) + @offset = input.max_offset + @line_offset = input.line_offset(offset) + @line_number = input.line_number(offset) + @line = input.line(offset) + msg = "Failed to parse input at offset %d\n" % offset + msg << detail + super(msg) end - # Returns the 1-based number of the line in the input where the error + # The 0-based offset at which the error occurred in the input, i.e. the + # maximum offset in the input that was successfully parsed before the error # occurred. - def line_number - line_index + 1 - end + attr_reader :offset - alias lineno line_number + # The 0-based offset at which the error occurred on the line on which it + # occurred in the input. + attr_reader :line_offset - # Returns the 0-based offset at which the error occurred on the line on - # which it occurred. - def line_offset - pos = 0 - each_line do |line| - len = line.length - return (offset - pos) if pos + len >= offset - pos += len - end - 0 - end + # The 1-based number of the line in the input where the error occurred. + attr_reader :line_number + # The text of the line in the input where the error occurred. + attr_reader :line + # Returns a string that, when printed, gives a visual representation of # exactly where the error occurred on its line in the input. def detail "%s\n%s^" % [line, ' ' * line_offset] end + end - private + # This class represents the core of the parsing algorithm. It wraps the input + # string and serves matches to all nonterminals. + class Input < StringScanner + def initialize(string) + super(string) + @max_offset = 0 + end - def string - input.string + # The maximum offset that has been achieved during a parse. + attr_reader :max_offset + + # A nested hash of rule id's to offsets and their respective matches. Only + # present if memoing is enabled. + attr_reader :cache + + # The number of times the cache was hit. Only present if memoing is enabled. + attr_reader :cache_hits + + # Returns the length of this input. + def length + string.length end + # Returns an array containing the lines of text in the input. def lines string.send(string.respond_to?(:lines) ? :lines : :to_s).to_a end + # Iterates over the lines of text in the input using the given +block+. def each_line(&block) string.each_line(&block) end - # Returns the 0-based number of the line in the input where the error - # occurred. - def line_index - pos = 0 - idx = 0 + # Returns the 0-based offset of the given +pos+ in the input on the line + # on which it is found. +pos+ defaults to the current pointer position. + def line_offset(pos=pos) + p = 0 each_line do |line| - pos += line.length - return idx if pos >= offset - idx += 1 + len = line.length + return (pos - p) if p + len >= pos + p += len end 0 end + + # Returns the 0-based number of the line that contains the character at the + # given +pos+. +pos+ defaults to the current pointer position. + def line_index(pos=pos) + p, n = 0, 0 + each_line do |line| + p += line.length + return n if p >= pos + n += 1 + end + 0 + end + + # Returns the 1-based number of the line that contains the character at the + # given +pos+. +pos+ defaults to the current pointer position. + def line_number(pos=pos) + line_index(pos) + 1 + end + + alias lineno line_number + + # Returns the text of the line that contains the character at the given + # +pos+. +pos+ defaults to the current pointer position. + def line(pos=pos) + lines[line_index(pos)] + end + + # Returns the match for the given +rule+ at the current pointer position, + # which is +nil+ if no match can be made. + def match(rule) + offset = pos + match = rule.match(self) + + if match + @max_offset = pos if pos > @max_offset + else + # Reset the position for the next attempt at a match. + self.pos = offset unless match + end + + match + end + + # Returns +true+ when using memoization to cache match results. + def memoized? + !! @cache + end + + # Modifies this object to cache match results during a parse. This technique + # (also known as "Packrat" parsing) guarantees parsers will operate in + # linear time but costs significantly more in terms of time and memory + # required to perform a parse. For more information, please read the paper + # on Packrat parsing at http://pdos.csail.mit.edu/~baford/packrat/icfp02/. + def memoize! + return if memoized? + + # Using +instance_eval+ here preserves access to +super+ within the + # methods we define inside the block. + instance_eval do + def match(rule) # :nodoc: + c = @cache[rule.id] ||= {} + + if c.key?(pos) + @cache_hits += 1 + c[pos] + else + c[pos] = super + end + end + + # Resets all internal variables so that this object may be used in + # another parse. + def reset + super + @max_offset = 0 + @cache = {} + @cache_hits = 0 + end + end + + @cache = {} + @cache_hits = 0 + end end # Inclusion of this module into another extends the receiver with the grammar # helper methods in GrammarMethods. Although this module does not actually # provide any methods, constants, or variables to modules that include it, the @@ -359,98 +471,19 @@ :consume => false } end end - # This class represents the core of the parsing algorithm. It wraps the input - # string and serves matches to all nonterminals. - class Input < StringScanner - def initialize(string) - super(string) - @max_offset = 0 - end - - # The maximum offset that has been achieved during a parse. - attr_reader :max_offset - - # A nested hash of rule id's to offsets and their respective matches. Only - # present if memoing is enabled. - attr_reader :cache - - # The number of times the cache was hit. Only present if memoing is enabled. - attr_reader :cache_hits - - # Returns the length of this input. - def length - string.length - end - - # Returns the match for a given +rule+ at the current position in the input. - def match(rule) - offset = pos - match = rule.match(self) - - if match - @max_offset = pos if pos > @max_offset - else - # Reset the position for the next attempt at a match. - self.pos = offset - end - - match - end - - # Returns true if this input uses memoization to cache match results. See - # #memoize!. - def memoized? - !! @cache - end - - # Modifies this object to cache match results during a parse. This technique - # (also known as "Packrat" parsing) guarantees parsers will operate in - # linear time but costs significantly more in terms of time and memory - # required to perform a parse. For more information, please read the paper - # on Packrat parsing at http://pdos.csail.mit.edu/~baford/packrat/icfp02/. - def memoize! - return if memoized? - - # Using +instance_eval+ here preserves access to +super+ within the - # methods we define inside the block. - instance_eval do - def match(rule) - c = @cache[rule.id] ||= {} - - if c.key?(pos) - @cache_hits += 1 - c[pos] - else - c[pos] = super - end - end - - def reset - super - @max_offset = 0 - @cache = {} - @cache_hits = 0 - end - end - - @cache = {} - @cache_hits = 0 - end - end - # A Rule is an object that is used by a grammar to create matches on the # Input during parsing. module Rule # Evaluates the given expression and creates a new rule object from it. # # Citrus::Rule.eval('"a" | "b"') # def self.eval(expr) - File.parse(expr, :root => :rule_body).value + Citrus.parse(expr, :root => :rule_body, :consume => true).value end # Returns a new Rule object depending on the type of object given. def self.new(obj) case obj @@ -666,11 +699,11 @@ # The actual Regexp object this rule uses to match. attr_reader :rule # Returns the Match for this rule on +input+, +nil+ if no match can be made. def match(input) - m = input.scan(@rule) + m = input.scan(rule) create_match(m) if m end # Returns the Citrus notation of this rule as a string. def to_s @@ -1014,11 +1047,12 @@ # Allows sub-matches of this match to be retrieved by name as instance # methods. def method_missing(sym, *args) m = first(sym) return m if m - raise 'No match named "%s" in %s (%s)' % [sym, self, name] + raise NoMatchError, 'No match named "%s" in %s (%s)' % + [sym, self, name || '<anonymous>'] end def to_ary # This method intentionally left blank to work around a bug in Ruby 1.9. end @@ -1035,11 +1069,11 @@ # grammar :Calc do # end # end # def grammar(name, &block) - obj = respond_to?(:const_set) ? self : Object - obj.const_set(name, Citrus::Grammar.new(&block)) + namespace = respond_to?(:const_set) ? self : Object + namespace.const_set(name, Citrus::Grammar.new(&block)) rescue NameError raise ArgumentError, 'Invalid grammar name: %s' % name end end