lib/citrus.rb in citrus-2.0.1 vs lib/citrus.rb in citrus-2.1.1
- old
+ new
@@ -6,11 +6,11 @@
#
# http://mjijackson.com/citrus
module Citrus
autoload :File, 'citrus/file'
- VERSION = [2, 0, 1]
+ VERSION = [2, 1, 1]
# Returns the current version of Citrus as a dot-separated string, built from
# the integer components of VERSION.
def self.version
VERSION.map { |part| part.to_s }.join('.')
end
@@ -25,96 +25,208 @@
# Loads the grammar from the given +file+ into the global scope using #eval.
#
# If +file+ is not an existing file, ".citrus" is appended to the path before
# it is checked again. The extended path is built as a new string so that the
# caller's +file+ object is never modified (and a frozen string is accepted).
#
# Raises a RuntimeError when the resulting path is not a file or cannot be
# read. Returns whatever #eval returns for the file's contents.
# NOTE(review): +F+ is defined outside this excerpt; presumably an alias for
# ::File -- confirm.
def self.load(file)
file = "#{file}.citrus" unless F.file?(file)
raise "Cannot find file #{file}" unless F.file?(file)
raise "Cannot read file #{file}" unless F.readable?(file)
- self.eval(F.read(file))
+ eval(F.read(file))
end
# Evaluates the given Citrus parsing expression grammar +code+ in the global
- # scope. The +code+ may contain the definition of any number of modules.
- # Returns an array of any grammar modules that are created.
+ # scope. Returns an array of any grammar modules that are created. Implicitly
+ # raises +SyntaxError+ on a failed parse.
def self.eval(code)
- File.parse(code).value
+ parse(code, :consume => true).value
end
- # This error is raised whenever a parse fails.
- class ParseError < Exception
- def initialize(input)
- @input = input
- msg = "Failed to parse input at offset %d\n" % offset
- msg << detail
- super(msg)
+ # Parses the given Citrus +code+ using the given +options+. Returns the
+ # generated match tree. Raises a +SyntaxError+ if the parse fails.
+ def self.parse(code, options={})
+ begin
+ File.parse(code, options)
+ rescue ParseError => e
+ raise SyntaxError.new(e)
end
+ end
- # The Input object that was used for the parse.
- attr_reader :input
+ # A standard error class that all Citrus errors extend.
+ class Error < RuntimeError; end
- # Returns the 0-based offset at which the error occurred in the input, i.e.
- # the maximum offset in the input that was successfully parsed before the
- # error occurred.
- def offset
- input.max_offset
+ # Raised when there is an error parsing Citrus code.
+ class SyntaxError < Error
+ # The +error+ given here should be a +ParseError+ object.
+ def initialize(error)
+ msg = "Syntax error on line %d at offset %d\n%s" %
+ [error.line_number, error.line_offset, error.detail]
+ super(msg)
end
+ end
- # Returns the text of the line on which the error occurred.
- def line
- lines[line_index]
+ # Raised when a match cannot be found.
+ class NoMatchError < Error; end
+
+ # Raised when a parse fails.
+ class ParseError < Error
+ # The +input+ given here is an instance of Citrus::Input.
+ def initialize(input)
+ @offset = input.max_offset
+ @line_offset = input.line_offset(offset)
+ @line_number = input.line_number(offset)
+ @line = input.line(offset)
+ msg = "Failed to parse input at offset %d\n" % offset
+ msg << detail
+ super(msg)
end
- # Returns the 1-based number of the line in the input where the error
+ # The 0-based offset at which the error occurred in the input, i.e. the
+ # maximum offset in the input that was successfully parsed before the error
# occurred.
- def line_number
- line_index + 1
- end
+ attr_reader :offset
- alias lineno line_number
+ # The 0-based offset at which the error occurred on the line on which it
+ # occurred in the input.
+ attr_reader :line_offset
- # Returns the 0-based offset at which the error occurred on the line on
- # which it occurred.
- def line_offset
- pos = 0
- each_line do |line|
- len = line.length
- return (offset - pos) if pos + len >= offset
- pos += len
- end
- 0
- end
+ # The 1-based number of the line in the input where the error occurred.
+ attr_reader :line_number
+ # The text of the line in the input where the error occurred.
+ attr_reader :line
+
# Returns a string that, when printed, gives a visual representation of
# exactly where the error occurred on its line in the input.
def detail
"%s\n%s^" % [line, ' ' * line_offset]
end
+ end
- private
+ # This class represents the core of the parsing algorithm. It wraps the input
+ # string and serves matches to all nonterminals.
+ class Input < StringScanner
+ def initialize(string)
+ super(string)
+ @max_offset = 0
+ end
- def string
- input.string
+ # The maximum offset that has been achieved during a parse.
+ attr_reader :max_offset
+
+ # A nested hash of rule id's to offsets and their respective matches. Only
+ # present if memoing is enabled.
+ attr_reader :cache
+
+ # The number of times the cache was hit. Only present if memoing is enabled.
+ attr_reader :cache_hits
+
+ # Returns the length of this input.
+ def length
+ string.length
end
+ # Returns an array containing the lines of text in the input.
def lines
# Prefer String#lines when the string provides it; otherwise fall back to
# calling #to_a on the string itself.
splitter = string.respond_to?(:lines) ? :lines : :to_s
string.send(splitter).to_a
end
+ # Iterates over the lines of text in the input using the given +block+.
def each_line(&block)
string.each_line(&block)
end
- # Returns the 0-based number of the line in the input where the error
- # occurred.
- def line_index
- pos = 0
- idx = 0
+ # Returns the 0-based offset of the given +pos+ in the input on the line
+ # on which it is found. +pos+ defaults to the current pointer position.
+ def line_offset(pos=pos)
+ p = 0
each_line do |line|
- pos += line.length
- return idx if pos >= offset
- idx += 1
+ len = line.length
+ return (pos - p) if p + len >= pos
+ p += len
end
0
end
+
+ # Returns the 0-based number of the line that contains the character at the
+ # given +pos+. +pos+ defaults to the current pointer position.
+ def line_index(pos=pos)
+ p, n = 0, 0
+ each_line do |line|
+ p += line.length
+ return n if p >= pos
+ n += 1
+ end
+ 0
+ end
+
+ # Returns the 1-based number of the line that contains the character at the
+ # given +pos+. +pos+ defaults to the current pointer position.
+ def line_number(pos=pos)
+ line_index(pos) + 1
+ end
+
+ alias lineno line_number
+
+ # Returns the text of the line that contains the character at the given
+ # +pos+. +pos+ defaults to the current pointer position.
+ def line(pos=pos)
+ lines[line_index(pos)]
+ end
+
+ # Returns the match for the given +rule+ at the current pointer position,
+ # which is +nil+ if no match can be made.
+ def match(rule)
+ offset = pos
+ match = rule.match(self)
+
+ if match
+ @max_offset = pos if pos > @max_offset
+ else
+ # Reset the position for the next attempt at a match.
+ self.pos = offset unless match
+ end
+
+ match
+ end
+
+ # Returns +true+ when using memoization to cache match results.
+ def memoized?
+ !! @cache
+ end
+
+ # Modifies this object to cache match results during a parse. This technique
+ # (also known as "Packrat" parsing) guarantees parsers will operate in
+ # linear time but costs significantly more in terms of time and memory
+ # required to perform a parse. For more information, please read the paper
+ # on Packrat parsing at http://pdos.csail.mit.edu/~baford/packrat/icfp02/.
+ #
+ # Does nothing if this object is already memoized. Otherwise it overrides
+ # +match+ and +reset+ on this object only, then initializes an empty cache
+ # and a zeroed hit counter.
+ def memoize!
+ return if memoized?
+
+ # Using +instance_eval+ here preserves access to +super+ within the
+ # methods we define inside the block.
+ instance_eval do
+ # Looks up the result for +rule+ at the current position in the cache,
+ # which is keyed first by +rule.id+ and then by position. Because the
+ # lookup uses +key?+, a stored +nil+ (failed match) also counts as a hit.
+ # NOTE(review): on a cache hit the pointer position is not advanced here;
+ # confirm that callers do not rely on +match+ moving the pointer.
+ def match(rule) # :nodoc:
+ c = @cache[rule.id] ||= {}
+
+ if c.key?(pos)
+ @cache_hits += 1
+ c[pos]
+ else
+ # Cache miss: fall back to the original +match+ and store its result.
+ c[pos] = super
+ end
+ end
+
+ # Resets all internal variables so that this object may be used in
+ # another parse.
+ def reset
+ super
+ @max_offset = 0
+ @cache = {}
+ @cache_hits = 0
+ end
+ end
+
+ # Setting @cache is what makes +memoized?+ return true from now on.
+ @cache = {}
+ @cache_hits = 0
+ end
end
# Inclusion of this module into another extends the receiver with the grammar
# helper methods in GrammarMethods. Although this module does not actually
# provide any methods, constants, or variables to modules that include it, the
@@ -359,98 +471,19 @@
:consume => false
}
end
end
- # This class represents the core of the parsing algorithm. It wraps the input
- # string and serves matches to all nonterminals.
- class Input < StringScanner
- def initialize(string)
- super(string)
- @max_offset = 0
- end
-
- # The maximum offset that has been achieved during a parse.
- attr_reader :max_offset
-
- # A nested hash of rule id's to offsets and their respective matches. Only
- # present if memoing is enabled.
- attr_reader :cache
-
- # The number of times the cache was hit. Only present if memoing is enabled.
- attr_reader :cache_hits
-
- # Returns the length of this input.
- def length
- string.length
- end
-
- # Returns the match for a given +rule+ at the current position in the input.
- def match(rule)
- offset = pos
- match = rule.match(self)
-
- if match
- @max_offset = pos if pos > @max_offset
- else
- # Reset the position for the next attempt at a match.
- self.pos = offset
- end
-
- match
- end
-
- # Returns true if this input uses memoization to cache match results. See
- # #memoize!.
- def memoized?
- !! @cache
- end
-
- # Modifies this object to cache match results during a parse. This technique
- # (also known as "Packrat" parsing) guarantees parsers will operate in
- # linear time but costs significantly more in terms of time and memory
- # required to perform a parse. For more information, please read the paper
- # on Packrat parsing at http://pdos.csail.mit.edu/~baford/packrat/icfp02/.
- def memoize!
- return if memoized?
-
- # Using +instance_eval+ here preserves access to +super+ within the
- # methods we define inside the block.
- instance_eval do
- def match(rule)
- c = @cache[rule.id] ||= {}
-
- if c.key?(pos)
- @cache_hits += 1
- c[pos]
- else
- c[pos] = super
- end
- end
-
- def reset
- super
- @max_offset = 0
- @cache = {}
- @cache_hits = 0
- end
- end
-
- @cache = {}
- @cache_hits = 0
- end
- end
-
# A Rule is an object that is used by a grammar to create matches on the
# Input during parsing.
module Rule
# Evaluates the given expression and creates a new rule object from it.
#
# Citrus::Rule.eval('"a" | "b"')
#
def self.eval(expr)
- File.parse(expr, :root => :rule_body).value
+ Citrus.parse(expr, :root => :rule_body, :consume => true).value
end
# Returns a new Rule object depending on the type of object given.
def self.new(obj)
case obj
@@ -666,11 +699,11 @@
# The actual Regexp object this rule uses to match.
attr_reader :rule
# Returns the Match for this rule on +input+, +nil+ if no match can be made.
def match(input)
- m = input.scan(@rule)
+ m = input.scan(rule)
create_match(m) if m
end
# Returns the Citrus notation of this rule as a string.
def to_s
@@ -1014,11 +1047,12 @@
# Allows sub-matches of this match to be retrieved by name as instance
# methods.
def method_missing(sym, *args)
m = first(sym)
return m if m
- raise 'No match named "%s" in %s (%s)' % [sym, self, name]
+ raise NoMatchError, 'No match named "%s" in %s (%s)' %
+ [sym, self, name || '<anonymous>']
end
def to_ary
# This method intentionally left blank to work around a bug in Ruby 1.9.
end
@@ -1035,11 +1069,11 @@
# grammar :Calc do
# end
# end
#
def grammar(name, &block)
- obj = respond_to?(:const_set) ? self : Object
- obj.const_set(name, Citrus::Grammar.new(&block))
+ namespace = respond_to?(:const_set) ? self : Object
+ namespace.const_set(name, Citrus::Grammar.new(&block))
rescue NameError
raise ArgumentError, 'Invalid grammar name: %s' % name
end
end