lib/citrus.rb in citrus-2.3.2 vs lib/citrus.rb in citrus-2.3.3
- old
+ new
@@ -1,32 +1,42 @@
+# encoding: UTF-8
+
require 'strscan'
+require 'pathname'
+require 'citrus/version'
# Citrus is a compact and powerful parsing library for Ruby that combines the
# elegance and expressiveness of the language with the simplicity and power of
# parsing expressions.
#
# http://mjijackson.com/citrus
module Citrus
autoload :File, 'citrus/file'
- # The current version of Citrus as [major, minor, patch].
- VERSION = [2, 3, 2]
-
# A pattern to match any character, including newline.
- DOT = /./m
+ DOT = /./mu
Infinity = 1.0 / 0
CLOSE = -1
- # Returns the current version of Citrus as a string.
- def self.version
- VERSION.join('.')
+ @cache = {}
+
+ # Returns a map of paths of files that have been loaded via #load to the
+ # result of #eval on the code in that file.
+ #
+ # Note: These paths are not absolute unless you pass an absolute path to
+ # #load. That means that if you change the working directory and try to
+ # #require the same file with a different relative path, it will be loaded
+ # twice.
+ def self.cache
+ @cache
end
- # Evaluates the given Citrus parsing expression grammar +code+ in the global
- # scope. Returns an array of any grammar modules that are created.
+ # Evaluates the given Citrus parsing expression grammar +code+ and returns an
+ # array of any grammar modules that are created. Accepts the same +options+ as
+ # GrammarMethods#parse.
#
# Citrus.eval(<<CITRUS)
# grammar MyGrammar
# rule abc
# "abc"
@@ -38,42 +48,92 @@
def self.eval(code, options={})
File.parse(code, options).value
end
# Evaluates the given expression and creates a new Rule object from it.
+ # Accepts the same +options+ as #eval.
#
# Citrus.rule('"a" | "b"')
# # => #<Citrus::Rule: ... >
#
def self.rule(expr, options={})
- File.parse(expr, options.merge(:root => :rule_body)).value
+ eval(expr, options.merge(:root => :expression))
end
- # Loads the grammar from the given +file+ into the global scope using #eval.
+ # Loads the grammar(s) from the given +file+. Accepts the same +options+ as
+ # #eval, plus the following:
#
+ # force:: Normally this method will not reload a file that is already in
+ # the #cache. However, if this option is +true+ the file will be
+ # loaded, regardless of whether or not it is in the cache. Defaults
+ # to +false+.
+ #
# Citrus.load('mygrammar')
# # => [MyGrammar]
#
def self.load(file, options={})
- file << '.citrus' unless ::File.file?(file)
- raise ArgumentError, "Cannot find file #{file}" unless ::File.file?(file)
- raise ArgumentError, "Cannot read file #{file}" unless ::File.readable?(file)
- eval(::File.read(file), options)
+ file += '.citrus' unless file =~ /\.citrus$/
+ force = options.delete(:force)
+
+ if force || !@cache[file]
+ raise LoadError, "Cannot find file #{file}" unless ::File.file?(file)
+ raise LoadError, "Cannot read file #{file}" unless ::File.readable?(file)
+
+ begin
+ @cache[file] = eval(::File.read(file), options)
+ rescue SyntaxError => e
+ e.message.replace("#{::File.expand_path(file)}: #{e.message}")
+ raise e
+ end
+ end
+
+ @cache[file]
end
- # A standard error class that all Citrus errors extend.
+ # Searches the <tt>$LOAD_PATH</tt> for a +file+ with the .citrus suffix and
+ # attempts to load it via #load. Returns the path to the file that was loaded
+ # on success, +nil+ on failure. Accepts the same +options+ as #load.
+ #
+ # path = Citrus.require('mygrammar')
+ # # => "/path/to/mygrammar.citrus"
+ # Citrus.cache[path]
+ # # => [MyGrammar]
+ #
+ def self.require(file, options={})
+ file += '.citrus' unless file =~ /\.citrus$/
+ found = nil
+
+ (Pathname.new(file).absolute? ? [''] : $LOAD_PATH).each do |dir|
+ found = Dir[::File.join(dir, file)].first
+ break if found
+ end
+
+ if found
+ Citrus.load(found, options)
+ else
+ raise LoadError, "Cannot find file #{file}"
+ end
+
+ found
+ end
+
+ # A base class for all Citrus errors.
class Error < RuntimeError; end
# Raised when a parse fails.
class ParseError < Error
# The +input+ given here is an instance of Citrus::Input.
def initialize(input)
@offset = input.max_offset
@line_offset = input.line_offset(offset)
@line_number = input.line_number(offset)
@line = input.line(offset)
- super("Failed to parse input on line #{line_number} at offset #{line_offset}\n#{detail}")
+
+ message = "Failed to parse input on line #{line_number}"
+ message << " at offset #{line_offset}\n#{detail}"
+
+ super(message)
end
# The 0-based offset at which the error occurred in the input, i.e. the
# maximum offset in the input that was successfully parsed before the error
# occurred.
@@ -94,10 +154,24 @@
def detail
"#{line}\n#{' ' * line_offset}^"
end
end
+ # Raised when Citrus.load fails to load a file.
+ class LoadError < Error; end
+
+ # Raised when Citrus::File.parse fails.
+ class SyntaxError < Error
+ # The +error+ given here is an instance of Citrus::ParseError.
+ def initialize(error)
+ message = "Malformed Citrus syntax on line #{error.line_number}"
+ message << " at offset #{error.line_offset}\n#{error.detail}"
+
+ super(message)
+ end
+ end
+
# An Input is a scanner that is responsible for executing rules at different
# positions in the input string and persisting event streams.
class Input < StringScanner
def initialize(string)
super(string)
@@ -170,16 +244,15 @@
def exec(rule, events=[])
position = pos
index = events.size
if apply_rule(rule, position, events).size > index
- position += events[-1]
- @max_offset = position if position > @max_offset
+ @max_offset = pos if pos > @max_offset
+ else
+ self.pos = position
end
- self.pos = position
-
events
end
# Returns the length of a match for the given +rule+ at the current pointer
# position, +nil+ if none can be made.
@@ -258,11 +331,11 @@
# Creates a new anonymous module that includes Grammar. If a +block+ is
# provided, it is +module_eval+'d in the context of the new module. Grammars
# created with this method may be assigned a name by being assigned to some
# constant, e.g.:
#
- # Calc = Citrus::Grammar.new {}
+ # MyGrammar = Citrus::Grammar.new {}
#
def self.new(&block)
mod = Module.new { include Grammar }
mod.module_eval(&block) if block
mod
@@ -282,13 +355,15 @@
def self.extend_object(obj)
raise ArgumentError, "Grammars must be Modules" unless Module === obj
super
end
- # Parses the given +string+ using this grammar's root rule. Optionally, the
- # name of a different rule may be given here as the value of the +:root+
- # option. Otherwise, all options are the same as in Rule#parse.
+ # Parses the given +string+ using this grammar's root rule. Accepts the same
+ # +options+ as Rule#parse, plus the following:
+ #
+ # root:: The name of the root rule to start parsing at. Defaults to this
+ # grammar's #root.
def parse(string, options={})
rule_name = options.delete(:root) || root
raise Error, "No root rule specified" unless rule_name
rule = rule(rule_name)
raise Error, "No rule named \"#{rule_name}\"" unless rule
@@ -305,12 +380,11 @@
def included_grammars
included_modules.select {|mod| mod.include?(Grammar) }
end
# Returns an array of all names of rules in this grammar as symbols ordered
- # in the same way they were defined (i.e. rules that were defined later
- # appear later in the array).
+ # in the same way they were declared.
def rule_names
@rule_names ||= []
end
# Returns a hash of all Rule objects in this grammar, keyed by rule name.
@@ -368,11 +442,10 @@
rules[sym] = rule
end
rules[sym] || super_rule(sym)
rescue => e
- # This preserves the backtrace.
e.message.replace("Cannot create rule \"#{name}\": #{e.message}")
raise e
end
# Gets/sets the +name+ of the root rule of this grammar. If no root rule is
@@ -445,11 +518,11 @@
# specify semantic behavior (via #ext).
def any(*args, &block)
ext(Choice.new(args), block)
end
- # Adds +label+ to the given +rule+.A block may be provided to specify
+ # Adds +label+ to the given +rule+. A block may be provided to specify
# semantic behavior (via #ext).
def label(rule, label, &block)
rule = ext(rule, block)
rule.label = label
rule
@@ -489,11 +562,11 @@
else
raise ArgumentError, "Invalid rule object: #{obj.inspect}"
end
end
- # The grammar this rule belongs to.
+ # The grammar this rule belongs to, if any.
attr_accessor :grammar
# Sets the name of this rule.
def name=(name)
@name = name.to_sym
@@ -526,11 +599,11 @@
end
# The module this rule uses to extend new matches.
attr_reader :extension
- # The default set of options to use when calling #parse or #test.
+ # The default set of options to use when calling #parse.
def default_options # :nodoc:
{ :consume => true,
:memoize => false,
:offset => 0
}
@@ -547,23 +620,18 @@
# offset:: The offset in +string+ at which to start parsing. Defaults
# to 0.
def parse(string, options={})
opts = default_options.merge(options)
- input = if opts[:memoize]
- MemoizedInput.new(string)
- else
- Input.new(string)
- end
-
+ input = (opts[:memoize] ? MemoizedInput : Input).new(string)
input.pos = opts[:offset] if opts[:offset] > 0
events = input.exec(self)
length = events[-1]
if !length || (opts[:consume] && length < (string.length - opts[:offset]))
- raise ParseError.new(input)
+ raise ParseError, input
end
Match.new(string.slice(opts[:offset], length), events)
end
@@ -621,12 +689,10 @@
else
super
end
end
- alias_method :eql?, :==
-
def inspect # :nodoc:
to_s
end
def extend_match(match) # :nodoc:
@@ -634,12 +700,12 @@
end
end
# A Proxy is a Rule that is a placeholder for another rule. It stores the
# name of some other rule in the grammar internally and resolves it to the
- # actual Rule object at runtime. This lazy evaluation permits us to create
- # Proxy objects for rules that we may not know the definition of yet.
+ # actual Rule object at runtime. This lazy evaluation permits creation of
+ # Proxy objects for rules that may not yet be defined.
module Proxy
include Rule
def initialize(rule_name='<proxy>')
self.rule_name = rule_name
@@ -705,12 +771,11 @@
# this proxy's #rule_name. Raises an error if one cannot be found.
def resolve!
rule = grammar.rule(rule_name)
unless rule
- raise RuntimeError,
- "No rule named \"#{rule_name}\" in grammar #{grammar.name}"
+ raise Error, "No rule named \"#{rule_name}\" in grammar #{grammar}"
end
rule
end
end
@@ -736,12 +801,12 @@
# #rule_name. Raises an error if one cannot be found.
def resolve!
rule = grammar.super_rule(rule_name)
unless rule
- raise RuntimeError,
- "No rule named \"#{rule_name}\" in hierarchy of grammar #{grammar.name}"
+ raise Error,
+ "No rule named \"#{rule_name}\" in hierarchy of grammar #{grammar}"
end
rule
end
end
@@ -771,16 +836,16 @@
# The actual Regexp object this rule uses to match.
attr_reader :regexp
# Returns an array of events for this rule on the given +input+.
def exec(input, events=[])
- length = input.scan_full(@regexp, false, false)
+ match = input.scan(@regexp)
- if length
+ if match
events << self
events << CLOSE
- events << length
+ events << match.length
end
events
end
@@ -1009,11 +1074,12 @@
include Nonterminal
def initialize(rule='', min=1, max=Infinity)
raise ArgumentError, "Min cannot be greater than max" if min > max
super([rule])
- @range = Range.new(min, max)
+ @min = min
+ @max = max
end
# Returns the Rule object this rule uses to match.
def rule
rules[0]
@@ -1024,13 +1090,12 @@
events << self
index = events.size
start = index - 1
length = n = 0
- m = max
- while n < m && input.exec(rule, events).size > index
+ while n < max && input.exec(rule, events).size > index
length += events[-1]
index = events.size
n += 1
end
@@ -1043,18 +1108,14 @@
events
end
# The minimum number of times this rule must match.
- def min
- @range.begin
- end
+ attr_reader :min
# The maximum number of times this rule may match.
- def max
- @range.end
- end
+ attr_reader :max
# Returns the operator this rule uses as a string. Will be one of
# <tt>+</tt>, <tt>?</tt>, or <tt>N*M</tt>.
def operator
@operator ||= case [min, max]
@@ -1168,18 +1229,21 @@
elisions = []
while events[0].elide?
elisions.unshift(events.shift)
- events = events.slice(0, events.length - 2)
+ events.slice!(-2, events.length)
end
events[0].extend_match(self)
elisions.each do |rule|
rule.extend_match(self)
end
+ else
+ # Create a default stream of events for the given string.
+ events = [Rule.for(string), CLOSE, string.length]
end
@events = events
end
@@ -1192,127 +1256,67 @@
end
# Returns a hash of capture names to arrays of matches with that name,
# in the order they appeared in the input.
def captures
- @captures ||= begin
- captures = {}
- stack = []
- offset = 0
- close = false
- index = 0
- last_length = nil
- in_proxy = false
- count = 0
-
- while index < @events.size
- event = @events[index]
-
- if close
- start = stack.pop
-
- if Rule === start
- rule = start
- os = stack.pop
- start = stack.pop
-
- match = Match.new(@string.slice(os, event), @events[start..index])
-
- # We can lookup immediate submatches by their index.
- if stack.size == 1
- captures[count] = match
- count += 1
- end
-
- # We can lookup matches that were created by proxy by the name of
- # the rule they are proxy for.
- if Proxy === rule
- if captures[rule.rule_name]
- captures[rule.rule_name] << match
- else
- captures[rule.rule_name] = [match]
- end
- end
-
- # We can lookup matches that were created by rules with labels by
- # that label.
- if rule.label
- if captures[rule.label]
- captures[rule.label] << match
- else
- captures[rule.label] = [match]
- end
- end
-
- in_proxy = false
- end
-
- unless last_length
- last_length = event
- end
-
- close = false
- elsif event == CLOSE
- close = true
- else
- stack << index
-
- # We can calculate the offset of this rule event by adding back the
- # last match length.
- if last_length
- offset += last_length
- last_length = nil
- end
-
- # We should not create captures when traversing the portion of the
- # event stream that is masked by a proxy in the original rule
- # definition.
- unless in_proxy || stack.size == 1
- stack << offset
- stack << event
- in_proxy = true if Proxy === event
- end
- end
-
- index += 1
- end
-
- captures
- end
+ process_events! unless @captures
+ @captures
end
# Returns an array of all immediate submatches of this match.
def matches
- @matches ||= (0...captures.size).map {|n| captures[n] }.compact
+ process_events! unless @matches
+ @matches
end
# A shortcut for retrieving the first immediate submatch of this match.
def first
- captures[0]
+ matches.first
end
- # The default value for a match is its string value. This method is
- # overridden in most cases to be more meaningful according to the desired
- # interpretation.
- alias_method :value, :to_s
-
# Allows methods of this match's string to be called directly and provides
# a convenient interface for retrieving the first match with a given name.
def method_missing(sym, *args, &block)
if @string.respond_to?(sym)
@string.__send__(sym, *args, &block)
else
- captures[sym].first if captures[sym]
+ captures[sym].first
end
end
def to_s
@string
end
alias_method :to_str, :to_s
+ # The default value for a match is its string value. This method is
+ # overridden in most cases to be more meaningful according to the desired
+ # interpretation.
+ alias_method :value, :to_s
+
+ # Returns this match plus all sub #matches in an array.
+ def to_a
+ [captures[0]] + matches
+ end
+
+ alias_method :to_ary, :to_a
+
+ # Returns the capture at the given +key+. If it is an Integer (and an
+ # optional length) or a Range, the result of #to_a with the same arguments
+ # is returned. Otherwise, the value at +key+ in #captures is returned.
+ def [](key, *args)
+ case key
+ when Integer, Range
+ to_a[key, *args]
+ else
+ captures[key]
+ end
+ end
+
+ alias_method :fetch, :[]
+
def ==(other)
case other
when String
@string == other
when Match
@@ -1320,12 +1324,10 @@
else
super
end
end
- alias_method :eql?, :==
-
def inspect
@string.inspect
end
# Prints the entire subtree of this match using the given +indent+ to
@@ -1348,13 +1350,11 @@
space = indent * (stack.size / 3)
string = @string.slice(os, event)
lines[start] = "#{space}#{string.inspect} rule=#{rule}, offset=#{os}, length=#{event}"
- unless last_length
- last_length = event
- end
+ last_length = event unless last_length
close = false
elsif event == CLOSE
close = true
else
@@ -1371,14 +1371,124 @@
index += 1
end
puts lines.compact.join("\n")
end
+
+ private
+
+ # Initializes both the @captures and @matches instance variables.
+ def process_events!
+ @captures = captures_hash
+ @matches = []
+
+ capture!(@events[0], self)
+
+ stack = []
+ offset = 0
+ close = false
+ index = 0
+ last_length = nil
+ capture = true
+
+ while index < @events.size
+ event = @events[index]
+
+ if close
+ start = stack.pop
+
+ if Rule === start
+ rule = start
+ os = stack.pop
+ start = stack.pop
+
+ match = Match.new(@string.slice(os, event), @events[start..index])
+ capture!(rule, match)
+
+ @matches << match if stack.size == 1
+
+ capture = true
+ end
+
+ last_length = event unless last_length
+
+ close = false
+ elsif event == CLOSE
+ close = true
+ else
+ stack << index
+
+ # We can calculate the offset of this rule event by adding back the
+ # last match length.
+ if last_length
+ offset += last_length
+ last_length = nil
+ end
+
+ if capture && stack.size != 1
+ stack << offset
+ stack << event
+
+ # We should not create captures when traversing a portion of the
+ # event stream that is masked by a proxy in the original rule
+ # definition.
+ capture = false if Proxy === event
+ end
+ end
+
+ index += 1
+ end
+
+ # Add numeric indices to @captures.
+ @captures[0] = self
+
+ @matches.each_with_index do |match, index|
+ @captures[index + 1] = match
+ end
+ end
+
+ def capture!(rule, match)
+ # We can lookup matches that were created by proxy by the name of
+ # the rule they are proxy for.
+ if Proxy === rule
+ if @captures.key?(rule.rule_name)
+ @captures[rule.rule_name] << match
+ else
+ @captures[rule.rule_name] = [match]
+ end
+ end
+
+ # We can lookup matches that were created by rules with labels by
+ # that label.
+ if rule.label
+ if @captures.key?(rule.label)
+ @captures[rule.label] << match
+ else
+ @captures[rule.label] = [match]
+ end
+ end
+ end
+
+ # Returns a new Hash that is to be used for @captures. This hash normalizes
+ # String keys to Symbols, returns +nil+ for unknown Numeric keys, and an
+ # empty Array for all other unknown keys.
+ def captures_hash
+ Hash.new do |hash, key|
+ case key
+ when String
+ hash[key.to_sym]
+ when Numeric
+ nil
+ else
+ []
+ end
+ end
+ end
end
end
class Object
- # A sugar method for creating grammars.
+ # A sugar method for creating Citrus grammars from any namespace.
#
# grammar :Calc do
# end
#
# module MyModule