lib/citrus.rb in citrus-2.3.2 vs lib/citrus.rb in citrus-2.3.3

- old
+ new

@@ -1,32 +1,42 @@ +# encoding: UTF-8 + require 'strscan' +require 'pathname' +require 'citrus/version' # Citrus is a compact and powerful parsing library for Ruby that combines the # elegance and expressiveness of the language with the simplicity and power of # parsing expressions. # # http://mjijackson.com/citrus module Citrus autoload :File, 'citrus/file' - # The current version of Citrus as [major, minor, patch]. - VERSION = [2, 3, 2] - # A pattern to match any character, including newline. - DOT = /./m + DOT = /./mu Infinity = 1.0 / 0 CLOSE = -1 - # Returns the current version of Citrus as a string. - def self.version - VERSION.join('.') + @cache = {} + + # Returns a map of paths of files that have been loaded via #load to the + # result of #eval on the code in that file. + # + # Note: These paths are not absolute unless you pass an absolute path to + # #load. That means that if you change the working directory and try to + # #require the same file with a different relative path, it will be loaded + # twice. + def self.cache + @cache end - # Evaluates the given Citrus parsing expression grammar +code+ in the global - # scope. Returns an array of any grammar modules that are created. + # Evaluates the given Citrus parsing expression grammar +code+ and returns an + # array of any grammar modules that are created. Accepts the same +options+ as + # GrammarMethods#parse. # # Citrus.eval(<<CITRUS) # grammar MyGrammar # rule abc # "abc" @@ -38,42 +48,92 @@ def self.eval(code, options={}) File.parse(code, options).value end # Evaluates the given expression and creates a new Rule object from it. + # Accepts the same +options+ as #eval. # # Citrus.rule('"a" | "b"') # # => #<Citrus::Rule: ... > # def self.rule(expr, options={}) - File.parse(expr, options.merge(:root => :rule_body)).value + eval(expr, options.merge(:root => :expression)) end - # Loads the grammar from the given +file+ into the global scope using #eval. + # Loads the grammar(s) from the given +file+. Accepts the same +options+ as + # #eval, plus the following: # + # force:: Normally this method will not reload a file that is already in + # the #cache. However, if this option is +true+ the file will be + # loaded, regardless of whether or not it is in the cache. Defaults + # to +false+. + # # Citrus.load('mygrammar') # # => [MyGrammar] # def self.load(file, options={}) - file << '.citrus' unless ::File.file?(file) - raise ArgumentError, "Cannot find file #{file}" unless ::File.file?(file) - raise ArgumentError, "Cannot read file #{file}" unless ::File.readable?(file) - eval(::File.read(file), options) + file += '.citrus' unless file =~ /\.citrus$/ + force = options.delete(:force) + + if force || !@cache[file] + raise LoadError, "Cannot find file #{file}" unless ::File.file?(file) + raise LoadError, "Cannot read file #{file}" unless ::File.readable?(file) + + begin + @cache[file] = eval(::File.read(file), options) + rescue SyntaxError => e + e.message.replace("#{::File.expand_path(file)}: #{e.message}") + raise e + end + end + + @cache[file] end - # A standard error class that all Citrus errors extend. + # Searches the <tt>$LOAD_PATH</tt> for a +file+ with the .citrus suffix and + # attempts to load it via #load. Returns the path to the file that was loaded + # on success, +nil+ on failure. Accepts the same +options+ as #load. + # + # path = Citrus.require('mygrammar') + # # => "/path/to/mygrammar.citrus" + # Citrus.cache[path] + # # => [MyGrammar] + # + def self.require(file, options={}) + file += '.citrus' unless file =~ /\.citrus$/ + found = nil + + (Pathname.new(file).absolute? ? [''] : $LOAD_PATH).each do |dir| + found = Dir[::File.join(dir, file)].first + break if found + end + + if found + Citrus.load(found, options) + else + raise LoadError, "Cannot find file #{file}" + end + + found + end + + # A base class for all Citrus errors. class Error < RuntimeError; end # Raised when a parse fails. class ParseError < Error # The +input+ given here is an instance of Citrus::Input. def initialize(input) @offset = input.max_offset @line_offset = input.line_offset(offset) @line_number = input.line_number(offset) @line = input.line(offset) - super("Failed to parse input on line #{line_number} at offset #{line_offset}\n#{detail}") + + message = "Failed to parse input on line #{line_number}" + message << " at offset #{line_offset}\n#{detail}" + + super(message) end # The 0-based offset at which the error occurred in the input, i.e. the # maximum offset in the input that was successfully parsed before the error # occurred. @@ -94,10 +154,24 @@ def detail "#{line}\n#{' ' * line_offset}^" end end + # Raised when Citrus.load fails to load a file. + class LoadError < Error; end + + # Raised when Citrus::File.parse fails. + class SyntaxError < Error + # The +error+ given here is an instance of Citrus::ParseError. + def initialize(error) + message = "Malformed Citrus syntax on line #{error.line_number}" + message << " at offset #{error.line_offset}\n#{error.detail}" + + super(message) + end + end + # An Input is a scanner that is responsible for executing rules at different # positions in the input string and persisting event streams. class Input < StringScanner def initialize(string) super(string) @@ -170,16 +244,15 @@ def exec(rule, events=[]) position = pos index = events.size if apply_rule(rule, position, events).size > index - position += events[-1] - @max_offset = position if position > @max_offset + @max_offset = pos if pos > @max_offset + else + self.pos = position end - self.pos = position - events end # Returns the length of a match for the given +rule+ at the current pointer # position, +nil+ if none can be made. @@ -258,11 +331,11 @@ # Creates a new anonymous module that includes Grammar. If a +block+ is # provided, it is +module_eval+'d in the context of the new module. Grammars # created with this method may be assigned a name by being assigned to some # constant, e.g.: # - # Calc = Citrus::Grammar.new {} + # MyGrammar = Citrus::Grammar.new {} # def self.new(&block) mod = Module.new { include Grammar } mod.module_eval(&block) if block mod @@ -282,13 +355,15 @@ def self.extend_object(obj) raise ArgumentError, "Grammars must be Modules" unless Module === obj super end - # Parses the given +string+ using this grammar's root rule. Optionally, the - # name of a different rule may be given here as the value of the +:root+ - # option. Otherwise, all options are the same as in Rule#parse. + # Parses the given +string+ using this grammar's root rule. Accepts the same + # +options+ as Rule#parse, plus the following: + # + # root:: The name of the root rule to start parsing at. Defaults to this + # grammar's #root. def parse(string, options={}) rule_name = options.delete(:root) || root raise Error, "No root rule specified" unless rule_name rule = rule(rule_name) raise Error, "No rule named \"#{rule_name}\"" unless rule @@ -305,12 +380,11 @@ def included_grammars included_modules.select {|mod| mod.include?(Grammar) } end # Returns an array of all names of rules in this grammar as symbols ordered - # in the same way they were defined (i.e. rules that were defined later - # appear later in the array). + # in the same way they were declared. def rule_names @rule_names ||= [] end # Returns a hash of all Rule objects in this grammar, keyed by rule name. @@ -368,11 +442,10 @@ rules[sym] = rule end rules[sym] || super_rule(sym) rescue => e - # This preserves the backtrace. e.message.replace("Cannot create rule \"#{name}\": #{e.message}") raise e end # Gets/sets the +name+ of the root rule of this grammar. If no root rule is @@ -445,11 +518,11 @@ # specify semantic behavior (via #ext). def any(*args, &block) ext(Choice.new(args), block) end - # Adds +label+ to the given +rule+.A block may be provided to specify + # Adds +label+ to the given +rule+. A block may be provided to specify # semantic behavior (via #ext). def label(rule, label, &block) rule = ext(rule, block) rule.label = label rule @@ -489,11 +562,11 @@ else raise ArgumentError, "Invalid rule object: #{obj.inspect}" end end - # The grammar this rule belongs to. + # The grammar this rule belongs to, if any. attr_accessor :grammar # Sets the name of this rule. def name=(name) @name = name.to_sym @@ -526,11 +599,11 @@ end # The module this rule uses to extend new matches. attr_reader :extension - # The default set of options to use when calling #parse or #test. + # The default set of options to use when calling #parse. def default_options # :nodoc: { :consume => true, :memoize => false, :offset => 0 } @@ -547,23 +620,18 @@ # offset:: The offset in +string+ at which to start parsing. Defaults # to 0. def parse(string, options={}) opts = default_options.merge(options) - input = if opts[:memoize] - MemoizedInput.new(string) - else - Input.new(string) - end - + input = (opts[:memoize] ? MemoizedInput : Input).new(string) input.pos = opts[:offset] if opts[:offset] > 0 events = input.exec(self) length = events[-1] if !length || (opts[:consume] && length < (string.length - opts[:offset])) - raise ParseError.new(input) + raise ParseError, input end Match.new(string.slice(opts[:offset], length), events) end @@ -621,12 +689,10 @@ else super end end - alias_method :eql?, :== - def inspect # :nodoc: to_s end def extend_match(match) # :nodoc: @@ -634,12 +700,12 @@ end end # A Proxy is a Rule that is a placeholder for another rule. It stores the # name of some other rule in the grammar internally and resolves it to the - # actual Rule object at runtime. This lazy evaluation permits us to create - # Proxy objects for rules that we may not know the definition of yet. + # actual Rule object at runtime. This lazy evaluation permits creation of + # Proxy objects for rules that may not yet be defined. module Proxy include Rule def initialize(rule_name='<proxy>') self.rule_name = rule_name @@ -705,12 +771,11 @@ # this proxy's #rule_name. Raises an error if one cannot be found. def resolve! rule = grammar.rule(rule_name) unless rule - raise RuntimeError, - "No rule named \"#{rule_name}\" in grammar #{grammar.name}" + raise Error, "No rule named \"#{rule_name}\" in grammar #{grammar}" end rule end end @@ -736,12 +801,12 @@ # #rule_name. Raises an error if one cannot be found. def resolve! rule = grammar.super_rule(rule_name) unless rule - raise RuntimeError, - "No rule named \"#{rule_name}\" in hierarchy of grammar #{grammar.name}" + raise Error, + "No rule named \"#{rule_name}\" in hierarchy of grammar #{grammar}" end rule end end @@ -771,16 +836,16 @@ # The actual Regexp object this rule uses to match. attr_reader :regexp # Returns an array of events for this rule on the given +input+. def exec(input, events=[]) - length = input.scan_full(@regexp, false, false) + match = input.scan(@regexp) - if length + if match events << self events << CLOSE - events << length + events << match.length end events end @@ -1009,11 +1074,12 @@ include Nonterminal def initialize(rule='', min=1, max=Infinity) raise ArgumentError, "Min cannot be greater than max" if min > max super([rule]) - @range = Range.new(min, max) + @min = min + @max = max end # Returns the Rule object this rule uses to match. def rule rules[0] @@ -1024,13 +1090,12 @@ events << self index = events.size start = index - 1 length = n = 0 - m = max - while n < m && input.exec(rule, events).size > index + while n < max && input.exec(rule, events).size > index length += events[-1] index = events.size n += 1 end @@ -1043,18 +1108,14 @@ events end # The minimum number of times this rule must match. - def min - @range.begin - end + attr_reader :min # The maximum number of times this rule may match. - def max - @range.end - end + attr_reader :max # Returns the operator this rule uses as a string. Will be one of # <tt>+</tt>, <tt>?</tt>, or <tt>N*M</tt>. def operator @operator ||= case [min, max] @@ -1168,18 +1229,21 @@ elisions = [] while events[0].elide? elisions.unshift(events.shift) - events = events.slice(0, events.length - 2) + events.slice!(-2, events.length) end events[0].extend_match(self) elisions.each do |rule| rule.extend_match(self) end + else + # Create a default stream of events for the given string. + events = [Rule.for(string), CLOSE, string.length] end @events = events end @@ -1192,127 +1256,67 @@ end # Returns a hash of capture names to arrays of matches with that name, # in the order they appeared in the input. def captures - @captures ||= begin - captures = {} - stack = [] - offset = 0 - close = false - index = 0 - last_length = nil - in_proxy = false - count = 0 - - while index < @events.size - event = @events[index] - - if close - start = stack.pop - - if Rule === start - rule = start - os = stack.pop - start = stack.pop - - match = Match.new(@string.slice(os, event), @events[start..index]) - - # We can lookup immediate submatches by their index. - if stack.size == 1 - captures[count] = match - count += 1 - end - - # We can lookup matches that were created by proxy by the name of - # the rule they are proxy for. - if Proxy === rule - if captures[rule.rule_name] - captures[rule.rule_name] << match - else - captures[rule.rule_name] = [match] - end - end - - # We can lookup matches that were created by rules with labels by - # that label. - if rule.label - if captures[rule.label] - captures[rule.label] << match - else - captures[rule.label] = [match] - end - end - - in_proxy = false - end - - unless last_length - last_length = event - end - - close = false - elsif event == CLOSE - close = true - else - stack << index - - # We can calculate the offset of this rule event by adding back the - # last match length. - if last_length - offset += last_length - last_length = nil - end - - # We should not create captures when traversing the portion of the - # event stream that is masked by a proxy in the original rule - # definition. - unless in_proxy || stack.size == 1 - stack << offset - stack << event - in_proxy = true if Proxy === event - end - end - - index += 1 - end - - captures - end + process_events! unless @captures + @captures end # Returns an array of all immediate submatches of this match. def matches - @matches ||= (0...captures.size).map {|n| captures[n] }.compact + process_events! unless @matches + @matches end # A shortcut for retrieving the first immediate submatch of this match. def first - captures[0] + matches.first end - # The default value for a match is its string value. This method is - # overridden in most cases to be more meaningful according to the desired - # interpretation. - alias_method :value, :to_s - # Allows methods of this match's string to be called directly and provides # a convenient interface for retrieving the first match with a given name. def method_missing(sym, *args, &block) if @string.respond_to?(sym) @string.__send__(sym, *args, &block) else - captures[sym].first if captures[sym] + captures[sym].first end end def to_s @string end alias_method :to_str, :to_s + # The default value for a match is its string value. This method is + # overridden in most cases to be more meaningful according to the desired + # interpretation. + alias_method :value, :to_s + + # Returns this match plus all sub #matches in an array. + def to_a + [captures[0]] + matches + end + + alias_method :to_ary, :to_a + + # Returns the capture at the given +key+. If it is an Integer (and an + # optional length) or a Range, the result of #to_a with the same arguments + # is returned. Otherwise, the value at +key+ in #captures is returned. + def [](key, *args) + case key + when Integer, Range + to_a[key, *args] + else + captures[key] + end + end + + alias_method :fetch, :[] + def ==(other) case other when String @string == other when Match @@ -1320,12 +1324,10 @@ else super end end - alias_method :eql?, :== - def inspect @string.inspect end # Prints the entire subtree of this match using the given +indent+ to @@ -1348,13 +1350,11 @@ space = indent * (stack.size / 3) string = @string.slice(os, event) lines[start] = "#{space}#{string.inspect} rule=#{rule}, offset=#{os}, length=#{event}" - unless last_length - last_length = event - end + last_length = event unless last_length close = false elsif event == CLOSE close = true else @@ -1371,14 +1371,124 @@ index += 1 end puts lines.compact.join("\n") end + + private + + # Initializes both the @captures and @matches instance variables. + def process_events! + @captures = captures_hash + @matches = [] + + capture!(@events[0], self) + + stack = [] + offset = 0 + close = false + index = 0 + last_length = nil + capture = true + + while index < @events.size + event = @events[index] + + if close + start = stack.pop + + if Rule === start + rule = start + os = stack.pop + start = stack.pop + + match = Match.new(@string.slice(os, event), @events[start..index]) + capture!(rule, match) + + @matches << match if stack.size == 1 + + capture = true + end + + last_length = event unless last_length + + close = false + elsif event == CLOSE + close = true + else + stack << index + + # We can calculate the offset of this rule event by adding back the + # last match length. + if last_length + offset += last_length + last_length = nil + end + + if capture && stack.size != 1 + stack << offset + stack << event + + # We should not create captures when traversing a portion of the + # event stream that is masked by a proxy in the original rule + # definition. + capture = false if Proxy === event + end + end + + index += 1 + end + + # Add numeric indices to @captures. + @captures[0] = self + + @matches.each_with_index do |match, index| + @captures[index + 1] = match + end + end + + def capture!(rule, match) + # We can lookup matches that were created by proxy by the name of + # the rule they are proxy for. + if Proxy === rule + if @captures.key?(rule.rule_name) + @captures[rule.rule_name] << match + else + @captures[rule.rule_name] = [match] + end + end + + # We can lookup matches that were created by rules with labels by + # that label. + if rule.label + if @captures.key?(rule.label) + @captures[rule.label] << match + else + @captures[rule.label] = [match] + end + end + end + + # Returns a new Hash that is to be used for @captures. This hash normalizes + # String keys to Symbols, returns +nil+ for unknown Numeric keys, and an + # empty Array for all other unknown keys. + def captures_hash + Hash.new do |hash, key| + case key + when String + hash[key.to_sym] + when Numeric + nil + else + [] + end + end + end end end class Object - # A sugar method for creating grammars. + # A sugar method for creating Citrus grammars from any namespace. # # grammar :Calc do # end # # module MyModule