lib/citrus.rb in citrus-1.7.0 vs lib/citrus.rb in citrus-1.8.0
- old
+ new
@@ -2,21 +2,24 @@
# elegance and expressiveness of the language with the simplicity and power of
# parsing expressions.
#
# http://mjijackson.com/citrus
module Citrus
# Version components; Citrus.version joins them with '.'. This revision bumps
# the value from 1.7.0 to 1.8.0 and moves the Infinity constant further down.
- VERSION = [1, 7, 0]
-
- Infinity = 1.0 / 0
-
# Registers the File constant of this module to be loaded from 'citrus/file'
# on first reference.
autoload :File, 'citrus/file'
+ VERSION = [1, 8, 0]
+
# Returns the current version of Citrus as a string.
# The string is the elements of VERSION joined with '.'.
def self.version
VERSION.join('.')
end
+ # A pattern to match any character, including \n.
# The /m modifier is what lets "." match a newline.
+ DOT = /./m
+
# Floating-point infinity (1.0 / 0); used as the default +max+ of #rep.
+ Infinity = 1.0 / 0
+
# Shorthand for Ruby's top-level File class. Inside this module the bare name
# File refers to the autoloaded Citrus::File instead.
F = ::File
# Loads the grammar from the given +file+ into the global scope using #eval.
def self.load(file)
file << '.citrus' unless F.file?(file)
@@ -24,11 +27,12 @@
raise "Cannot read file #{file}" unless F.readable?(file)
self.eval(F.read(file))
end
# Evaluates the given Citrus parsing expression grammar +code+ in the global
- # scope. Returns an array of any grammar modules that were created.
+ # scope. The +code+ may contain the definition of any number of modules.
+ # Returns an array of any grammar modules that are created.
#
# The work is done by File.parse; this method returns the #value of the
# resulting match. File here is the module's autoloaded File constant, not
# Ruby's core File class (which this module refers to as F).
def self.eval(code)
File.parse(code).value
end
# This error is raised whenever a parse fails.
@@ -175,10 +179,16 @@
@root = name.to_sym if name
# The first rule in a grammar is the default root.
@root || rule_names.first
end
+ # Creates a new rule that will match any single character. A block may be
+ # provided to specify semantic behavior (via #ext).
# The rule is built from the DOT pattern (/./m), so a newline matches as well.
+ def dot(&block)
+ ext(Rule.new(DOT), block)
+ end
+
# Creates a new Super for the rule currently being defined in the grammar. A
# block may be provided to specify semantic behavior (via #ext).
# The Super is constructed without arguments and handed to #ext together with
# the block; the return value is whatever #ext returns.
def sup(&block)
ext(Super.new, block)
end
@@ -193,21 +203,29 @@
# to specify semantic behavior (via #ext).
def notp(rule, &block)
# Wraps +rule+ in a NotPredicate and passes it, with the block, to #ext.
ext(NotPredicate.new(rule), block)
end
+ # Creates a new ButPredicate using the given +rule+. A block may be provided
+ # to specify semantic behavior (via #ext).
# Added in this revision; #butp below is an alias for the same method.
+ def but(rule, &block)
+ ext(ButPredicate.new(rule), block)
+ end
+
+ alias butp but # For consistency with #andp and #notp.
+
# Creates a new Label using the given +rule+ and +label+. A block may be
# provided to specify semantic behavior (via #ext).
# Label.new receives the rule first and the label second; this revision swaps
# the argument order to follow the changed Label#initialize signature.
def label(rule, label, &block)
- ext(Label.new(label, rule), block)
+ ext(Label.new(rule, label), block)
end
# Creates a new Repeat using the given +rule+. +min+ and +max+ specify the
# minimum and maximum number of times the rule must match. A block may be
# provided to specify semantic behavior (via #ext).
# Defaults are min=1 and max=Infinity. Repeat.new receives the rule first,
# then +min+ and +max+; this revision reorders the arguments to follow the
# changed Repeat#initialize signature.
def rep(rule, min=1, max=Infinity, &block)
- ext(Repeat.new(min, max, rule), block)
+ ext(Repeat.new(rule, min, max), block)
end
# An alias for #rep.
def one_or_more(rule, &block)
rep(rule, &block)
@@ -254,11 +272,17 @@
root_rule = rule(opts[:root])
raise 'No rule named "%s"' % root unless root_rule
input = Input.new(string, opts[:memoize])
- input.match(root_rule, opts[:offset]) or raise ParseError.new(input)
+ match = input.match(root_rule, opts[:offset])
+
+ if match.nil? || (opts[:consume] && input.length != match.length)
+ raise ParseError.new(input)
+ end
+
+ match
end
# The default set of options that is used in #parse. The options hash may
# have any of the following keys:
#
@@ -268,14 +292,18 @@
# memoize:: If this is +true+ the matches generated during a parse are
# memoized. This technique (also known as Packrat parsing)
# guarantees parsers will operate in linear time but costs
# significantly more in terms of time and memory required.
# Defaults to +false+.
+ # consume:: If this is +true+ a ParseError will be raised during a parse
+ # unless the entire input string is consumed. Defaults to
+ # +false+.
def default_parse_options
# Builds a new Hash on every call. :root is read from #root at call time;
# the other entries are literals. This revision adds the :consume key.
{ :offset => 0,
:root => root,
- :memoize => false
+ :memoize => false,
+ :consume => false
}
end
end
# This class represents the core of the parsing algorithm. It wraps the input
@@ -339,10 +367,18 @@
end
# A Rule is an object that is used by a grammar to create matches on the
# Input during parsing.
module Rule
+ # Evaluates the given expression and creates a new rule object from it.
+ #
+ # Citrus::Rule.eval('"a" | "b"')
+ #
# Parses +expr+ with File.parse using :rule_body as the root rule and returns
# the #value of the result. The body is identical to the Rule.create method
# that this revision removes further down.
+ def self.eval(expr)
+ File.parse(expr, :root => :rule_body).value
+ end
+
# Returns a new Rule object depending on the type of object given.
def self.new(obj)
case obj
when Rule then obj
when Symbol then Alias.new(obj)
@@ -354,18 +390,10 @@
else
raise ArgumentError, "Invalid rule object: #{obj.inspect}"
end
end
- # Creates a new rule object from the given expression.
- #
- # Citrus::Rule.create('"a" | "b"')
- #
- def self.create(expr)
- File.parse(expr, :root => :rule_body).value
- end
-
@unique_id = 0
# Generates a new rule id.
def self.new_id
@unique_id += 1
@@ -389,11 +417,22 @@
# Specifies a module that will be used to extend all Match objects that
# result from this rule. If +mod+ is a Proc, it is used to create an
# anonymous module.
#
# In the new version a Proc is converted in two steps. It is first evaluated
# as a module body (Module.new(&mod)). If the resulting module has no instance
# methods, or evaluating the block raises ArgumentError, NameError or
# NoMethodError, the Proc instead becomes the body of a #value method on a
# fresh anonymous module. After conversion, anything that is not a Module
# raises ArgumentError (with no message).
#
# NOTE(review): the rescue cannot tell a deliberate fallback from a genuine
# NameError/NoMethodError raised inside a block that was meant as a module
# body; such a block is silently reinterpreted as a #value body.
def extension=(mod)
- mod = Module.new(&mod) if Proc === mod
+ if Proc === mod
+ begin
+ tmp = Module.new(&mod)
# Raised on purpose to reach the fallback in the rescue clause below.
+ raise ArgumentError unless tmp.instance_methods.any?
+ mod = tmp
+ rescue ArgumentError, NameError, NoMethodError
# +mod+ is still the original Proc here; it becomes the body of #value.
+ mod = Module.new { define_method(:value, &mod) }
+ end
+ end
+
+ raise ArgumentError unless Module === mod
+
@extension = mod
end
# The module this rule uses to extend new matches.
attr_reader :extension
@@ -420,11 +459,11 @@
end
private
def extend_match(match, name)
# Applies this rule's extension module (when one is set) to +match+, appends
# +name+ (when given) to match.names, and returns +match+.
# The old version queued the module in match.extensions; the new version
# extends the match object immediately.
- match.extensions << extension if extension
+ match.extend(extension) if extension
match.names << name if name
match
end
def create_match(data, offset)
@@ -552,11 +591,11 @@
end
# Returns the Match for this rule on +input+ at the given +offset+, +nil+ if
# no match can be made.
# The slice of +input+ starting at +offset+ and rule.length long is compared
# to +rule+; on success the match is created from a duplicate of +rule+.
# This revision only swaps the operands of ==, so the input slice is now the
# receiver of the comparison.
def match(input, offset=0)
- create_match(rule.dup, offset) if rule == input[offset, rule.length]
+ create_match(rule.dup, offset) if input[offset, rule.length] == rule
end
end
# An Expression is a Terminal that has the same semantics as a regular
# expression in Ruby. The expression must match at the beginning of the input
@@ -610,11 +649,11 @@
# A Predicate is a Nonterminal that contains one other rule.
module Predicate
include Nonterminal
def initialize(rule='')
# Forwards the single +rule+ (default: empty string) to super wrapped in a
# one-element Array. The change in this revision is whitespace only.
- super([ rule ])
+ super([rule])
end
# Returns the Rule object this rule uses to match.
def rule
rules[0]
@@ -661,44 +700,76 @@
def to_s
# Notation is '!' followed by the wrapped rule's #embed string.
'!' + rule.embed
end
end
+ # A ButPredicate is a Predicate that consumes all characters until its rule
+ # matches. It must match at least one character in order to succeed. The
+ # Citrus notation is any expression preceded by a tilde, e.g.:
+ #
+ # ~expr
+ #
+ class ButPredicate
+ include Predicate
+
# Rule built once from DOT; used below to consume one character at a time.
+ DOT_RULE = Rule.new(DOT)
+
+ # Returns the Match for this rule on +input+ at the given +offset+, +nil+ if
+ # no match can be made.
+ def match(input, offset=0)
+ matches = []
+ os = offset
# Keep advancing while the wrapped rule does NOT match at position +os+.
+ while input.match(rule, os).nil?
+ m = input.match(DOT_RULE, os)
# No character could be matched at +os+ (presumably end of input).
+ break unless m
+ matches << m
+ os += m.length
+ end
+ # Create a single match from the aggregate text value of all submatches.
# Yields nil when nothing was consumed, i.e. the wrapped rule matched
# right at +offset+ or no character was available there.
+ create_match(matches.join, offset) if matches.any?
+ end
+
+ # Returns the Citrus notation of this rule as a string.
+ def to_s
+ '~' + rule.embed
+ end
+ end
+
# A Label is a Predicate that applies a new name to any matches made by its
# rule. The Citrus notation is any sequence of word characters (i.e.
# <tt>[a-zA-Z0-9_]</tt>) followed by a colon, followed by any other
# expression, e.g.:
#
# label:expr
#
# This revision renames the +label_name+ attribute to +label+ and moves the
# rule to the first constructor argument.
class Label
include Predicate
# +rule+ goes to super; +label+ is stored through the writer below, which
# converts it to a Symbol.
- def initialize(label_name='<label>', rule='')
+ def initialize(rule='', label='<label>')
super(rule)
- self.label_name = label_name
+ self.label = label
end
# Sets the name of this label.
# The value is converted with #to_sym before being stored.
- def label_name=(label_name)
- @label_name = label_name.to_sym
+ def label=(label)
+ @label = label.to_sym
end
- # The name this rule adds to all its matches.
- attr_reader :label_name
+ # The label this rule adds to all its matches.
+ attr_reader :label
# Returns the Match for this rule on +input+ at the given +offset+, +nil+ if
# no match can be made. When a Label makes a match, it re-names the match to
# the value of its label.
# The new version asks +input+ to perform the match (input.match(rule, offset))
# instead of calling rule.match directly, then passes the result through
# #extend_match with the label as its name.
def match(input, offset=0)
- m = rule.match(input, offset)
- extend_match(m, label_name) if m
+ m = input.match(rule, offset)
+ extend_match(m, label) if m
end
# Returns the Citrus notation of this rule as a string.
# The form is the label, a colon, then the wrapped rule's #embed string.
def to_s
- label_name.to_s + ':' + rule.embed
+ label.to_s + ':' + rule.embed
end
end
# A Repeat is a Predicate that specifies a minimum and maximum number of times
# its rule must match. The Citrus notation is an integer, +N+, followed by an
@@ -720,14 +791,14 @@
# expr?
#
class Repeat
include Predicate
# The new signature takes the rule first, then +min+ and +max+ (defaults 1 and
# Infinity). super(rule) now runs before the bounds are validated; previously
# it ran last. Raises ArgumentError when +min+ is greater than +max+;
# otherwise the bounds are stored as a Range in @range.
- def initialize(min=1, max=Infinity, rule='')
+ def initialize(rule='', min=1, max=Infinity)
+ super(rule)
raise ArgumentError, "Min cannot be greater than max" if min > max
@range = Range.new(min, max)
- super(rule)
end
# Returns the Match for this rule on +input+ at the given +offset+, +nil+ if
# no match can be made.
def match(input, offset=0)
@@ -833,19 +904,20 @@
end
# The base class for all matches. Matches are organized into a tree where any
# match may contain any number of other matches. This class provides several
# convenient tree traversal methods that help when examining parse results.
- class Match
+ class Match < String
def initialize(data, offset=0)
# Match subclasses String as of this revision, so the matched text is handed
# to super instead of being kept in @text.
case data
when String
- @text = data
+ super(data)
when MatchData
# Text is the whole match (data[0]); capture groups are kept separately.
- @text = data[0]
+ super(data[0])
@captures = data.captures
when Array
# Text is the concatenation of the sub-matches, which are kept in @matches.
+ super(data.join)
@matches = data
end
# NOTE(review): there is no else branch, so for any other +data+ type super
# is never called here.
@offset = offset
end
@@ -868,15 +940,10 @@
# Returns +true+ if this match has the given +name+.
# Implemented as a membership test on #names.
def has_name?(name)
names.include?(name)
end
- # An array of all extension modules of this match.
- def extensions
- @extensions ||= []
- end
-
# An array of all sub-matches of this match.
# Set to an empty Array on first access when @matches has not been assigned.
def matches
@matches ||= []
end
@@ -884,35 +951,17 @@
# created by an Expression.
def captures
# Set to an empty Array on first access when @captures has not been assigned.
@captures ||= []
end
- # Returns the raw text value of this match, which may simply be an
- # aggregate of the text of all sub-matches if this match is not #terminal?.
- def text
- @text ||= matches.inject('') {|s, m| s << m.text }
- end
-
- alias to_s text
-
- # Returns the length of this match's #text value as an Integer.
- def length
- text.length
- end
-
- # Passes all arguments to the #text of this match.
- def [](*args)
- text.__send__(:[], *args)
- end
-
# Returns an array of all sub-matches with the given +name+. If +deep+ is
# +false+, returns only sub-matches that are immediate descendants of this
# match.
# +name+ is converted to a Symbol for the comparison. Immediate sub-matches
# with that name come first in the result, followed by the results of
# recursing into each sub-match in order.
def find(name, deep=true)
sym = name.to_sym
ms = matches.select {|m| m.has_name?(sym) }
# The new version appends each child's results directly rather than mapping
# and then flattening the nested arrays.
- ms.concat(matches.map {|m| m.find(name, deep) }.flatten) if deep
+ matches.each {|m| ms.concat(m.find(name, deep)) } if deep
ms
end
# A shortcut for retrieving the first immediate sub-match of this match. If
# +name+ is given, attempts to retrieve the first immediate sub-match named
@@ -925,44 +974,24 @@
# Terminal).
def terminal?
# True when this match has no sub-matches.
matches.length == 0
end
# The custom #== (which compared against #text) is removed here and replaced
# by #to_s; the trailing +end+ is shared by both versions in this diff.
- # Checks equality by comparing this match's #text value to +obj+.
- def ==(obj)
- obj == text
+ # Creates a new String object from the contents of this match.
+ def to_s
+ String.new(self)
end
- alias eql? ==
-
- private
-
- def redefine_method_missing! # :nodoc:
- instance_eval(<<-RUBY, __FILE__, __LINE__ + 1)
- def method_missing(sym, *args)
- if sym == :to_ary
- original_method_missing(sym, *args)
- else
- m = first(sym)
- return m if m
- raise 'No match named "%s" in %s (%s)' % [sym, self, name]
- end
- end
- RUBY
- end
-
- alias original_method_missing method_missing
-
- public
-
# Allows sub-matches of this match to be retrieved by name as instance
# methods.
# The new version looks up the first immediate sub-match named +sym+ via
# #first and returns it. When there is none, it raises with a plain String
# message (a RuntimeError in Ruby). The lazy-extension step of the old version
# is dropped.
# NOTE(review): +args+ is ignored, and no respond_to_missing? counterpart is
# visible in this excerpt.
def method_missing(sym, *args)
- # Extend this object only when needed and immediately redefine
- # #method_missing so that the new version is used on all future calls.
- extensions.each {|e| extend(e) } if @extensions
- redefine_method_missing!
- __send__(sym, *args)
+ m = first(sym)
+ return m if m
+ raise 'No match named "%s" in %s (%s)' % [sym, self, name]
+ end
+
# Defined explicitly so that a to_ary call is answered here instead of
# reaching #method_missing; the empty body returns nil.
+ def to_ary
+ # This method intentionally left blank to work around a bug in Ruby 1.9.
end
end
end
class Object