lib/packrat/grammar.rb in rockit-0.7.1 vs lib/packrat/grammar.rb in rockit-0.7.2
- old
+ new
@@ -55,15 +55,16 @@
# A StringLiteral works like a RegexpLiteral. The only reason we use
# a special class for it is so that we can inspect it in a more natural way
# (as a string instead of a Regexp).
class Packrat::StringLiteral < Packrat::RegexpLiteral
+ attr_reader :string
def initialize(str)
super(Regexp.new(Regexp.escape(str)))
- @str = str
+ @string = str
end
- def inspect; @str.inspect; end
+ def inspect; @string.inspect; end
end
class Packrat::RuleRef < Packrat::GrammarElement
attr_reader :rule_name
def initialize(ruleName)
@@ -97,24 +98,27 @@
# A grammar Production is a sequence of rhs elements describing how the lhs
# symbol should be parsed.
class Packrat::Production
attr_accessor :grammar
- attr_reader :name, :rhs, :result_modifier
+ attr_reader :name, :rhs, :result_producer
def initialize(name, rhs)
@name, @rhs = name, rhs
- if Packrat::ResultModifier === rhs.last
- @result_modifier = @rhs.pop
+ if Packrat::ResultProducer === rhs.last
+ @result_producer = @rhs.pop
else
- # Default modifier is to create a Sexpr with the production name
+ # Default producer is to create a Sexpr with the production name
# as the head of the returned array.
- @result_modifier = Packrat::SexprModifier.new(@name)
+ @result_producer = Packrat::SexprProducer.new(@name)
end
@rhs.map! {|e| e.to_packrat_grammar_element}
end
+ def finalize!
+ @result_producer.production = self
+ end
def inspect(withLhs = true)
- rhs = @rhs.map {|e| e.inspect}.join(' ')
+ rhs = @rhs.map {|e| e.hidden ? nil : e.inspect}.compact.join(' ')
withLhs ? "#{name.to_s} -> " + rhs : rhs
end
end
# Report results of parsing a prod or grammar element
@@ -122,29 +126,30 @@
def initialize(sub)
@sub = sub
end
def parse(parser)
res = @sub.parse(parser)
- if res == false
+ if false == res
lputs "\t\t\t FAIL #{@sub.inspect}"
puts ""
else
- lputs " Match #{@sub.inspect}"
+ lputs " #{parser.pos}: Match #{@sub.inspect}"
puts ""
end
res
end
+ def inspect; @sub.inspect; end
def method_missing(method, *args)
@sub.send(method, *args)
end
end
module Packrat::GrammarBuild
attr_reader :start
def start_symbol(name); @start = name; end
def rules; @rules ||= (Hash.new {|h,k| h[k] = Packrat::Rule.new(k)}); end
- def rule(name, rhss)
+ def rule(name, *rhss)
rhss.each {|rhs| prod(name, rhs)}
end
def prod(name, rhs)
pr = Packrat::Production.new(name, rhs)
pr = Packrat::ErrorReporter.new(pr) if $DEBUG
@@ -158,24 +163,20 @@
e
end
# Finalize the building of the grammar by conducting postprocessing.
def finalize!
postprocess_set_grammar_on_rules
- postprocess_create_ast_classes
+ each_prod {|p| p.finalize!}
end
def postprocess_set_grammar_on_rules
- rules.values.each {|r| r.grammar = self}
+ each_prod {|r| r.grammar = self}
end
- def postprocess_create_ast_classes
- each_prod do |p|
- if Packrat::ASTBuilder === p.result_modifier
- ast_class(p.result_modifier.name, p)
- end
- end
+ def each_rule
+ rules.values.each {|r| yield(r)}
end
def each_prod
- rules.values.each {|r| r.prods.each {|p| yield(p)}}
+ each_rule {|r| r.prods.each {|p| yield(p)}}
end
end
class Packrat::Grammar
extend Packrat::GrammarBuild
@@ -223,55 +224,64 @@
def initialize(sub)
@sub = sub.to_packrat_grammar_element
end
def parse(parser)
res = @sub.parse(parser)
- return res if res
- return nil
+ false == res ? nil : res
end
def inspect
"(#{@sub.inspect})?"
end
end
module Packrat::GrammarBuild
def maybe(element); Packrat::Maybe.new(element); end
end
-# The last element of a prod can be a result modifier that modifies
-# the result to be returned by the prod when parsing.
-class Packrat::ResultModifier
+# The last element of a prod can be a result producer that produces
+# the result to be returned by the prod in case of a successful parse.
+class Packrat::ResultProducer
+ # Before any results are produced we need to know the prod we are in
+ def production=(prod); @prod = prod; end
+
+ # A ResultProducer returns a result which it then updates. This is needed
+ # since multiple results can be in production at the same time.
+ def new_result; end
+ def update_result(res, subres, elem, index); end
+ def finalize_result(res); res; end
end
# Create a Sexpr based on the name of the matched production and the
# result-array.
-class Packrat::SexprModifier < Packrat::ResultModifier
+class Packrat::SexprProducer < Packrat::ResultProducer
def initialize(name)
@name = name
end
- def modify_result(prod, result)
- # Add the production name in the front
- result.unshift @name
- result
- end
+ def new_result; [@name]; end
+ def update_result(res, subres, elem, index, nhi); res << subres; end
end
# Lift one of the sub-results as the result from parsing a production.
-class Packrat::LiftModifier < Packrat::ResultModifier
+# Optionally a block can be given. If so the block will get called with
+# the lifted result and can modify it.
+class Packrat::LiftOneResultProducer < Packrat::ResultProducer
def initialize(valueIndex, &block)
@value_index = valueIndex
@block = block
end
- def modify_result(prod, result)
- extracted_result = result[@value_index]
- @block? @block.call(extracted_result) : extracted_result
+ def new_result; nil; end
+ def update_result(res, subres, elem, index, nonhiddenIndex)
+ index == @value_index ? subres : res
end
+ def finalize_result(res)
+ @block ? @block.call(res) : res
+ end
end
module Packrat::GrammarBuild
- def sexpr(name); Packrat::SexprModifier.new(name); end
- def lift(index, &b); Packrat::LiftModifier.new(index, &b); end
+ def sexpr(name); Packrat::SexprProducer.new(name); end
+ def lift(index, &b); Packrat::LiftOneResultProducer.new(index, &b); end
end
module Packrat::GrammarBuild
# any() can be implemented in many ways but if all the sub-elements are
# strings we simply create a regexp matching any of them. If they are not
@@ -280,11 +290,11 @@
if subs.all? {|e| String === e}
re_string = subs.map {|s| "(" + Regexp.escape(s) + ")"}.join("|")
Packrat::RegexpLiteral.new(Regexp.new(re_string))
else
name = internal_rule_name()
- rule(name, subs.map {|s| [s, lift(0)]})
+ rule(name, *subs.map {|s| [s, lift(0)]})
Packrat::RuleRef.new(name)
end
end
def next_internal_rule_num
@@ -306,54 +316,68 @@
module Packrat::GrammarBuild
def eos(); hidden(Packrat::EOS.new); end
end
-class Packrat::ASTBuilder < Packrat::ResultModifier
+# Build AST tree as result of parsing a Production.
+class Packrat::ASTBuilder < Packrat::ResultProducer
attr_reader :name
- def initialize(nodeName)
- @name = nodeName
+ def initialize(nodeName, nameMap = {})
+ @name, @name_map = nodeName, nameMap
end
- def modify_result(prod, result)
- astklass = prod.grammar.ast_class(@name, prod)
- astklass.new(*result)
+ def production=(prod)
+ super
+ @ast_class = prod.grammar.ast_class(@name, prod, @name_map)
end
+
+ def new_result; Array.new; end
+ def update_result(res, subres, elem, index, nhIndex)
+ res << subres unless @ast_class.constant_elem_at?(nhIndex)
+ res
+ end
+ def finalize_result(res)
+ @ast_class.new(res, {:only_nonconstant => true})
+ end
end
module Packrat::GrammarBuild
- def ast(name)
- Packrat::ASTBuilder.new(name)
+ def ast(name, options = {})
+ Packrat::ASTBuilder.new(name, options)
end
# Return the ast class with the given <nodeName> for the given <production>.
# If not previously created we create it and add it to the ASTs module.
- def ast_class(name, prod)
+ def ast_class(name, prod, nameMap)
acn = ast_class_name(name)
begin
const_get("ASTs").const_get(acn)
rescue
- const_get("ASTs").const_set(acn, make_ast_class(acn, prod))
+ const_get("ASTs").const_set(acn, make_ast_class(acn, prod, nameMap))
end
end
# Build the AST class name for <name> (a String or Symbol) by upper-casing
# its first character; the remainder of the name is kept exactly as given.
def ast_class_name(name)
  text = name.to_s
  head = text[0, 1]
  tail = text[1..-1]
  head.upcase + tail
end
- def make_ast_class(klassName, production)
- Packrat::AST.new_subclass(klassName, production)
+ def make_ast_class(klassName, production, nameMap)
+ Packrat::AST.new_subclass(klassName, production, nameMap)
end
end
# Node in AST trees.
class Packrat::AST
class <<self
attr_accessor :sig
- def new_subclass(name, production)
+ # Create a new AST subclass. The <nameMap> hash can specify names
+ # for certain element indices (such explicitly specified names
+ # will override the default naming scheme which is to use a downcase
+ # version of the production name).
+ def new_subclass(name, production, nameMap = {})
klass = Class.new(self)
- klass.sig = extract_sig(production)
+ klass.sig = extract_sig(production, nameMap)
# Add accessor methods for all symbols in the sig
num_strings = 0
klass.sig.each_with_index do |sn, i|
if Symbol === sn
# We should subtract the num_strings in the index below
@@ -372,45 +396,72 @@
end
# Return a sig for the given <production>. The sig has strings in the
# positions where the production rhs has a String or StringLiteral,
# has symbols in the positions where a rhs element refers to another
- # production, and has nil in other positions.
- def extract_sig(production)
- production.rhs.map do |e|
- case e
- when String
- e
- when Packrat::StringLiteral
- e.inspect # gives us the string itself
- when Packrat::RuleRef
- sub_element_name(e.rule_name)
- else
- nil # Expand this so that names are lifted out of Maybe, and "s" is added when plus and mult etc
+ # production, and has nil in other positions. The <nameMap> can contain
+ # explicit names for certain indices (indices as key and name as symbol
+ # value).
+ def extract_sig(production, nameMap = {})
+ sig = []
+ production.rhs.each_with_index do |e, i|
+ unless e.hidden
+ case e
+ when String
+ sig << e
+ when Packrat::StringLiteral
+ sig << e.string
+ when Packrat::RuleRef
+ sig << sub_element_name(e.rule_name)
+ else
+ sig << nil # Expand this so that names are lifted out of Maybe, and "s" is added when plus and mult etc
+ end
end
end
+ number_multioccurences(sig).map {|n| nameMap[n] || n}
end
-
+
+ def number_multioccurences(sig)
+ num_sigs = sig.inject(Hash.new(0)) {|h, s| h[s] += 1 if Symbol === s; h}
+ counters = Hash.new(0)
+ sig.map do |s|
+ (num_sigs[s] > 1) ? (s.to_s + (counters[s] += 1).to_s).intern : s
+ end
+ end
+
# Derive the name used for a sub-element from a rule name: the name is cut
# into pieces at each capitalised word, the pieces are lower-cased and
# joined with underscores, and the result is returned as a Symbol
# (e.g. "FooBar" gives :foo_bar).
def sub_element_name(name)
  # Splitting on a capturing pattern keeps the matched words in the result
  # but also yields empty strings between adjacent matches; drop those.
  pieces = name.to_s.split(/([A-Z][a-z0-9]*)/).reject { |piece| piece.empty? }
  lowered = pieces.map { |piece| piece.downcase }
  lowered.join("_").to_sym
end
- def [](*args); new(*args); end
+ def constant_elem_at?(index)
+ self.sig[index].kind_of?(String)
+ end
+
+ def [](*args); new(args); end
end
-
- def initialize(*children)
- @children = children
+
+ DefaultOptions = {:only_nonconstant => true}
+
+ def initialize(children, options = {})
+ options = DefaultOptions.clone.update(options)
+ if options[:only_nonconstant]
+ @children = self.class.sig.map do |n|
+ n.kind_of?(String) ? n : children.shift
+ end
+ else
+ @children = children
+ end
end
attr_reader :children
def [](index); @children[index]; end
def ==(other)
self.class == other.class && @children == other.children
end
-
+
def inspect
self.class.inspect.split("::").last + "[" +
@children.map {|c| c.inspect}.join(", ") + "]"
end
end
@@ -421,14 +472,14 @@
def new_subclass(grammar)
klass = Class.new(self)
klass.grammar = grammar
klass
end
- def parse_string(str)
+ def parse_string(str, startSymbol = nil)
# We always add a whitespace since StringScanner cannot match /\s*/
# (typically used as whitespace) at EOS
- new(str + " ").parse_string
+ new(str + " ").parse_string(startSymbol)
end
end
attr_reader :results, :grammar
@@ -436,12 +487,13 @@
@str = string
@s = StringScanner.new(string)
@grammar = self.class.grammar
end
- def parse_string
- @grammar.start_rule.parse(self)
+ def parse_string(startSymbol = nil)
+ startSymbol ||= @grammar.start
+ @grammar[startSymbol].parse(self)
end
# Get and Set current position in string.
def pos; @s.pos; end
def pos=(p); @s.pos = p; end
@@ -482,29 +534,31 @@
end
end
class Packrat::Production
def parse(parser)
- result = []
- @rhs.each do |e|
- res = e.parse(parser)
- if res == false
- return false
- else
- result << res unless e.hidden
+ res = @result_producer.new_result
+ nonhidden_index = 0
+ @rhs.each_with_index do |e, i|
+ subres = e.parse(parser)
+ return false if false == subres
+ unless e.hidden
+ res = @result_producer.update_result(res, subres, e,
+ i, nonhidden_index)
+ nonhidden_index += 1
end
end
- return @result_modifier.modify_result(self, result)
+ return @result_producer.finalize_result(res)
end
end
class Packrat::Rule
def parse(parser)
+ oldpos = parser.pos
prods.each do |prod|
- oldpos = parser.pos
res = prod.parse(parser)
- return res if res
+ return res unless false == res
parser.pos = oldpos
end
return false
end
end
@@ -517,9 +571,10 @@
class Packrat::Repeat
def parse(parser)
result_list = []
oldpos = parser.pos
+ # XXX: Should we take only max number of results here if max != false?
while (res = @sub.parse(parser))
result_list << res
end
if valid_result?(result_list)
return result_list