hobo_files/plugin/lib/rexml.rb in hobo-0.5.3 vs hobo_files/plugin/lib/rexml.rb in hobo-0.6
- old
+ new
@@ -1,52 +1,80 @@
-# Hobo needs to process XML as transparently as possibe. In the case of tags
-# that are not defined by hobo (i.e. html tags), they should pass through just
-# as they were in the dryml source. Recontructing the tags from the DOM is not
-# good enough. The extensions to REXML in here allow Hobo to use the original
-# start tag source in the output.
-# There's also some fixes/extras to allow error messages with line numbers.
+# Extensions to XML Parsing
+#
+# 1. Hobo needs to process XML as transparently as possible. In the
+# case of tags that are not defined (i.e. html tags), they should pass
+# through just as they were in the dryml source. Reconstructing the
+# tags from the DOM is not good enough. The extensions to REXML in
+# here allow Hobo to use the original start tag source in the output.
+#
+# 2. some fixes/extras to allow error messages with line numbers.
+#
+# 3. Attributes without a RHS are allowed. They are returned as having
+# a value of +true+ (the Ruby value, not the string 'true')
+#
+# 1 and 2 are achieved by adding two instance variables to Element
+# nodes : @start_tag_source and @source_offset
+#
+# So cool that Ruby allows us to redefine a method. Such a shame the method
+# we needed to change happened to be 200 lines long :-(
-# The main hack is that Element nodes have two instance variables added:
-# @start_tag_source and @source_offset
+require 'rexml/document'
-# So cool that Ruby allows us to redfine a method. Such a shame the method
-# happened to be 200 lines long :-(
-require 'rexml/document'
module REXML
module Parsers
+
+ class TreeParser # reopened here so the constructor can hand the DRYML flag to the underlying parser
+ def initialize( source, build_context = Document.new ) # source: the XML input; build_context: node that receives the parsed tree
+ @build_context = build_context
+ @parser = Parsers::BaseParser.new(source)
+ @parser.dryml_mode = build_context.context[:dryml_mode] # flag is read from the :dryml_mode entry of the build context
+ end
+ end
+
class BaseParser
+ # Matches one attribute: a name, optionally followed by = and a quoted value (the value group is nil when there is no RHS).
+ DRYML_ATTRIBUTE_PATTERN = /\s*(#{NAME_STR})(?:\s*=\s*(["'])(.*?)\2)?/um
+ # Matches a whole start tag: tag name, zero or more attributes whose quoted value is optional, and an optional trailing slash.
+ DRYML_TAG_MATCH = /^<((?>#{NAME_STR}))\s*((?>\s+#{NAME_STR}(?:\s*=\s*(["']).*?\3)?)*)\s*(\/)?>/um
+ # When the flag is set, pull uses the DRYML patterns above and accepts a close tag that matches the open tag up to its first colon.
+ attr_writer :dryml_mode
+ def dryml_mode? # returns the stored flag as-is (nil until the writer has been called)
+ @dryml_mode
+ end
+
+
def pull
if @closed
x, @closed = @closed, nil
- return [ :end_element, x ]
+ return [ :end_element, x, false ]
end
return [ :end_document ] if empty?
return @stack.shift if @stack.size > 0
@source.read if @source.buffer.size<2
if @document_status == nil
- @source.consume( /^\s*/um )
- word = @source.match( /(<[^>]*)>/um )
+ @source.consume(/^\s*/um)
+ word = @source.match(/(<[^>]*)>/um)
word = word[1] unless word.nil?
case word
when COMMENT_START
- return [ :comment, @source.match( COMMENT_PATTERN, true )[1] ]
+ return [ :comment, @source.match(COMMENT_PATTERN, true)[1] ]
when XMLDECL_START
- results = @source.match( XMLDECL_PATTERN, true )[1]
- version = VERSION.match( results )
+ results = @source.match(XMLDECL_PATTERN, true)[1]
+ version = VERSION.match(results)
version = version[1] unless version.nil?
encoding = ENCODING.match(results)
encoding = encoding[1] unless encoding.nil?
@source.encoding = encoding
standalone = STANDALONE.match(results)
standalone = standalone[1] unless standalone.nil?
return [ :xmldecl, version, encoding, standalone]
when INSTRUCTION_START
return [ :processing_instruction, *@source.match(INSTRUCTION_PATTERN, true)[1,2] ]
when DOCTYPE_START
- md = @source.match( DOCTYPE_PATTERN, true )
+ md = @source.match(DOCTYPE_PATTERN, true)
identity = md[1]
close = md[2]
identity =~ IDENTITY
name = $1
raise REXML::ParseException("DOCTYPE is missing a name") if name.nil?
@@ -71,18 +99,18 @@
end
if @document_status == :in_doctype
md = @source.match(/\s*(.*?>)/um)
case md[1]
when SYSTEMENTITY
- match = @source.match( SYSTEMENTITY, true )[1]
+ match = @source.match(SYSTEMENTITY, true)[1]
return [ :externalentity, match ]
when ELEMENTDECL_START
- return [ :elementdecl, @source.match( ELEMENTDECL_PATTERN, true )[1] ]
+ return [ :elementdecl, @source.match(ELEMENTDECL_PATTERN, true)[1] ]
when ENTITY_START
- match = @source.match( ENTITYDECL, true ).to_a.compact
+ match = @source.match(ENTITYDECL, true).to_a.compact
match[0] = :entitydecl
ref = false
if match[1] == '%'
ref = true
match.delete_at 1
@@ -104,17 +132,17 @@
# match is [ :entity, name, value ]
end
match << '%' if ref
return match
when ATTLISTDECL_START
- md = @source.match( ATTLISTDECL_PATTERN, true )
- raise REXML::ParseException.new( "Bad ATTLIST declaration!", @source ) if md.nil?
+ md = @source.match(ATTLISTDECL_PATTERN, true)
+ raise REXML::ParseException.new("Bad ATTLIST declaration!", @source) if md.nil?
element = md[1]
contents = md[0]
pairs = {}
- values = md[0].scan( ATTDEF_RE )
+ values = md[0].scan(ATTDEF_RE)
values.each do |attdef|
unless attdef[3] == "#IMPLIED"
attdef.compact!
val = attdef[3]
val = attdef[4] if val == "#FIXED "
@@ -122,58 +150,64 @@
end
end
return [ :attlistdecl, element, pairs, contents ]
when NOTATIONDECL_START
md = nil
- if @source.match( PUBLIC )
- md = @source.match( PUBLIC, true )
- elsif @source.match( SYSTEM )
- md = @source.match( SYSTEM, true )
+ if @source.match(PUBLIC)
+ md = @source.match(PUBLIC, true)
+ elsif @source.match(SYSTEM)
+ md = @source.match(SYSTEM, true)
else
- raise REXML::ParseException.new( "error parsing notation: no matching pattern", @source )
+ raise REXML::ParseException.new("error parsing notation: no matching pattern", @source)
end
return [ :notationdecl, md[1], md[2], md[3] ]
when CDATA_END
@document_status = :after_doctype
- @source.match( CDATA_END, true )
+ @source.match(CDATA_END, true)
return [ :end_doctype ]
end
end
begin
if @source.buffer[0] == ?<
if @source.buffer[1] == ?/
last_tag, line_no = @tags.pop
- #md = @source.match_to_consume( '>', CLOSE_MATCH)
- md = @source.match( CLOSE_MATCH, true )
+ #md = @source.match_to_consume('>', CLOSE_MATCH)
+ md = @source.match(CLOSE_MATCH, true)
+
+ valid_end_tag = if dryml_mode?
+ last_tag =~ /^#{Regexp.escape(md[1])}(:.*)?/
+ else
+ last_tag == md[1]
+ end
raise REXML::ParseException.new("Missing end tag for "+
"'#{last_tag}' (line #{line_no}) (got \"#{md[1]}\")",
- @source) unless last_tag == md[1]
- return [ :end_element, last_tag ]
+ @source) unless valid_end_tag
+ return [ :end_element, last_tag, true ]
elsif @source.buffer[1] == ?!
md = @source.match(/\A(\s*[^>]*>)/um)
raise REXML::ParseException.new("Malformed node", @source) unless md
if md[0][2] == ?-
- md = @source.match( COMMENT_PATTERN, true )
+ md = @source.match(COMMENT_PATTERN, true)
return [ :comment, md[1] ] if md
else
- md = @source.match( CDATA_PATTERN, true )
+ md = @source.match(CDATA_PATTERN, true)
return [ :cdata, md[1] ] if md
end
- raise REXML::ParseException.new( "Declarations can only occur "+
+ raise REXML::ParseException.new("Declarations can only occur "+
"in the doctype declaration.", @source)
elsif @source.buffer[1] == ??
- md = @source.match( INSTRUCTION_PATTERN, true )
+ md = @source.match(INSTRUCTION_PATTERN, true)
return [ :processing_instruction, md[1], md[2] ] if md
- raise REXML::ParseException.new( "Bad instruction declaration",
+ raise REXML::ParseException.new("Bad instruction declaration",
@source)
else
# Get the next tag
- md = @source.match(TAG_MATCH, true)
+ md = @source.match(dryml_mode? ? DRYML_TAG_MATCH : TAG_MATCH, true)
raise REXML::ParseException.new("malformed XML: missing tag start", @source) unless md
attrs = []
if md[2].size > 0
- attrs = md[2].scan( ATTRIBUTE_PATTERN )
+ attrs = md[2].scan(dryml_mode? ? DRYML_ATTRIBUTE_PATTERN : ATTRIBUTE_PATTERN)
raise REXML::ParseException.new("error parsing attributes: [#{attrs.join ', '}], excess = \"#$'\"",
@source) if $' and $'.strip.size > 0
end
if md[4]
@@ -181,29 +215,28 @@
else
cl = @source.current_line
@tags.push([md[1], cl && cl[2]])
end
attributes = {}
- attrs.each { |a,b,c| attributes[a] = c }
+ attrs.each { |a,b,c| attributes[a] = (c || true) }
return [ :start_element, md[1], attributes, md[0],
@source.respond_to?(:last_match_offset) && @source.last_match_offset ]
end
else
- md = @source.match( TEXT_PATTERN, true )
+ md = @source.match(TEXT_PATTERN, true)
if md[0].length == 0
- @source.match( /(\s+)/, true )
+ @source.match(/(\s+)/, true)
end
#return [ :text, "" ] if md[0].length == 0
- # unnormalized = Text::unnormalize( md[1], self )
- # return PullEvent.new( :text, md[1], unnormalized )
+ # unnormalized = Text::unnormalize(md[1], self)
+ # return PullEvent.new(:text, md[1], unnormalized)
return [ :text, md[1] ]
end
rescue REXML::ParseException
raise
rescue Exception, NameError => error
- raise REXML::ParseException.new( "Exception parsing",
- @source, self, (error ? error : $!) )
+ raise REXML::ParseException.new("Exception parsing", @source, self, (error ? error : $!))
end
return [ :dummy ]
end
end
@@ -219,74 +252,131 @@
when :end_document
return
when :start_element
tag_stack.push(event[1])
# find the observers for namespaces
- @build_context = @build_context.add_element( event[1], event[2] )
- @build_context.instance_variable_set("@start_tag_source", event[3])
- @build_context.instance_variable_set("@source_offset", event[4])
+ @build_context = @build_context.add_element(event[1], event[2])
+ @build_context.start_tag_source = event[3]
+ @build_context.source_offset = event[4]
when :end_element
tag_stack.pop
+ @build_context.has_end_tag = event[2]
@build_context = @build_context.parent
when :text
if not in_doctype
if @build_context[-1].instance_of? Text
@build_context[-1] << event[1]
else
- @build_context.add(
- Text.new( event[1], @build_context.whitespace, nil, true )
- ) unless (
+ @build_context.add(
+ Text.new(event[1], @build_context.whitespace, nil, true)
+ ) unless (
event[1].strip.size==0 and
@build_context.ignore_whitespace_nodes
- )
+ )
end
end
when :comment
- c = Comment.new( event[1] )
- @build_context.add( c )
+ c = Comment.new(event[1])
+ @build_context.add(c)
when :cdata
- c = CData.new( event[1] )
- @build_context.add( c )
+ c = CData.new(event[1])
+ @build_context.add(c)
when :processing_instruction
- @build_context.add( Instruction.new( event[1], event[2] ) )
+ @build_context.add(Instruction.new(event[1], event[2]))
when :end_doctype
in_doctype = false
entities.each { |k,v| entities[k] = @build_context.entities[k].value }
@build_context = @build_context.parent
when :start_doctype
- doctype = DocType.new( event[1..-1], @build_context )
+ doctype = DocType.new(event[1..-1], @build_context)
@build_context = doctype
entities = {}
in_doctype = true
when :attlistdecl
- n = AttlistDecl.new( event[1..-1] )
- @build_context.add( n )
+ n = AttlistDecl.new(event[1..-1])
+ @build_context.add(n)
when :externalentity
- n = ExternalEntity.new( event[1] )
- @build_context.add( n )
+ n = ExternalEntity.new(event[1])
+ @build_context.add(n)
when :elementdecl
- n = ElementDecl.new( event[1] )
+ n = ElementDecl.new(event[1])
@build_context.add(n)
when :entitydecl
entities[ event[1] ] = event[2] unless event[2] =~ /PUBLIC|SYSTEM/
@build_context.add(Entity.new(event))
when :notationdecl
- n = NotationDecl.new( *event[1..-1] )
- @build_context.add( n )
+ n = NotationDecl.new(*event[1..-1])
+ @build_context.add(n)
when :xmldecl
- x = XMLDecl.new( event[1], event[2], event[3] )
- @build_context.add( x )
+ x = XMLDecl.new(event[1], event[2], event[3])
+ @build_context.add(x)
end
end
rescue REXML::Validation::ValidationException
raise
rescue
- raise ParseException.new( $!.message, @parser.source, @parser, $! )
+ raise ParseException.new($!.message, @parser.source, @parser, $!)
end
end
end
end
+
+ class Document
+ # Value that Attribute#value returns, via element.document, for an attribute written without a right-hand side.
+ attr_accessor :default_attribute_value
+
+ end
+
+ class Element
+ # Returns expanded_name with everything from the first colon onwards removed.
+ def dryml_name
+ expanded_name.sub(/:.*/, "")
+ end
+ # Original start-tag text and its source offset; the tree-building loop assigns both on each :start_element event.
+ attr_accessor :start_tag_source, :source_offset
+ # Assigned from the third item of each :end_element event: true when a close tag was matched, false for the event built from @closed.
+ attr_writer :has_end_tag
+ def has_end_tag? # returns the stored flag as-is (nil if it was never assigned)
+ @has_end_tag
+ end
+
+ end
+
+ class Attribute
+ # Runs the original constructor, then stores the Ruby value true when a String name is paired with true (an attribute with no RHS).
+ def initialize_with_dryml(first, second=nil, parent=nil)
+ initialize_without_dryml(first, second, parent)
+ if first.is_a?(String) && second == true
+ @value = true
+ end
+ end
+ alias_method_chain :initialize, :dryml # NOTE(review): alias_method_chain is not defined in this file; presumably ActiveSupport - confirm it is loaded first
+ # Attributes with a RHS use the original value method; the rest return default_attribute_value from the owning document.
+ def value_with_dryml
+ if has_rhs?
+ value_without_dryml
+ else
+ element.document.default_attribute_value
+ end
+ end
+ alias_method_chain :value, :dryml
+ # Attributes with a RHS serialise through the original to_string; the rest emit only @expanded_name.
+ def to_string_with_dryml
+ if has_rhs?
+ to_string_without_dryml
+ else
+ @expanded_name
+ end
+ end
+ alias_method_chain :to_string, :dryml
+ # True unless @value is the Ruby value true, which is the marker stored for attributes without a RHS.
+ def has_rhs?
+ @value != true
+ end
+
+ end
+
end
module Hobo::Dryml
class RexSource < REXML::Source
@@ -320,12 +410,12 @@
advance_buffer(Regexp.last_match)
end
rv
end
- def consume( pattern )
- md = remember_match(pattern.match( @buffer ))
+ def consume(pattern) # matches pattern against @buffer; returns @buffer after a match, nil otherwise
+ md = remember_match(pattern.match(@buffer)) # NOTE(review): remember_match is defined outside this view; presumably records the match position - confirm
if md
advance_buffer(md) # NOTE(review): helper defined outside this view; presumably moves @buffer past the matched text - confirm
@buffer
end
end
@@ -335,11 +425,12 @@
advance_buffer(md) if cons and md
return md
end
def current_line # returns [0, 0, line]; line is 1-based, counted from the newlines in @orig up to the last match offset
- pos = last_match_offset
- [0, 0, @orig[0..pos].count("\n") + 1]
+ pos = last_match_offset || 0 # use offset 0 when last_match_offset is nil
+ [0, 0, @orig[0..pos].count("\n") + 1]
end
end
+
end