#
# Copyright (c) 2006 Michael Koziarski
#
# Permission is hereby granted, free of charge, to any person obtaining a copy of
# this software and associated documentation files (the "Software"), to deal in the
# Software without restriction, including without limitation the rights to use,
# copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the
# Software, and to permit persons to whom the Software is furnished to do so,
# subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
# FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
# COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
# AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
require 'rubygems'
require 'xml/libxml'
# Converts an XML document into nested Ruby hashes, arrays and strings,
# using libxml for parsing. Intended as a faster stand-in for the xml_in
# half of XmlSimple.
class FasterXmlSimple
  Version = '0.5.0'

  class << self
    # Takes a string containing XML and returns a hash representing that
    # XML document. For example:
    #
    #   FasterXmlSimple.xml_in("<root><something>1</something></root>")
    #   # => {"root"=>{"something"=>"1"}}
    #
    #   FasterXmlSimple.xml_in("<root><something>1</something></root>",
    #                          'forcecontent' => true)
    #   # => {"root"=>{"something"=>{"__content__"=>"1"}}}
    #
    # Faster XML Simple is designed to be a drop in replacement for the xml_in
    # functionality of http://xml-simple.rubyforge.org
    #
    # Options are looked up by their lowercase String keys. The following
    # options are supported:
    #
    # * contentkey: The key to use for the content of text elements,
    #   defaults to '__content__'.
    # * forcearray: The list of element names which should always be
    #   returned as arrays. Under normal circumstances single element arrays
    #   are inlined.
    # * suppressempty: The value to return for empty elements, pass +true+
    #   to remove empty elements entirely. When the option is absent an
    #   empty element becomes an empty hash.
    # * keeproot: By default the hash returned has a single key with the
    #   name of the root element. If the name of the root element isn't
    #   interesting to you, pass +false+.
    # * forcecontent: By default a text element with no attributes will
    #   be collapsed to just a string instead of a hash with a single key.
    #   Pass +true+ to prevent this.
    def xml_in(string, options={})
      new(string, options).out
    end
  end

  def initialize(string, options) #:nodoc:
    @doc = parse(string)
    @options = default_options.merge(options)
  end

  def out #:nodoc:
    collapsed = collapse(@doc.root)
    @options['keeproot'] ? { @doc.root.name => collapsed } : collapsed
  end

  private

  # Option values used for any key the caller did not supply.
  def default_options
    {'contentkey' => '__content__', 'forcearray' => [], 'keeproot'=>true}
  end

  # Recursively converts +element+ into a hash of its attributes, text
  # content and child elements. Returns the configured empty-element value
  # when nothing was collected, or a bare string when only text content was
  # collected and 'forcecontent' is not set.
  def collapse(element)
    result = hash_of_attributes(element)
    if text_node?(element)
      text = collapse_text(element)
      # whitespace-only text is not recorded as content
      result[content_key] = text if text =~ /\S/
    elsif element.children?
      element.each do |child|
        # text interleaved with child elements is dropped
        next if child.text?
        (result[child.name] ||= []) << collapse(child)
      end
    end
    return empty_element if result.empty?

    # Compact the result to ensure it complies with the user's requests
    inline_single_element_arrays(result)
    remove_empty_elements(result) if suppress_empty?
    if content_only?(result) && !force_content?
      result[content_key]
    else
      result
    end
  end

  # True when the only entry in +result+ is the text content.
  def content_only?(result)
    result.keys == [content_key]
  end

  def content_key
    @options['contentkey']
  end

  # True when +key_name+ is listed in the 'forcearray' option.
  def force_array?(key_name)
    Array(@options['forcearray']).include?(key_name)
  end

  # Replaces every one-element array in +result+ with its sole element,
  # except for keys listed in 'forcearray'. Mutates and returns +result+.
  def inline_single_element_arrays(result)
    result.each do |key, value|
      # check the type first: not every value is guaranteed to respond to size
      next unless value.is_a?(Array) && value.size == 1 && !force_array?(key)
      result[key] = value.first
    end
  end

  # Deletes every entry whose value equals the empty-element value.
  # Mutates and returns +result+.
  def remove_empty_elements(result)
    placeholder = empty_element
    result.delete_if { |_key, value| value == placeholder }
  end

  # Only a literal +true+ requests removal; any other 'suppressempty' value
  # is used as the replacement for empty elements, hence the explicit
  # comparison.
  def suppress_empty?
    @options['suppressempty'] == true
  end

  # The value standing in for an element with no attributes, content or
  # children: the 'suppressempty' option if the key is present (even when
  # its value is nil), otherwise an empty hash.
  def empty_element
    @options.fetch('suppressempty') { {} }
  end

  # removes the content if it's nothing but blanks, prevents
  # the hash being polluted with lots of content like "\n\t\t\t"
  def suppress_empty_content(result)
    result.delete(content_key) if result[content_key] !~ /\S/
  end

  def force_content?
    @options['forcecontent']
  end

  # a text node is one with 1 or more child nodes which are
  # text nodes, and no non-text children, there's no sensible
  # way to support nodes which are text and markup like:
  #
  #   <p>Something <b>Bold</b></p>
  #
  # Note that an element with no children at all also satisfies this check.
  def text_node?(element)
    !element.text? && element.all? { |c| c.text? }
  end

  # takes a text node, and collapses it into a string
  def collapse_text(element)
    element.map { |c| c.content }.join('')
  end

  # Builds a name => value hash from the attributes of +element+.
  # Attributes that report a namespace are keyed as "ns:name".
  def hash_of_attributes(element)
    result = {}
    element.each_attr do |attribute|
      name = attribute.name
      name = [attribute.ns, attribute.name].join(':') if attribute.ns?
      result[name] = attribute.value
    end
    result
  end

  # Parses +string+ with libxml and returns the resulting document.
  def parse(string)
    # an empty string is swapped for a single space before being handed
    # to the parser
    string = ' ' if string == ''
    XML::Parser.string(string).parse
  end
end
# Compatibility shim: lets code written against XmlSimple.xml_in run
# on FasterXmlSimple without modification.
class XmlSimple # :nodoc:
  class << self
    # Hands every argument straight to FasterXmlSimple.xml_in and returns
    # whatever it returns.
    def xml_in(*args)
      FasterXmlSimple.xml_in(*args)
    end
  end
end