#!/usr/bin/env ruby -wW1
$: << '../lib'
$: << '../ext'
require 'optparse'
require 'stringio'
require 'ox'
# Command-line configuration. The settings live in globals because the
# benchmark loop at the bottom of the file reads them directly.
$verbose = 0 # incremented once for every -v on the command line
$iter = 100  # how many times each parser is run per input file
opts = OptionParser.new
opts.on("-v", "increase verbosity") { $verbose += 1 }
# The argument is declared optional ([Int]), so a bare -i hands nil to the
# block. Keep the current count in that case rather than storing nil, which
# would make the later $iter.times call fail.
opts.on("-i", "--iterations [Int]", Integer, "iterations") { |i| $iter = i unless i.nil? }
opts.on("-h", "--help", "Show this display") { puts opts; Process.exit!(0) }
# Everything left over after option parsing is treated as an input file name.
files = opts.parse(ARGV)
### XML conversion to Hash using in memory Ox parsing ###
# Builds a Hash from the children of a plist dict element.
#
# The children are expected to alternate between a key element and a value
# element. Each key's first text child becomes the Hash key and the element
# that follows it is converted with node_to_value.
#
# element - the element whose #nodes are the key/value children.
#
# Returns the resulting Hash (empty when the element has no children).
# Raises RuntimeError when a child is not an Ox::Element, or when a key was
# expected and some other element was found.
def node_to_dict(element)
  pending_key = nil
  element.nodes.each_with_object({}) do |child, result|
    raise "A dict can only contain elements." unless child.is_a?(::Ox::Element)

    if pending_key.nil?
      # No key is waiting for a value, so this child has to be a key.
      raise "Expected a key, not a #{child.name}." unless child.name == 'key'
      pending_key = first_text(child)
    else
      result[pending_key] = node_to_value(child)
      pending_key = nil
    end
  end
end
# Converts the children of a plist array element into a Ruby Array.
#
# element - the element whose #nodes are the array members.
#
# Returns a new Array holding node_to_value of every child, in order.
def node_to_array(element)
  element.nodes.map { |child| node_to_value(child) }
end
# Converts a single plist value element into the matching Ruby object.
#
# node - an Ox::Element named string, dict, array, integer, real, true or
#        false.
#
# Returns the element's first text for string (nil when it has none), a Hash
# for dict, an Array for array, the text converted with to_i for integer and
# with to_f for real, and true/false for the boolean elements.
# Raises RuntimeError when node is not an element, is a key, or has a name
# that is not one of the types listed above.
def node_to_value(node)
  # This is reached for array members as well as dict values, so the message
  # names both containers.
  raise "A dict or array can only contain elements." unless node.is_a?(::Ox::Element)

  case node.name
  when 'key'     then raise "Expected a value, not a key."
  when 'string'  then first_text(node)
  when 'dict'    then node_to_dict(node)
  when 'array'   then node_to_array(node)
  # An element without text gives nil here; nil.to_i and nil.to_f are 0 and 0.0.
  when 'integer' then first_text(node).to_i
  when 'real'    then first_text(node).to_f
  when 'true'    then true
  when 'false'   then false
  else raise "#{node.name} is not a known element type."
  end
end
# Looks up the first text child of an element.
#
# node - any object whose #nodes returns the list of children.
#
# Returns the first child that is a String, or nil when there is none.
def first_text(node)
  node.nodes.find { |child| child.is_a?(String) }
end
# Parses plist XML into an in-memory Ox document and converts it to a Hash.
#
# xml - the XML text handed to Ox.parse.
#
# Returns node_to_dict of the first element found directly under the
# document root, or nil when the root has no element children.
def parse_gen(xml)
  root = Ox.parse(xml).root
  # Only the first element child is converted; anything after it is ignored.
  first_element = root.nodes.find { |child| child.is_a?(::Ox::Element) }
  first_element && node_to_dict(first_element)
end
### XML conversion to Hash using Ox SAX parser ###
# SAX-style handler that assembles plist XML events into nested Ruby Hashes
# and Arrays. parse_sax hands an instance to Ox.sax_parse and reads the
# outcome back through #plist.
#
# Open dict and array containers are kept on a stack. Every finished value is
# attached to the innermost open container, or becomes the result when no
# container is open.
class Handler
  # The value assembled so far, or nil when nothing has been appended yet.
  attr_reader :plist

  def initialize
    @key = nil   # dict key that is still waiting for its value
    @type = nil  # name of the last started element other than dict/array/true/false
    @plist = nil
    @stack = []  # open containers, innermost last
  end

  # Receives the text content of the current element.
  #
  # Inside a dict that has no pending key the text must belong to a key
  # element and is remembered as that key; any other text is appended as a
  # value. Raises RuntimeError when a key was required but the text came from
  # a different element.
  def text(value)
    if @stack.last.is_a?(Hash) && @key.nil?
      raise "Expected a key, not #{@type} with a value of #{value}." unless @type == :key
      @key = value
    else
      append(value)
    end
  end

  # Receives the name (a Symbol) of an element that is being opened.
  #
  # dict and array create a new container, attach it to its parent and make
  # it the innermost open container. true and false are appended immediately
  # because they carry no text. Any other name is only remembered so that the
  # following text can be converted to the right type.
  def start_element(name)
    case name
    when :dict, :array
      container = name == :dict ? {} : []
      append(container)
      @stack.push(container)
    when :true
      append(true)
    when :false
      append(false)
    else
      @type = name
    end
  end

  # Receives the name of an element that is being closed; only containers
  # need any work, which is to drop them from the stack.
  def end_element(name)
    @stack.pop if name == :dict || name == :array
  end

  # Attaches a finished value to the innermost open container.
  #
  # Text of integer and real elements is converted with to_i / to_f first.
  # Raises RuntimeError when the container is a dict and no key is pending.
  def append(value)
    # Only raw text is converted. Booleans and containers arrive as Ruby
    # objects while @type may still name an earlier integer or real element;
    # converting them would call to_i / to_f on true, false, Hash or Array.
    if value.is_a?(String)
      case @type
      when :integer then value = value.to_i
      when :real    then value = value.to_f
      end
    end

    container = @stack.last
    case container
    when Hash
      raise "Expected a key, not a value of #{value}." if @key.nil?
      container[@key] = value
      @key = nil
    when Array
      container.push(value)
    when nil
      # Nothing is open, so this value is the document itself.
      @plist = value
    end
  end
end
# Parses plist XML with the Ox SAX parser, using a Handler to build the
# result.
#
# xml - the XML text; it is handed to the parser through a StringIO.
#
# Returns whatever the handler exposes through #plist once parsing is done.
def parse_sax(xml)
  handler = Handler.new
  Ox.sax_parse(handler, StringIO.new(xml))
  handler.plist
end
### XML conversion to Hash using Ox Object parsing with gsub! replacements ###
# Rewrites plist XML with plist_to_obj_xml and loads the rewritten text with
# Ox in :object mode, so the conversion cost is part of every call.
#
# xml - the original plist XML text.
#
# Returns the object produced by Ox.load.
def convert_parse_obj(xml)
  Ox.load(plist_to_obj_xml(xml), mode: :object)
end
### XML conversion to Hash using Ox Object parsing after gsub! replacements ###
# Loads XML with Ox in :object mode without rewriting it first.
#
# xml - XML text; the benchmark passes text that plist_to_obj_xml has already
#       rewritten.
#
# Returns the object produced by Ox.load.
def parse_obj(xml)
  Ox.load(xml, mode: :object)
end
# Rewrites XML text through a series of literal string substitutions and
# returns the rewritten copy. The caller's string is left untouched because
# the first substitution uses the non-bang gsub, which returns a new String.
#
# convert_parse_obj and the last benchmark pass the result to Ox.load in
# :object mode.
#
# NOTE(review): in this copy of the file both %{...} patterns contain only a
# newline, and every key and replacement in the table below is the empty
# string. As written, the method therefore only strips newlines. Presumably
# the literals originally held plist and Ox object-format markup that is not
# visible here — confirm against the upstream Ox sample before relying on it.
def plist_to_obj_xml(xml)
# Work on a copy so that the gsub! calls below cannot modify the argument.
xml = xml.gsub(%{
}, '')
xml.gsub!(%{
}, '')
# Pattern => replacement pairs; each is applied to the working copy in turn.
{ '' => '',
'' => '',
'' => '',
'' => '',
'' => '',
'' => '',
'' => '',
'' => '',
'' => '',
'' => '',
'' => '',
'' => '',
'' => '',
'' => '',
'' => '',
'' => '',
'' => '',
'' => '',
'' => '',
}.each do |pat,rep|
xml.gsub!(pat, rep)
end
# Return the string explicitly: gsub! returns nil when nothing was replaced.
xml
end
# Benchmark driver: for every file named on the command line, time each of
# the four parsing strategies over $iter iterations and print the totals.
files.each do |filename|
  xml = File.read(filename)

  # With -v, first report whether the three conversions agree on this file.
  if $verbose > 0
    d1 = parse_gen(xml)
    d2 = parse_sax(xml)
    d3 = convert_parse_obj(xml)
    all_equal = d1 == d2 && d2 == d3
    puts "--- It is #{all_equal} that all parsers yield the same Hash. ---"
  end

  # Runs the given block $iter times and returns the elapsed time in seconds.
  timed = lambda do |&work|
    began = Time.now
    $iter.times(&work)
    Time.now - began
  end

  gen_time = timed.call { parse_gen(xml) }
  sax_time = timed.call { parse_sax(xml) }
  conv_obj_time = timed.call { convert_parse_obj(xml) }

  # The last run parses text that was rewritten once up front, so the
  # rewriting itself is not included in its timing.
  xml = plist_to_obj_xml(xml)
  obj_time = timed.call { parse_obj(xml) }

  puts "In memory parsing and conversion took #{gen_time} for #{$iter} iterations."
  puts "SAX parsing and conversion took #{sax_time} for #{$iter} iterations."
  puts "XML gsub Object parsing and conversion took #{conv_obj_time} for #{$iter} iterations."
  puts "Object parsing and conversion took #{obj_time} for #{$iter} iterations."
end
# Results for a run:
#
# > parse_cmp.rb Sample.graffle -i 1000
# In memory parsing and conversion took 4.135701 for 1000 iterations.
# SAX parsing and conversion took 3.731695 for 1000 iterations.
# XML gsub Object parsing and conversion took 3.292397 for 1000 iterations.
# Object parsing and conversion took 0.808877 for 1000 iterations.