lib/nori/parser/rexml.rb in nori-2.2.0 vs lib/nori/parser/rexml.rb in nori-2.3.0

- old
+ new

@@ -1,6 +1,8 @@
 require "rexml/parsers/baseparser"
+require "rexml/text"
+require "rexml/document"
 
 class Nori
   module Parser
 
     # = Nori::Parser::REXML
@@ -11,11 +13,11 @@
       def self.parse(xml, options)
         stack = []
         parser = ::REXML::Parsers::BaseParser.new(xml)
 
         while true
-          event = parser.pull
+          event = unnormalize(parser.pull)
           case event[0]
           when :end_document
             break
           when :end_doctype, :start_doctype
             # do nothing
@@ -30,9 +32,20 @@
             stack.last.add_node(event[1]) unless event[1].strip.length == 0 || stack.empty?
           end
         end
         stack.length > 0 ? stack.pop.to_hash : {}
       end
 
-    end
+      def self.unnormalize(event)
+        event.map! do |el|
+          if el.is_a?(String)
+            ::REXML::Text.unnormalize(el)
+          elsif el.is_a?(Hash)
+            el.each {|k,v| el[k] = ::REXML::Text.unnormalize(v)}
+          else
+            el
+          end
+        end
+      end
+    end
   end
 end