lib/feedzirra/parser/atom.rb in feedzirra-0.5.0 vs lib/feedzirra/parser/atom.rb in feedzirra-0.6.0

- old
+ new

@@ -1,7 +1,6 @@ module Feedzirra - module Parser # Parser for dealing with Atom feeds. class Atom include SAXMachine include FeedUtilities @@ -22,9 +21,41 @@ end def feed_url @feed_url ||= links.first end + + def self.preprocess(xml) + Preprocessor.new(xml).to_xml + end + + class Preprocessor + def initialize(xml) + @xml = xml + end + + def to_xml + process_nodes + doc.to_xml + end + + private + + def process_nodes + nodes.each { |node| node.content = raw_html(node) unless node.cdata? } + end + + def nodes + doc.search 'entry > content[type="xhtml"]' + end + + def raw_html(node) + CGI.unescape_html node.inner_html + end + + def doc + @doc ||= Nokogiri::XML @xml + end + end end end - end