lib/feedzirra/parser/atom.rb in feedzirra-0.5.0 vs lib/feedzirra/parser/atom.rb in feedzirra-0.6.0
- old
+ new
@@ -1,7 +1,6 @@
module Feedzirra
-
module Parser
# Parser for dealing with Atom feeds.
class Atom
include SAXMachine
include FeedUtilities
@@ -22,9 +21,41 @@
end
# Returns the feed URL, taken from the first element of +links+.
# A truthy result is cached in @feed_url and reused on later calls;
# a nil/false result is not retained, so +links+ is consulted again.
def feed_url
  return @feed_url if @feed_url

  @feed_url = links.first
end
+
# Class-level entry point: wraps +xml+ in a Preprocessor and returns the
# value of its #to_xml, i.e. the document re-serialized after the
# preprocessor has rewritten its XHTML content nodes.
+ def self.preprocess(xml)
+ Preprocessor.new(xml).to_xml
+ end
+
# Rewrites the content of every node matching
# 'entry > content[type="xhtml"]' in an XML document and serializes the
# modified document back to XML.
+ class Preprocessor
# Stores the raw +xml+ input; parsing is deferred until #doc is first used.
+ def initialize(xml)
+ @xml = xml
+ end
+
# Rewrites the matching nodes in place, then returns the result of
# calling to_xml on the parsed document.
+ def to_xml
+ process_nodes
+ doc.to_xml
+ end
+
+ private
+
# Replaces each selected node's content with its HTML-unescaped inner
# markup, skipping nodes that report cdata?.
# NOTE(review): assigning through `content=` presumably stores the markup
# as text rather than as child elements - confirm against Nokogiri docs.
# NOTE(review): `nodes` selects elements; verify whether `cdata?` can ever
# be true for them, otherwise this guard never triggers.
+ def process_nodes
+ nodes.each { |node| node.content = raw_html(node) unless node.cdata? }
+ end
+
# Searches the document for `content` elements with type="xhtml" that are
# direct children of an `entry` element.
+ def nodes
+ doc.search 'entry > content[type="xhtml"]'
+ end
+
# Returns the node's inner_html passed through CGI.unescape_html.
# Assumes `cgi` has been required elsewhere - TODO confirm.
+ def raw_html(node)
+ CGI.unescape_html node.inner_html
+ end
+
# Parses @xml with Nokogiri::XML on first use and memoizes the result in
# @doc, so the nodes modified by process_nodes belong to the same
# document that to_xml serializes.
+ def doc
+ @doc ||= Nokogiri::XML @xml
+ end
+ end
end
end
-
end