lib/feed_tools/feed_item.rb in feedtools-0.2.17 vs lib/feed_tools/feed_item.rb in feedtools-0.2.18

- old
+ new

@@ -1,10 +1,37 @@ +#-- +# Copyright (c) 2005 Robert Aman +# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE +# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +#++ + module FeedTools # The <tt>FeedTools::FeedItem</tt> class represents the structure of # a single item within a web feed. class FeedItem + # :stopdoc: include REXML + include GenericHelper + private :validate_options + # :startdoc: # This class stores information about a feed item's file enclosures. class Enclosure # The url for the enclosure attr_accessor :url @@ -123,66 +150,54 @@ :width ) # Initialize the feed object def initialize super - @feed = nil @feed_data = nil @feed_data_type = :xml @xml_doc = nil @root_node = nil @title = nil @id = nil @time = Time.now.gmtime end # Returns the parent feed of this feed item + # Warning, this method may be slow if you have a + # large number of FeedTools::Feed objects. Can't + # use a direct reference to the parent because it plays + # havoc with the garbage collector. def feed - return @feed + parent_feed = nil + ObjectSpace.each_object(FeedTools::Feed) do |feed| + if feed.instance_variable_get("@items").nil? + feed.items + end + unsorted_items = feed.instance_variable_get("@items") + for item in unsorted_items + if item.object_id == self.object_id + if parent_feed.nil? + parent_feed = feed + break + else + raise "Multiple parent feeds found." + end + end + end + end + return parent_feed end - # Sets the parent feed of this feed item - def feed=(new_feed) - @feed = new_feed - end - # Returns the feed item's raw data. def feed_data return @feed_data end # Sets the feed item's data. def feed_data=(new_feed_data) @time = nil @feed_data = new_feed_data - - # We need an immediate parse of the time so we don't mess up sort orders - unless root_node.nil? - repair_entities = false - time_node = XPath.first(root_node, "pubDate") - if time_node.nil? - time_node = XPath.first(root_node, "dc:date") - end - if time_node.nil? - time_node = XPath.first(root_node, "dc:date", FEED_TOOLS_NAMESPACES) - end - if time_node.nil? - time_node = XPath.first(root_node, "issued") - end - if time_node.nil? - time_node = XPath.first(root_node, "updated") - end - if time_node.nil? - time_node = XPath.first(root_node, "time") - end - end - unless time_node.nil? - begin - @time = Time.parse(time_node.inner_xml) - rescue - end - end end # Returns the feed item's data type. def feed_data_type return @feed_data_type @@ -251,34 +266,51 @@ # Returns the feed item title def title if @title.nil? unless root_node.nil? repair_entities = false - title_node = XPath.first(root_node, "title") + title_node = XPath.first(root_node, "atom10:title", + FEED_TOOLS_NAMESPACES) if title_node.nil? + title_node = XPath.first(root_node, "title") + end + if title_node.nil? + title_node = XPath.first(root_node, "atom03:title", + FEED_TOOLS_NAMESPACES) + end + if title_node.nil? title_node = XPath.first(root_node, "atom:title") end if title_node.nil? + title_node = XPath.first(root_node, "dc:title", + FEED_TOOLS_NAMESPACES) + end + if title_node.nil? title_node = XPath.first(root_node, "dc:title") end if title_node.nil? title_node = XPath.first(root_node, "TITLE") end end if title_node.nil? return nil end - if XPath.first(title_node, "@type").to_s == "xhtml" || - XPath.first(title_node, "@mode").to_s == "xhtml" || - XPath.first(title_node, "@type").to_s == "xml" || - XPath.first(title_node, "@mode").to_s == "xml" || - XPath.first(title_node, "@type").to_s == "application/xhtml+xml" + title_type = XPath.first(title_node, "@type").to_s + title_mode = XPath.first(title_node, "@mode").to_s + title_encoding = XPath.first(title_node, "@encoding").to_s + + # Note that we're checking for misuse of type, mode and encoding here + if title_type == "base64" || title_mode == "base64" || + title_encoding == "base64" + @title = Base64.decode64(title_node.inner_xml.strip) + elsif title_type == "xhtml" || title_mode == "xhtml" || + title_type == "xml" || title_mode == "xml" || + title_type == "application/xhtml+xml" @title = title_node.inner_xml - elsif XPath.first(title_node, "@type").to_s == "escaped" || - XPath.first(title_node, "@mode").to_s == "escaped" + elsif title_type == "escaped" || title_mode == "escaped" @title = FeedTools.unescape_entities( - XPath.first(title_node, "text()").to_s) + title_node.inner_xml) else @title = title_node.inner_xml repair_entities = true end unless @title.nil? @@ -364,31 +396,33 @@ end end if description_node.nil? return nil end - unless description_node.nil? - if XPath.first(description_node, "@encoding").to_s != "" - @description = - "[Embedded data objects are not currently supported.]" - elsif description_node.cdatas.size > 0 - @description = description_node.cdatas.first.value - elsif XPath.first(description_node, "@type").to_s == "xhtml" || - XPath.first(description_node, "@mode").to_s == "xhtml" || - XPath.first(description_node, "@type").to_s == "xml" || - XPath.first(description_node, "@mode").to_s == "xml" || - XPath.first(description_node, "@type").to_s == - "application/xhtml+xml" - @description = description_node.inner_xml - elsif XPath.first(description_node, "@type").to_s == "escaped" || - XPath.first(description_node, "@mode").to_s == "escaped" - @description = FeedTools.unescape_entities( - description_node.inner_xml) - else - @description = description_node.inner_xml - repair_entities = true - end + description_type = XPath.first(description_node, "@type").to_s + description_mode = XPath.first(description_node, "@mode").to_s + description_encoding = XPath.first(description_node, "@encoding").to_s + + # Note that we're checking for misuse of type, mode and encoding here + if description_encoding != "" + @description = + "[Embedded data objects are not currently supported.]" + elsif description_node.cdatas.size > 0 + @description = description_node.cdatas.first.value + elsif description_type == "base64" || description_mode == "base64" || + description_encoding == "base64" + @description = Base64.decode64(description_node.inner_xml.strip) + elsif description_type == "xhtml" || description_mode == "xhtml" || + description_type == "xml" || description_mode == "xml" || + description_type == "application/xhtml+xml" + @description = description_node.inner_xml + elsif description_type == "escaped" || description_mode == "escaped" + @description = FeedTools.unescape_entities( + description_node.inner_xml) + else + @description = description_node.inner_xml + repair_entities = true end if @description == "" @description = self.itunes_summary @description = "" if @description.nil? end @@ -664,23 +698,74 @@ # Returns the feed item's copyright information def copyright if @copyright.nil? unless root_node.nil? - @copyright = XPath.first(root_node, "dc:rights/text()").to_s - if @copyright == "" - @copyright = XPath.first(root_node, "rights/text()").to_s + repair_entities = false + + copyright_node = XPath.first(root_node, "dc:rights") + if copyright_node.nil? + copyright_node = XPath.first(root_node, "dc:rights", + FEED_TOOLS_NAMESPACES) end - if @copyright == "" - @copyright = XPath.first(root_node, "copyright/text()").to_s + if copyright_node.nil? + copyright_node = XPath.first(root_node, "rights", + FEED_TOOLS_NAMESPACES) end - if @copyright == "" - @copyright = XPath.first(root_node, "copyrights/text()").to_s + if copyright_node.nil? + copyright_node = XPath.first(root_node, "copyright", + FEED_TOOLS_NAMESPACES) end + if copyright_node.nil? + copyright_node = XPath.first(root_node, "atom03:copyright", + FEED_TOOLS_NAMESPACES) + end + if copyright_node.nil? + copyright_node = XPath.first(root_node, "atom10:copyright", + FEED_TOOLS_NAMESPACES) + end + if copyright_node.nil? + copyright_node = XPath.first(root_node, "copyrights", + FEED_TOOLS_NAMESPACES) + end + end + if copyright_node.nil? + return nil + end + copyright_type = XPath.first(copyright_node, "@type").to_s + copyright_mode = XPath.first(copyright_node, "@mode").to_s + copyright_encoding = XPath.first(copyright_node, "@encoding").to_s + + # Note that we're checking for misuse of type, mode and encoding here + if copyright_encoding != "" + @copyright = + "[Embedded data objects are not currently supported.]" + elsif copyright_node.cdatas.size > 0 + @copyright = copyright_node.cdatas.first.value + elsif copyright_type == "base64" || copyright_mode == "base64" || + copyright_encoding == "base64" + @copyright = Base64.decode64(copyright_node.inner_xml.strip) + elsif copyright_type == "xhtml" || copyright_mode == "xhtml" || + copyright_type == "xml" || copyright_mode == "xml" || + copyright_type == "application/xhtml+xml" + @copyright = copyright_node.inner_xml + elsif copyright_type == "escaped" || copyright_mode == "escaped" + @copyright = FeedTools.unescape_entities( + copyright_node.inner_xml) + else + @copyright = copyright_node.inner_xml + repair_entities = true + end + + unless @copyright.nil? @copyright = FeedTools.sanitize_html(@copyright, :strip) - @copyright = nil if @copyright == "" + @copyright = FeedTools.unescape_entities(@copyright) if repair_entities + @copyright = FeedTools.tidy_html(@copyright) end + + @copyright = @copyright.strip unless @copyright.nil? + @copyright = nil if @copyright == "" end return @copyright end # Sets the feed item's copyright information @@ -961,15 +1046,17 @@ end for enclosure in @enclosures if enclosure.categories.nil? enclosure.categories = [] end - enclosure.categories << EnclosureCategory.new( - FeedTools.unescape_entities(category_path), - FeedTools.unescape_entities("http://www.apple.com/itunes/store/"), - FeedTools.unescape_entities("iTunes Music Store Categories") - ) + enclosure.categories << FeedTools::Feed::Category.new + enclosure.categories.last.term = + FeedTools.unescape_entities(category_path) + enclosure.categories.last.scheme = + "http://www.apple.com/itunes/store/" + enclosure.categories.last.label = + "iTunes Music Store Categories" end end for enclosure in @enclosures # Clean up any of those attributes that incorrectly have "" @@ -1059,22 +1146,38 @@ # Returns the feed item author def author if @author.nil? @author = FeedTools::Feed::Author.new unless root_node.nil? - author_node = XPath.first(root_node, "author") + author_node = XPath.first(root_node, "atom10:author", + FEED_TOOLS_NAMESPACES) if author_node.nil? + author_node = XPath.first(root_node, "atom03:author", + FEED_TOOLS_NAMESPACES) + end + if author_node.nil? + author_node = XPath.first(root_node, "atom:author") + end + if author_node.nil? + author_node = XPath.first(root_node, "author") + end + if author_node.nil? author_node = XPath.first(root_node, "managingEditor") end if author_node.nil? + author_node = XPath.first(root_node, "dc:author", + FEED_TOOLS_NAMESPACES) + end + if author_node.nil? author_node = XPath.first(root_node, "dc:author") end if author_node.nil? - author_node = XPath.first(root_node, "dc:creator") + author_node = XPath.first(root_node, "dc:creator", + FEED_TOOLS_NAMESPACES) end if author_node.nil? - author_node = XPath.first(root_node, "atom:author") + author_node = XPath.first(root_node, "dc:creator") end end unless author_node.nil? @author.raw = FeedTools.unescape_entities( XPath.first(author_node, "text()").to_s) @@ -1278,11 +1381,14 @@ def itunes_duration=(new_itunes_duration) @itunes_duration = new_itunes_duration end # Returns the feed item time - def time + def time(options = {}) + validate_options([ :estimate_timestamp ], + options.keys) + options = { :estimate_timestamp => true }.merge(options) if @time.nil? unless root_node.nil? time_string = XPath.first(root_node, "pubDate/text()").to_s if time_string == "" time_string = XPath.first(root_node, "dc:date/text()").to_s @@ -1298,26 +1404,28 @@ end end begin time_string = "" if time_string.nil? if time_string != "" - @time = Time.parse(time_string) - else - @time = succ_time - if @time.nil? - @time = prev_time - end + @time = Time.parse(time_string).gmtime end rescue - @time = succ_time + end + if options[:estimate_timestamp] if @time.nil? - @time = prev_time + begin + @time = succ_time + if @time.nil? + @time = prev_time + end + rescue + end + if @time.nil? + @time = Time.now.gmtime + end end end - if @time.nil? - @time = Time.now.gmtime - end end return @time end # Sets the feed item time @@ -1326,51 +1434,53 @@ end # Returns 1 second after the previous item's time. def succ_time #:nodoc: begin - if feed.nil? + parent_feed = self.feed + if parent_feed.nil? return nil end - if feed.instance_variable_get("@items").nil? - feed.items + if parent_feed.instance_variable_get("@items").nil? + parent_feed.items end - unsorted_items = feed.instance_variable_get("@items") + unsorted_items = parent_feed.instance_variable_get("@items") item_index = unsorted_items.index(self) if item_index.nil? return nil end if item_index <= 0 return nil end previous_item = unsorted_items[item_index - 1] - return (previous_item.time + 1) + return (previous_item.time(:estimate_timestamp => false) + 1) rescue return nil end end #private :succ_time # Returns 1 second before the succeeding item's time. def prev_time #:nodoc: begin - if feed.nil? + parent_feed = self.feed + if parent_feed.nil? return nil end - if feed.instance_variable_get("@items").nil? - feed.items + if parent_feed.instance_variable_get("@items").nil? + parent_feed.items end - unsorted_items = feed.instance_variable_get("@items") + unsorted_items = parent_feed.instance_variable_get("@items") item_index = unsorted_items.index(self) if item_index.nil? return nil end if item_index >= (unsorted_items.size - 1) return nil end succeeding_item = unsorted_items[item_index + 1] - return (succeeding_item.time - 1) + return (succeeding_item.time(:estimate_timestamp => false) - 1) rescue return nil end end #private :prev_time @@ -1383,11 +1493,11 @@ if updated_string == "" updated_string = XPath.first(root_node, "modified/text()").to_s end end if updated_string != nil && updated_string != "" - @updated = Time.parse(updated_string) rescue nil + @updated = Time.parse(updated_string).gmtime rescue nil else @updated = nil end end return @updated @@ -1412,11 +1522,11 @@ if issued_string == "" issued_string = XPath.first(root_node, "dc:date/text()").to_s end end if issued_string != nil && issued_string != "" - @issued = Time.parse(issued_string) rescue nil + @issued = Time.parse(issued_string).gmtime rescue nil else @issued = nil end end return @issued @@ -1629,11 +1739,12 @@ end build_xml_hook(feed_type, version, xml_builder) end elsif feed_type == "atom" && version == 0.3 # normal atom format - return xml_builder.entry("xmlns" => "http://purl.org/atom/ns#") do + return xml_builder.entry("xmlns" => + FEED_TOOLS_NAMESPACES['atom03']) do unless title.nil? || title == "" xml_builder.title(title, "mode" => "escaped", "type" => "text/html") end @@ -1671,10 +1782,11 @@ end build_xml_hook(feed_type, version, xml_builder) end elsif feed_type == "atom" && version == 1.0 # normal atom format - return xml_builder.entry("xmlns" => "http://www.w3.org/2005/Atom") do + return xml_builder.entry("xmlns" => + FEED_TOOLS_NAMESPACES['atom10']) do unless title.nil? || title == "" xml_builder.title(title, "type" => "html") end xml_builder.author do