lib/feed_tools/feed_item.rb in feedtools-0.2.17 vs lib/feed_tools/feed_item.rb in feedtools-0.2.18
- old
+ new
@@ -1,10 +1,37 @@
+#--
+# Copyright (c) 2005 Robert Aman
+#
+# Permission is hereby granted, free of charge, to any person obtaining
+# a copy of this software and associated documentation files (the
+# "Software"), to deal in the Software without restriction, including
+# without limitation the rights to use, copy, modify, merge, publish,
+# distribute, sublicense, and/or sell copies of the Software, and to
+# permit persons to whom the Software is furnished to do so, subject to
+# the following conditions:
+#
+# The above copyright notice and this permission notice shall be
+# included in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+#++
+
module FeedTools
# The <tt>FeedTools::FeedItem</tt> class represents the structure of
# a single item within a web feed.
class FeedItem
+ # :stopdoc:
include REXML
+ include GenericHelper
+ private :validate_options
+ # :startdoc:
# This class stores information about a feed item's file enclosures.
class Enclosure
# The url for the enclosure
attr_accessor :url
@@ -123,66 +150,54 @@
:width )
# Initializes the feed item object.
# Resets @feed_data, @xml_doc, @root_node, @title and @id to nil, sets the
# data type to :xml, and stamps @time with the current time in GMT.
# NOTE(review): in this listing a leading '-' marks a line of the old
# (0.2.17) text and a leading '+' a line of the new (0.2.18) text.
def initialize
super
# Old text only: the item used to hold a @feed back-reference.
- @feed = nil
@feed_data = nil
@feed_data_type = :xml
@xml_doc = nil
@root_node = nil
@title = nil
@id = nil
@time = Time.now.gmtime
end
# Returns the parent feed of this feed item
+ # Warning, this method may be slow if you have a
+ # large number of FeedTools::Feed objects. Can't
+ # use a direct reference to the parent because it plays
+ # havoc with the garbage collector.
# Returns nil when no FeedTools::Feed visited by ObjectSpace holds this item
# in its @items. Raises a RuntimeError ("Multiple parent feeds found.") when
# more than one feed holds it.
# NOTE(review): in this listing a leading '-' marks a line of the old
# (0.2.17) text and a leading '+' a line of the new (0.2.18) text.
def feed
- return @feed
+ parent_feed = nil
+ ObjectSpace.each_object(FeedTools::Feed) do |feed|
# @items is read directly; feed.items is called first when it is still nil.
# NOTE(review): presumably feed.items populates @items -- confirm; if it
# stays nil the for loop below would fail.
+ if feed.instance_variable_get("@items").nil?
+ feed.items
+ end
+ unsorted_items = feed.instance_variable_get("@items")
+ for item in unsorted_items
# Identity comparison: the item must be this very object.
+ if item.object_id == self.object_id
+ if parent_feed.nil?
+ parent_feed = feed
# Leaves only the inner for loop; each_object keeps visiting the remaining
# feeds, which is how a second match reaches the raise below.
+ break
+ else
+ raise "Multiple parent feeds found."
+ end
+ end
+ end
+ end
+ return parent_feed
end
- # Sets the parent feed of this feed item
- def feed=(new_feed)
- @feed = new_feed
- end
-
# Gives back the raw data currently stored for this feed item
# (nil until something has been assigned to it).
def feed_data
@feed_data
end
# Sets the feed item's data.
# Also clears @time, so a previously stored time is discarded when the
# data changes.
# NOTE(review): in this listing a leading '-' marks a line of the old
# (0.2.17) text and a leading '+' a line of the new (0.2.18) text. All of
# the '-' lines below belong to the old text only.
def feed_data=(new_feed_data)
@time = nil
@feed_data = new_feed_data
-
- # We need an immediate parse of the time so we don't mess up sort orders
- unless root_node.nil?
- repair_entities = false
# Old text: tries pubDate, dc:date (without and with FEED_TOOLS_NAMESPACES),
# issued, updated and time, in that order, keeping the first node found.
- time_node = XPath.first(root_node, "pubDate")
- if time_node.nil?
- time_node = XPath.first(root_node, "dc:date")
- end
- if time_node.nil?
- time_node = XPath.first(root_node, "dc:date", FEED_TOOLS_NAMESPACES)
- end
- if time_node.nil?
- time_node = XPath.first(root_node, "issued")
- end
- if time_node.nil?
- time_node = XPath.first(root_node, "updated")
- end
- if time_node.nil?
- time_node = XPath.first(root_node, "time")
- end
- end
- unless time_node.nil?
- begin
- @time = Time.parse(time_node.inner_xml)
# Old text: a parse failure is swallowed, leaving @time as nil.
- rescue
- end
- end
end
# Returns the feed item's data type.
def feed_data_type
return @feed_data_type
@@ -251,34 +266,51 @@
# Returns the feed item title
def title
if @title.nil?
unless root_node.nil?
repair_entities = false
- title_node = XPath.first(root_node, "title")
+ title_node = XPath.first(root_node, "atom10:title",
+ FEED_TOOLS_NAMESPACES)
if title_node.nil?
+ title_node = XPath.first(root_node, "title")
+ end
+ if title_node.nil?
+ title_node = XPath.first(root_node, "atom03:title",
+ FEED_TOOLS_NAMESPACES)
+ end
+ if title_node.nil?
title_node = XPath.first(root_node, "atom:title")
end
if title_node.nil?
+ title_node = XPath.first(root_node, "dc:title",
+ FEED_TOOLS_NAMESPACES)
+ end
+ if title_node.nil?
title_node = XPath.first(root_node, "dc:title")
end
if title_node.nil?
title_node = XPath.first(root_node, "TITLE")
end
end
if title_node.nil?
return nil
end
- if XPath.first(title_node, "@type").to_s == "xhtml" ||
- XPath.first(title_node, "@mode").to_s == "xhtml" ||
- XPath.first(title_node, "@type").to_s == "xml" ||
- XPath.first(title_node, "@mode").to_s == "xml" ||
- XPath.first(title_node, "@type").to_s == "application/xhtml+xml"
+ title_type = XPath.first(title_node, "@type").to_s
+ title_mode = XPath.first(title_node, "@mode").to_s
+ title_encoding = XPath.first(title_node, "@encoding").to_s
+
+ # Note that we're checking for misuse of type, mode and encoding here
+ if title_type == "base64" || title_mode == "base64" ||
+ title_encoding == "base64"
+ @title = Base64.decode64(title_node.inner_xml.strip)
+ elsif title_type == "xhtml" || title_mode == "xhtml" ||
+ title_type == "xml" || title_mode == "xml" ||
+ title_type == "application/xhtml+xml"
@title = title_node.inner_xml
- elsif XPath.first(title_node, "@type").to_s == "escaped" ||
- XPath.first(title_node, "@mode").to_s == "escaped"
+ elsif title_type == "escaped" || title_mode == "escaped"
@title = FeedTools.unescape_entities(
- XPath.first(title_node, "text()").to_s)
+ title_node.inner_xml)
else
@title = title_node.inner_xml
repair_entities = true
end
unless @title.nil?
@@ -364,31 +396,33 @@
end
end
if description_node.nil?
return nil
end
- unless description_node.nil?
- if XPath.first(description_node, "@encoding").to_s != ""
- @description =
- "[Embedded data objects are not currently supported.]"
- elsif description_node.cdatas.size > 0
- @description = description_node.cdatas.first.value
- elsif XPath.first(description_node, "@type").to_s == "xhtml" ||
- XPath.first(description_node, "@mode").to_s == "xhtml" ||
- XPath.first(description_node, "@type").to_s == "xml" ||
- XPath.first(description_node, "@mode").to_s == "xml" ||
- XPath.first(description_node, "@type").to_s ==
- "application/xhtml+xml"
- @description = description_node.inner_xml
- elsif XPath.first(description_node, "@type").to_s == "escaped" ||
- XPath.first(description_node, "@mode").to_s == "escaped"
- @description = FeedTools.unescape_entities(
- description_node.inner_xml)
- else
- @description = description_node.inner_xml
- repair_entities = true
- end
+ description_type = XPath.first(description_node, "@type").to_s
+ description_mode = XPath.first(description_node, "@mode").to_s
+ description_encoding = XPath.first(description_node, "@encoding").to_s
+
+ # Note that we're checking for misuse of type, mode and encoding here
+ if description_encoding != ""
+ @description =
+ "[Embedded data objects are not currently supported.]"
+ elsif description_node.cdatas.size > 0
+ @description = description_node.cdatas.first.value
+ elsif description_type == "base64" || description_mode == "base64" ||
+ description_encoding == "base64"
+ @description = Base64.decode64(description_node.inner_xml.strip)
+ elsif description_type == "xhtml" || description_mode == "xhtml" ||
+ description_type == "xml" || description_mode == "xml" ||
+ description_type == "application/xhtml+xml"
+ @description = description_node.inner_xml
+ elsif description_type == "escaped" || description_mode == "escaped"
+ @description = FeedTools.unescape_entities(
+ description_node.inner_xml)
+ else
+ @description = description_node.inner_xml
+ repair_entities = true
end
if @description == ""
@description = self.itunes_summary
@description = "" if @description.nil?
end
@@ -664,23 +698,74 @@
# Returns the feed item's copyright information
# The value is computed only while @copyright is nil and is then kept in
# @copyright. Returns nil when no matching node is found (including when
# root_node is nil) or when the final text is empty.
# NOTE(review): in this listing a leading '-' marks a line of the old
# (0.2.17) text and a leading '+' a line of the new (0.2.18) text.
def copyright
if @copyright.nil?
unless root_node.nil?
- @copyright = XPath.first(root_node, "dc:rights/text()").to_s
- if @copyright == ""
- @copyright = XPath.first(root_node, "rights/text()").to_s
+ repair_entities = false
+
# New text: the first node found wins, trying dc:rights (without and with
# FEED_TOOLS_NAMESPACES), rights, copyright, atom03:copyright,
# atom10:copyright and copyrights, in that order.
+ copyright_node = XPath.first(root_node, "dc:rights")
+ if copyright_node.nil?
+ copyright_node = XPath.first(root_node, "dc:rights",
+ FEED_TOOLS_NAMESPACES)
end
- if @copyright == ""
- @copyright = XPath.first(root_node, "copyright/text()").to_s
+ if copyright_node.nil?
+ copyright_node = XPath.first(root_node, "rights",
+ FEED_TOOLS_NAMESPACES)
end
- if @copyright == ""
- @copyright = XPath.first(root_node, "copyrights/text()").to_s
+ if copyright_node.nil?
+ copyright_node = XPath.first(root_node, "copyright",
+ FEED_TOOLS_NAMESPACES)
end
+ if copyright_node.nil?
+ copyright_node = XPath.first(root_node, "atom03:copyright",
+ FEED_TOOLS_NAMESPACES)
+ end
+ if copyright_node.nil?
+ copyright_node = XPath.first(root_node, "atom10:copyright",
+ FEED_TOOLS_NAMESPACES)
+ end
+ if copyright_node.nil?
+ copyright_node = XPath.first(root_node, "copyrights",
+ FEED_TOOLS_NAMESPACES)
+ end
+ end
# Early exit: @copyright stays nil, so the lookup runs again on the next call.
+ if copyright_node.nil?
+ return nil
+ end
+ copyright_type = XPath.first(copyright_node, "@type").to_s
+ copyright_mode = XPath.first(copyright_node, "@mode").to_s
+ copyright_encoding = XPath.first(copyright_node, "@encoding").to_s
+
+ # Note that we're checking for misuse of type, mode and encoding here
# NOTE(review): any non-empty @encoding is caught by this first branch, so
# the copyright_encoding == "base64" test further down can never be true.
+ if copyright_encoding != ""
+ @copyright =
+ "[Embedded data objects are not currently supported.]"
+ elsif copyright_node.cdatas.size > 0
+ @copyright = copyright_node.cdatas.first.value
+ elsif copyright_type == "base64" || copyright_mode == "base64" ||
+ copyright_encoding == "base64"
+ @copyright = Base64.decode64(copyright_node.inner_xml.strip)
+ elsif copyright_type == "xhtml" || copyright_mode == "xhtml" ||
+ copyright_type == "xml" || copyright_mode == "xml" ||
+ copyright_type == "application/xhtml+xml"
+ @copyright = copyright_node.inner_xml
+ elsif copyright_type == "escaped" || copyright_mode == "escaped"
+ @copyright = FeedTools.unescape_entities(
+ copyright_node.inner_xml)
+ else
# No recognised type/mode: the raw inner XML is used and flagged so that
# entities are unescaped after sanitizing below.
+ @copyright = copyright_node.inner_xml
+ repair_entities = true
+ end
+
+ unless @copyright.nil?
@copyright = FeedTools.sanitize_html(@copyright, :strip)
- @copyright = nil if @copyright == ""
+ @copyright = FeedTools.unescape_entities(@copyright) if repair_entities
+ @copyright = FeedTools.tidy_html(@copyright)
end
+
# Surrounding whitespace is trimmed and an empty result becomes nil.
+ @copyright = @copyright.strip unless @copyright.nil?
+ @copyright = nil if @copyright == ""
end
return @copyright
end
# Sets the feed item's copyright information
@@ -961,15 +1046,17 @@
end
for enclosure in @enclosures
if enclosure.categories.nil?
enclosure.categories = []
end
- enclosure.categories << EnclosureCategory.new(
- FeedTools.unescape_entities(category_path),
- FeedTools.unescape_entities("http://www.apple.com/itunes/store/"),
- FeedTools.unescape_entities("iTunes Music Store Categories")
- )
+ enclosure.categories << FeedTools::Feed::Category.new
+ enclosure.categories.last.term =
+ FeedTools.unescape_entities(category_path)
+ enclosure.categories.last.scheme =
+ "http://www.apple.com/itunes/store/"
+ enclosure.categories.last.label =
+ "iTunes Music Store Categories"
end
end
for enclosure in @enclosures
# Clean up any of those attributes that incorrectly have ""
@@ -1059,22 +1146,38 @@
# Returns the feed item author
def author
if @author.nil?
@author = FeedTools::Feed::Author.new
unless root_node.nil?
- author_node = XPath.first(root_node, "author")
+ author_node = XPath.first(root_node, "atom10:author",
+ FEED_TOOLS_NAMESPACES)
if author_node.nil?
+ author_node = XPath.first(root_node, "atom03:author",
+ FEED_TOOLS_NAMESPACES)
+ end
+ if author_node.nil?
+ author_node = XPath.first(root_node, "atom:author")
+ end
+ if author_node.nil?
+ author_node = XPath.first(root_node, "author")
+ end
+ if author_node.nil?
author_node = XPath.first(root_node, "managingEditor")
end
if author_node.nil?
+ author_node = XPath.first(root_node, "dc:author",
+ FEED_TOOLS_NAMESPACES)
+ end
+ if author_node.nil?
author_node = XPath.first(root_node, "dc:author")
end
if author_node.nil?
- author_node = XPath.first(root_node, "dc:creator")
+ author_node = XPath.first(root_node, "dc:creator",
+ FEED_TOOLS_NAMESPACES)
end
if author_node.nil?
- author_node = XPath.first(root_node, "atom:author")
+ author_node = XPath.first(root_node, "dc:creator")
end
end
unless author_node.nil?
@author.raw = FeedTools.unescape_entities(
XPath.first(author_node, "text()").to_s)
@@ -1278,11 +1381,14 @@
def itunes_duration=(new_itunes_duration)
# Stores the given value as-is; nothing is parsed or validated here.
@itunes_duration = new_itunes_duration
end
# Returns the feed item time
- def time
+ def time(options = {})
+ validate_options([ :estimate_timestamp ],
+ options.keys)
+ options = { :estimate_timestamp => true }.merge(options)
if @time.nil?
unless root_node.nil?
time_string = XPath.first(root_node, "pubDate/text()").to_s
if time_string == ""
time_string = XPath.first(root_node, "dc:date/text()").to_s
@@ -1298,26 +1404,28 @@
end
end
begin
time_string = "" if time_string.nil?
if time_string != ""
- @time = Time.parse(time_string)
- else
- @time = succ_time
- if @time.nil?
- @time = prev_time
- end
+ @time = Time.parse(time_string).gmtime
end
rescue
- @time = succ_time
+ end
+ if options[:estimate_timestamp]
if @time.nil?
- @time = prev_time
+ begin
+ @time = succ_time
+ if @time.nil?
+ @time = prev_time
+ end
+ rescue
+ end
+ if @time.nil?
+ @time = Time.now.gmtime
+ end
end
end
- if @time.nil?
- @time = Time.now.gmtime
- end
end
return @time
end
# Sets the feed item time
@@ -1326,51 +1434,53 @@
end
# Returns 1 second after the previous item's time.
# Returns nil when the parent feed cannot be found, when this item is not in
# the feed's @items, when it is the first entry, or when any step raises.
# NOTE(review): in this listing a leading '-' marks a line of the old
# (0.2.17) text and a leading '+' a line of the new (0.2.18) text.
def succ_time #:nodoc:
begin
- if feed.nil?
# New text: the parent is looked up once and kept in a local.
+ parent_feed = self.feed
+ if parent_feed.nil?
return nil
end
- if feed.instance_variable_get("@items").nil?
- feed.items
+ if parent_feed.instance_variable_get("@items").nil?
+ parent_feed.items
end
- unsorted_items = feed.instance_variable_get("@items")
+ unsorted_items = parent_feed.instance_variable_get("@items")
item_index = unsorted_items.index(self)
if item_index.nil?
return nil
end
# Index 0 has no previous item to derive a time from.
if item_index <= 0
return nil
end
previous_item = unsorted_items[item_index - 1]
- return (previous_item.time + 1)
# NOTE(review): :estimate_timestamp => false presumably keeps the neighbour
# from estimating its own time via succ_time/prev_time -- confirm against
# the time method. If that call returns nil, the + 1 raises and the rescue
# below turns it into nil.
+ return (previous_item.time(:estimate_timestamp => false) + 1)
rescue
return nil
end
end
#private :succ_time
# Returns 1 second before the succeeding item's time.
# Returns nil when the parent feed cannot be found, when this item is not in
# the feed's @items, when it is the last entry, or when any step raises.
# NOTE(review): in this listing a leading '-' marks a line of the old
# (0.2.17) text and a leading '+' a line of the new (0.2.18) text.
def prev_time #:nodoc:
begin
- if feed.nil?
# New text: the parent is looked up once and kept in a local.
+ parent_feed = self.feed
+ if parent_feed.nil?
return nil
end
- if feed.instance_variable_get("@items").nil?
- feed.items
+ if parent_feed.instance_variable_get("@items").nil?
+ parent_feed.items
end
- unsorted_items = feed.instance_variable_get("@items")
+ unsorted_items = parent_feed.instance_variable_get("@items")
item_index = unsorted_items.index(self)
if item_index.nil?
return nil
end
# The last entry has no succeeding item to derive a time from.
if item_index >= (unsorted_items.size - 1)
return nil
end
succeeding_item = unsorted_items[item_index + 1]
- return (succeeding_item.time - 1)
# NOTE(review): :estimate_timestamp => false presumably keeps the neighbour
# from estimating its own time via succ_time/prev_time -- confirm against
# the time method. If that call returns nil, the - 1 raises and the rescue
# below turns it into nil.
+ return (succeeding_item.time(:estimate_timestamp => false) - 1)
rescue
return nil
end
end
#private :prev_time
@@ -1383,11 +1493,11 @@
if updated_string == ""
updated_string = XPath.first(root_node, "modified/text()").to_s
end
end
if updated_string != nil && updated_string != ""
- @updated = Time.parse(updated_string) rescue nil
+ @updated = Time.parse(updated_string).gmtime rescue nil
else
@updated = nil
end
end
return @updated
@@ -1412,11 +1522,11 @@
if issued_string == ""
issued_string = XPath.first(root_node, "dc:date/text()").to_s
end
end
if issued_string != nil && issued_string != ""
- @issued = Time.parse(issued_string) rescue nil
+ @issued = Time.parse(issued_string).gmtime rescue nil
else
@issued = nil
end
end
return @issued
@@ -1629,11 +1739,12 @@
end
build_xml_hook(feed_type, version, xml_builder)
end
elsif feed_type == "atom" && version == 0.3
# normal atom format
- return xml_builder.entry("xmlns" => "http://purl.org/atom/ns#") do
+ return xml_builder.entry("xmlns" =>
+ FEED_TOOLS_NAMESPACES['atom03']) do
unless title.nil? || title == ""
xml_builder.title(title,
"mode" => "escaped",
"type" => "text/html")
end
@@ -1671,10 +1782,11 @@
end
build_xml_hook(feed_type, version, xml_builder)
end
elsif feed_type == "atom" && version == 1.0
# normal atom format
- return xml_builder.entry("xmlns" => "http://www.w3.org/2005/Atom") do
+ return xml_builder.entry("xmlns" =>
+ FEED_TOOLS_NAMESPACES['atom10']) do
unless title.nil? || title == ""
xml_builder.title(title,
"type" => "html")
end
xml_builder.author do