lib/feed_tools/feed_item.rb in feedtools-0.2.18 vs lib/feed_tools/feed_item.rb in feedtools-0.2.19
- old
+ new
@@ -163,18 +163,22 @@
# Returns the parent feed of this feed item
# Warning, this method may be slow if you have a
# large number of FeedTools::Feed objects. Can't
# use a direct reference to the parent because it plays
- # havoc with the garbage collector.
+ # havoc with the garbage collector. Could've used
+ # a WeakRef object, but really, if there are multiple
+ # parent feeds, something is going to go wrong, and the
+ # programmer needs to be notified. A WeakRef
+ # implementation can't detect this condition.
def feed
parent_feed = nil
ObjectSpace.each_object(FeedTools::Feed) do |feed|
- if feed.instance_variable_get("@items").nil?
+ if feed.instance_variable_get("@entries").nil?
feed.items
end
- unsorted_items = feed.instance_variable_get("@items")
+ unsorted_items = feed.instance_variable_get("@entries")
for item in unsorted_items
if item.object_id == self.object_id
if parent_feed.nil?
parent_feed = feed
break
@@ -222,17 +226,25 @@
end
return @xml_doc
end
# Returns the first node within the root_node that matches the xpath query.
- def find_node(xpath)
- return XPath.first(root_node, xpath)
+ def find_node(xpath, select_result_value=false)
+ if feed.feed_data_type != :xml
+ raise "The feed data type is not xml."
+ end
+ return try_xpaths(self.root_node, [xpath],
+ :select_result_value => select_result_value)
end
# Returns all nodes within the root_node that match the xpath query.
- def find_all_nodes(xpath)
- return XPath.match(root_node, xpath)
+ def find_all_nodes(xpath, select_result_value=false)
+ if feed.feed_data_type != :xml
+ raise "The feed data type is not xml."
+ end
+ return try_xpaths_all(self.root_node, [xpath],
+ :select_result_value => select_result_value)
end
# Returns the root node of the feed item.
def root_node
if @root_node.nil?
@@ -245,17 +257,17 @@
end
# Returns the feed item's unique id
def id
if @id.nil?
- unless root_node.nil?
- @id = XPath.first(root_node, "id/text()").to_s
- if @id == ""
- @id = XPath.first(root_node, "guid/text()").to_s
- end
- end
- @id = nil if @id == ""
+ @id = try_xpaths(self.root_node, [
+ "atom10:id/text()",
+ "atom03:id/text()",
+ "atom:id/text()",
+ "id/text()",
+ "guid/text()"
+ ], :select_result_value => true)
end
return @id
end
# Sets the feed item's unique id
@@ -264,41 +276,27 @@
end
# Returns the feed item title
def title
if @title.nil?
- unless root_node.nil?
- repair_entities = false
- title_node = XPath.first(root_node, "atom10:title",
- FEED_TOOLS_NAMESPACES)
- if title_node.nil?
- title_node = XPath.first(root_node, "title")
- end
- if title_node.nil?
- title_node = XPath.first(root_node, "atom03:title",
- FEED_TOOLS_NAMESPACES)
- end
- if title_node.nil?
- title_node = XPath.first(root_node, "atom:title")
- end
- if title_node.nil?
- title_node = XPath.first(root_node, "dc:title",
- FEED_TOOLS_NAMESPACES)
- end
- if title_node.nil?
- title_node = XPath.first(root_node, "dc:title")
- end
- if title_node.nil?
- title_node = XPath.first(root_node, "TITLE")
- end
- end
+ repair_entities = false
+ title_node = try_xpaths(self.root_node, [
+ "atom10:title",
+ "atom03:title",
+ "atom:title",
+ "title",
+ "dc:title"
+ ])
if title_node.nil?
return nil
end
- title_type = XPath.first(title_node, "@type").to_s
- title_mode = XPath.first(title_node, "@mode").to_s
- title_encoding = XPath.first(title_node, "@encoding").to_s
+ title_type = try_xpaths(title_node, "@type",
+ :select_result_value => true)
+ title_mode = try_xpaths(title_node, "@mode",
+ :select_result_value => true)
+ title_encoding = try_xpaths(title_node, "@encoding",
+ :select_result_value => true)
# Note that we're checking for misuse of type, mode and encoding here
if title_type == "base64" || title_mode == "base64" ||
title_encoding == "base64"
@title = Base64.decode64(title_node.inner_xml.strip)
@@ -316,11 +314,11 @@
unless @title.nil?
@title = FeedTools.sanitize_html(@title, :strip)
@title = FeedTools.unescape_entities(@title) if repair_entities
@title = FeedTools.tidy_html(@title) unless repair_entities
end
- if @title != ""
+ if !@title.blank? && FeedTools.configurations[:strip_comment_count]
# Some blogging tools include the number of comments in a post
# in the title... this is supremely ugly, and breaks any
# applications which expect the title to be static, so we're
# gonna strip them out.
#
@@ -329,138 +327,105 @@
@title = @title.strip.gsub(/\[\d*\]$/, "").strip
end
@title.gsub!(/>\n</, "><")
@title.gsub!(/\n/, " ")
@title.strip!
- @title = nil if @title == ""
+ @title = nil if @title.blank?
end
return @title
end
# Sets the feed item title
def title=(new_title)
@title = new_title
end
- # Returns the feed item description
- def description
- if @description.nil?
- unless root_node.nil?
- repair_entities = false
- description_node = XPath.first(root_node, "content:encoded")
- if description_node.nil?
- description_node = XPath.first(root_node, "content:encoded",
- FEED_TOOLS_NAMESPACES)
- end
- if description_node.nil?
- description_node = XPath.first(root_node, "encoded")
- end
- if description_node.nil?
- description_node = XPath.first(root_node, "content")
- end
- if description_node.nil?
- description_node = XPath.first(root_node, "fullitem")
- end
- if description_node.nil?
- description_node = XPath.first(root_node, "xhtml:body")
- end
- if description_node.nil?
- description_node = XPath.first(root_node, "xhtml:body",
- FEED_TOOLS_NAMESPACES)
- end
- if description_node.nil?
- description_node = XPath.first(root_node, "body")
- end
- if description_node.nil?
- description_node = XPath.first(root_node, "description")
- end
- if description_node.nil?
- description_node = XPath.first(root_node, "tagline")
- end
- if description_node.nil?
- description_node = XPath.first(root_node, "subtitle")
- end
- if description_node.nil?
- description_node = XPath.first(root_node, "summary")
- end
- if description_node.nil?
- description_node = XPath.first(root_node, "abstract")
- end
- if description_node.nil?
- description_node = XPath.first(root_node, "ABSTRACT")
- end
- if description_node.nil?
- description_node = XPath.first(root_node, "blurb")
- end
- if description_node.nil?
- description_node = XPath.first(root_node, "info")
- end
- end
- if description_node.nil?
+ # Returns the feed item content
+ def content
+ if @content.nil?
+ repair_entities = false
+ content_node = try_xpaths(self.root_node, [
+ "content:encoded",
+ "content",
+ "fullitem",
+ "xhtml:body",
+ "body",
+ "encoded",
+ "description",
+ "tagline",
+ "subtitle",
+ "summary",
+ "abstract",
+ "blurb",
+ "info"
+ ])
+ if content_node.nil?
return nil
end
- description_type = XPath.first(description_node, "@type").to_s
- description_mode = XPath.first(description_node, "@mode").to_s
- description_encoding = XPath.first(description_node, "@encoding").to_s
+ content_type = try_xpaths(content_node, "@type",
+ :select_result_value => true)
+ content_mode = try_xpaths(content_node, "@mode",
+ :select_result_value => true)
+ content_encoding = try_xpaths(content_node, "@encoding",
+ :select_result_value => true)
# Note that we're checking for misuse of type, mode and encoding here
- if description_encoding != ""
- @description =
+ if !content_encoding.blank?
+ @content =
"[Embedded data objects are not currently supported.]"
- elsif description_node.cdatas.size > 0
- @description = description_node.cdatas.first.value
- elsif description_type == "base64" || description_mode == "base64" ||
- description_encoding == "base64"
- @description = Base64.decode64(description_node.inner_xml.strip)
- elsif description_type == "xhtml" || description_mode == "xhtml" ||
- description_type == "xml" || description_mode == "xml" ||
- description_type == "application/xhtml+xml"
- @description = description_node.inner_xml
- elsif description_type == "escaped" || description_mode == "escaped"
- @description = FeedTools.unescape_entities(
- description_node.inner_xml)
+ elsif content_node.cdatas.size > 0
+ @content = content_node.cdatas.first.value
+ elsif content_type == "base64" || content_mode == "base64" ||
+ content_encoding == "base64"
+ @content = Base64.decode64(content_node.inner_xml.strip)
+ elsif content_type == "xhtml" || content_mode == "xhtml" ||
+ content_type == "xml" || content_mode == "xml" ||
+ content_type == "application/xhtml+xml"
+ @content = content_node.inner_xml
+ elsif content_type == "escaped" || content_mode == "escaped"
+ @content = FeedTools.unescape_entities(
+ content_node.inner_xml)
else
- @description = description_node.inner_xml
+ @content = content_node.inner_xml
repair_entities = true
end
- if @description == ""
- @description = self.itunes_summary
- @description = "" if @description.nil?
+ if @content.blank?
+ @content = self.itunes_summary
end
- if @description == ""
- @description = self.itunes_subtitle
- @description = "" if @description.nil?
+ if @content.blank?
+ @content = self.itunes_subtitle
end
- unless @description.nil?
- @description = FeedTools.sanitize_html(@description, :strip)
- @description = FeedTools.unescape_entities(@description) if repair_entities
- @description = FeedTools.tidy_html(@description)
+ unless @content.blank?
+ @content = FeedTools.sanitize_html(@content, :strip)
+ @content = FeedTools.unescape_entities(@content) if repair_entities
+ @content = FeedTools.tidy_html(@content)
end
- @description = @description.strip unless @description.nil?
- @description = nil if @description == ""
+ @content = @content.strip unless @content.nil?
+ @content = nil if @content.blank?
end
- return @description
+ return @content
end
- # Sets the feed item description
- def description=(new_description)
- @description = new_description
+ # Sets the feed item content
+ def content=(new_content)
+ @content = new_content
end
# Returns the contents of the itunes:summary element
def itunes_summary
if @itunes_summary.nil?
- @itunes_summary = FeedTools.unescape_entities(XPath.first(root_node,
- "itunes:summary/text()").to_s)
- if @itunes_summary == ""
+ @itunes_summary = try_xpaths(self.root_node, [
+ "itunes:summary/text()"
+ ])
+ unless @itunes_summary.blank?
+ @itunes_summary = FeedTools.unescape_entities(@itunes_summary)
+ @itunes_summary = FeedTools.sanitize_html(@itunes_summary)
+ else
@itunes_summary = nil
end
- unless @itunes_summary.nil?
- @itunes_summary = FeedTools.sanitize_html(@itunes_summary)
- end
end
return @itunes_summary
end
# Sets the contents of the itunes:summary element
@@ -469,18 +434,19 @@
end
# Returns the contents of the itunes:subtitle element
def itunes_subtitle
if @itunes_subtitle.nil?
- @itunes_subtitle = FeedTools.unescape_entities(XPath.first(root_node,
- "itunes:subtitle/text()").to_s)
- if @itunes_subtitle == ""
+ @itunes_subtitle = try_xpaths(self.root_node, [
+ "itunes:subtitle/text()"
+ ])
+ unless @itunes_subtitle.blank?
+ @itunes_subtitle = FeedTools.unescape_entities(@itunes_subtitle)
+ @itunes_subtitle = FeedTools.sanitize_html(@itunes_subtitle)
+ else
@itunes_subtitle = nil
end
- unless @itunes_subtitle.nil?
- @itunes_subtitle = FeedTools.sanitize_html(@itunes_subtitle)
- end
end
return @itunes_subtitle
end
# Sets the contents of the itunes:subtitle element
@@ -509,43 +475,35 @@
end
# Returns the feed item link
def link
if @link.nil?
- unless root_node.nil?
- @link = XPath.first(root_node, "link[@rel='alternate']/@href").to_s
- if @link == ""
- @link = XPath.first(root_node, "link/@href").to_s
- end
- if @link == ""
- @link = XPath.first(root_node, "link/text()").to_s
- end
- if @link == ""
- @link = XPath.first(root_node, "@rdf:about").to_s
- end
- if @link == ""
- @link = XPath.first(root_node, "guid[@isPermaLink='true']/text()").to_s
- end
- if @link == ""
- @link = XPath.first(root_node, "@href").to_s
- end
- if @link == ""
- @link = XPath.first(root_node, "a/@href").to_s
- end
- if @link == ""
- @link = XPath.first(root_node, "@HREF").to_s
- end
- if @link == ""
- @link = XPath.first(root_node, "A/@HREF").to_s
- end
- end
- if @link == "" || @link.nil?
+ @link = try_xpaths(self.root_node, [
+ "atom10:link[@type='application/xhtml+xml']/@href",
+ "atom10:link[@type='text/html']/@href",
+ "atom10:link[@rel='alternate']/@href",
+ "atom03:link[@type='application/xhtml+xml']/@href",
+ "atom03:link[@type='text/html']/@href",
+ "atom03:link[@rel='alternate']/@href",
+ "atom:link[@type='application/xhtml+xml']/@href",
+ "atom:link[@type='text/html']/@href",
+ "atom:link[@rel='alternate']/@href",
+ "link[@type='application/xhtml+xml']/@href",
+ "link[@type='text/html']/@href",
+ "link[@rel='alternate']/@href",
+ "link/text()",
+ "@rdf:about",
+ "guid[@isPermaLink='true']/text()",
+ "@href",
+ "a/@href"
+ ], :select_result_value => true)
+ if @link.blank?
if FeedTools.is_uri? self.guid
@link = self.guid
end
end
- if @link != ""
+ if !@link.blank?
@link = FeedTools.unescape_entities(@link)
end
# TODO: Actually implement proper relative url resolving instead of this crap
# ===========================================================================
#
@@ -554,11 +512,54 @@
# @link = @link[1..-1]
# end
# # prepend the base to the link since they seem to have used a relative path
# @link = feed.base + @link
# end
- @link = FeedTools.normalize_url(@link)
+ if @link.blank?
+ link_node = try_xpaths(self.root_node, [
+ "atom10:link",
+ "atom03:link",
+ "atom:link",
+ "link"
+ ])
+ if link_node != nil
+ if link_node.attributes['type'].to_s =~ /^image/ ||
+ link_node.attributes['type'].to_s =~ /^application/ ||
+ link_node.attributes['type'].to_s =~ /xml/ ||
+ link_node.attributes['rel'].to_s =~ /self/
+ for child in self.root_node
+ if child.class == REXML::Element
+ if child.name.downcase == "link"
+ if child.attributes['type'].to_s =~ /^image/ ||
+ child.attributes['type'].to_s =~ /^application/ ||
+ child.attributes['type'].to_s =~ /xml/ ||
+ child.attributes['rel'].to_s =~ /self/
+ @link = nil
+ next
+ else
+ @link = child.attributes['href'].to_s
+ if @link.blank?
+ @link = child.inner_xml
+ end
+ if @link.blank?
+ next
+ end
+ break
+ end
+ end
+ end
+ end
+ else
+ @link = link_node.attributes['href'].to_s
+ end
+ end
+ end
+ @link = self.comments if @link.blank?
+ @link = nil if @link.blank?
+ if FeedTools.configurations[:url_normalization_enabled]
+ @link = FeedTools.normalize_url(@link)
+ end
end
return @link
end
# Sets the feed item link
@@ -568,127 +569,126 @@
# Returns a list of the feed item's categories
def categories
if @categories.nil?
@categories = []
- category_nodes = XPath.match(root_node, "category")
- if category_nodes.nil? || category_nodes.empty?
- category_nodes = XPath.match(root_node, "dc:subject")
+ category_nodes = try_xpaths_all(self.root_node, [
+ "category",
+ "dc:subject"
+ ])
+ for category_node in category_nodes
+ category = FeedTools::Feed::Category.new
+ category.term = try_xpaths(category_node, ["@term", "text()"],
+ :select_result_value => true)
+ category.term.strip! unless category.term.nil?
+ category.label = try_xpaths(category_node, ["@label"],
+ :select_result_value => true)
+ category.label.strip! unless category.label.nil?
+ category.scheme = try_xpaths(category_node, [
+ "@scheme",
+ "@domain"
+ ], :select_result_value => true)
+ category.scheme.strip! unless category.scheme.nil?
+ @categories << category
end
- unless category_nodes.nil?
- for category_node in category_nodes
- category = FeedTools::Feed::Category.new
- category.term = XPath.first(category_node, "@term").to_s
- if category.term == ""
- category.term = XPath.first(category_node, "text()").to_s
- end
- category.term.strip! unless category.term.nil?
- category.term = nil if category.term == ""
- category.label = XPath.first(category_node, "@label").to_s
- category.label.strip! unless category.label.nil?
- category.label = nil if category.label == ""
- category.scheme = XPath.first(category_node, "@scheme").to_s
- if category.scheme == ""
- category.scheme = XPath.first(category_node, "@domain").to_s
- end
- category.scheme.strip! unless category.scheme.nil?
- category.scheme = nil if category.scheme == ""
- @categories << category
- end
- end
end
return @categories
end
# Returns a list of the feed item's images
def images
if @images.nil?
@images = []
- image_nodes = XPath.match(root_node, "link")
- if image_nodes.nil? || image_nodes.empty?
- image_nodes = XPath.match(root_node, "logo")
- end
- if image_nodes.nil? || image_nodes.empty?
- image_nodes = XPath.match(root_node, "LOGO")
- end
- if image_nodes.nil? || image_nodes.empty?
- image_nodes = XPath.match(root_node, "image")
- end
- unless image_nodes.nil?
+ image_nodes = try_xpaths_all(self.root_node, [
+ "image",
+ "logo",
+ "atom10:link",
+ "atom03:link",
+ "atom:link",
+ "link"
+ ])
+ unless image_nodes.blank?
for image_node in image_nodes
image = FeedTools::Feed::Image.new
- image.url = XPath.first(image_node, "url/text()").to_s
- if image.url == ""
- image.url = XPath.first(image_node, "@rdf:resource").to_s
+ image.url = try_xpaths(image_node, [
+ "url/text()",
+ "@rdf:resource"
+ ], :select_result_value => true)
+ if image.url.blank? && (image_node.name == "logo" ||
+ (image_node.attributes['type'].to_s =~ /^image/) == 0)
+ image.url = try_xpaths(image_node, [
+ "@atom10:href",
+ "@atom03:href",
+ "@atom:href",
+ "@href"
+ ], :select_result_value => true)
+ if image.url == self.link && image.url != nil
+ image.url = nil
+ end
end
- if image.url == "" && (image_node.name == "logo" ||
- (image_node.attributes['type'] =~ /^image/) == 0)
- image.url = XPath.first(image_node, "@href").to_s
+ if image.url.blank? && image_node.name == "LOGO"
+ image.url = try_xpaths(image_node, [
+ "@href"
+ ], :select_result_value => true)
end
- if image.url == "" && image_node.name == "LOGO"
- image.url = XPath.first(image_node, "@HREF").to_s
- end
image.url.strip! unless image.url.nil?
- image.url = nil if image.url == ""
- image.title = XPath.first(image_node, "title/text()").to_s
+ image.title = try_xpaths(image_node,
+ ["title/text()"], :select_result_value => true)
image.title.strip! unless image.title.nil?
- image.title = nil if image.title == ""
- image.description =
- XPath.first(image_node, "description/text()").to_s
+ image.description = try_xpaths(image_node,
+ ["description/text()"], :select_result_value => true)
image.description.strip! unless image.description.nil?
- image.description = nil if image.description == ""
- image.link = XPath.first(image_node, "link/text()").to_s
+ image.link = try_xpaths(image_node,
+ ["link/text()"], :select_result_value => true)
image.link.strip! unless image.link.nil?
- image.link = nil if image.link == ""
- image.height = XPath.first(image_node, "height/text()").to_s.to_i
+ image.height = try_xpaths(image_node,
+ ["height/text()"], :select_result_value => true).to_i
image.height = nil if image.height <= 0
- image.width = XPath.first(image_node, "width/text()").to_s.to_i
+ image.width = try_xpaths(image_node,
+ ["width/text()"], :select_result_value => true).to_i
image.width = nil if image.width <= 0
- image.style = XPath.first(image_node, "@style").to_s.downcase
- if image.style == ""
- image.style = XPath.first(image_node, "@STYLE").to_s.downcase
- end
+ image.style = try_xpaths(image_node, [
+ "style/text()",
+ "@style"
+ ], :select_result_value => true)
image.style.strip! unless image.style.nil?
- image.style = nil if image.style == ""
- @images << image
+ image.style.downcase! unless image.style.nil?
+ @images << image unless image.url.nil?
end
end
end
return @images
end
# Returns the feed item itunes image link
- #
- # If it's not present, falls back to the normal image link.
- # Technically, the itunes spec says that the image needs to be
- # square and larger than 300x300, but hey, if there's an image
- # to be had, it's better than none at all.
def itunes_image_link
if @itunes_image_link.nil?
- # get the feed item itunes image link from the xml document
- @itunes_image_link = XPath.first(root_node, "itunes:image/@href").to_s
- if @itunes_image_link == ""
- @itunes_image_link = XPath.first(root_node, "itunes:link[@rel='image']/@href").to_s
+ @itunes_image_link = try_xpaths(self.root_node, [
+ "itunes:image/@href",
+ "itunes:link[@rel='image']/@href"
+ ], :select_result_value => true)
+ if FeedTools.configurations[:url_normalization_enabled]
+ @itunes_image_link = FeedTools.normalize_url(@itunes_image_link)
end
- @itunes_image_link = FeedTools.normalize_url(@itunes_image_link)
end
return @itunes_image_link
end
# Sets the feed item itunes image link
def itunes_image_link=(new_itunes_image_link)
@itunes_image_link = new_itunes_image_link
end
# Returns the feed item media thumbnail link
- #
- # If it's not present, falls back to the normal image link.
def media_thumbnail_link
if @media_thumbnail_link.nil?
- # get the feed item itunes image link from the xml document
- @media_thumbnail_link = XPath.first(root_node, "media:thumbnail/@url").to_s
- @media_thumbnail_link = FeedTools.normalize_url(@media_thumbnail_link)
+ @media_thumbnail_link = try_xpaths(self.root_node, [
+ "media:thumbnail/@url"
+ ], :select_result_value => true)
+ if FeedTools.configurations[:url_normalization_enabled]
+ @media_thumbnail_link = FeedTools.normalize_url(@media_thumbnail_link)
+ end
end
return @media_thumbnail_link
end
# Sets the feed item media thumbnail url
@@ -697,48 +697,32 @@
end
# Returns the feed item's copyright information
def copyright
if @copyright.nil?
- unless root_node.nil?
- repair_entities = false
-
- copyright_node = XPath.first(root_node, "dc:rights")
- if copyright_node.nil?
- copyright_node = XPath.first(root_node, "dc:rights",
- FEED_TOOLS_NAMESPACES)
- end
- if copyright_node.nil?
- copyright_node = XPath.first(root_node, "rights",
- FEED_TOOLS_NAMESPACES)
- end
- if copyright_node.nil?
- copyright_node = XPath.first(root_node, "copyright",
- FEED_TOOLS_NAMESPACES)
- end
- if copyright_node.nil?
- copyright_node = XPath.first(root_node, "atom03:copyright",
- FEED_TOOLS_NAMESPACES)
- end
- if copyright_node.nil?
- copyright_node = XPath.first(root_node, "atom10:copyright",
- FEED_TOOLS_NAMESPACES)
- end
- if copyright_node.nil?
- copyright_node = XPath.first(root_node, "copyrights",
- FEED_TOOLS_NAMESPACES)
- end
- end
+ repair_entities = false
+ copyright_node = try_xpaths(self.root_node, [
+ "atom10:copyright",
+ "atom03:copyright",
+ "atom:copyright",
+ "copyright",
+ "copyrights",
+ "dc:rights",
+ "rights"
+ ])
if copyright_node.nil?
return nil
end
- copyright_type = XPath.first(copyright_node, "@type").to_s
- copyright_mode = XPath.first(copyright_node, "@mode").to_s
- copyright_encoding = XPath.first(copyright_node, "@encoding").to_s
+ copyright_type = try_xpaths(copyright_node, "@type",
+ :select_result_value => true)
+ copyright_mode = try_xpaths(copyright_node, "@mode",
+ :select_result_value => true)
+ copyright_encoding = try_xpaths(copyright_node, "@encoding",
+ :select_result_value => true)
# Note that we're checking for misuse of type, mode and encoding here
- if copyright_encoding != ""
+ if !copyright_encoding.blank?
@copyright =
"[Embedded data objects are not currently supported.]"
elsif copyright_node.cdatas.size > 0
@copyright = copyright_node.cdatas.first.value
elsif copyright_type == "base64" || copyright_mode == "base64" ||
@@ -761,11 +745,11 @@
@copyright = FeedTools.unescape_entities(@copyright) if repair_entities
@copyright = FeedTools.tidy_html(@copyright)
end
@copyright = @copyright.strip unless @copyright.nil?
- @copyright = nil if @copyright == ""
+ @copyright = nil if @copyright.blank?
end
return @copyright
end
# Sets the feed item's copyright information
@@ -777,31 +761,43 @@
def enclosures
if @enclosures.nil?
@enclosures = []
# First, load up all the different possible sources of enclosures
- rss_enclosures = XPath.match(root_node, "enclosure")
- atom_enclosures = XPath.match(root_node, "link[@rel='enclosure']")
- media_content_enclosures = XPath.match(root_node, "media:content")
- media_group_enclosures = XPath.match(root_node, "media:group")
-
- # Parse RSS-type enclosures. Thanks to a few buggy enclosures implementations,
- # sometimes these also manage to show up in atom files.
+ rss_enclosures =
+ try_xpaths_all(self.root_node, ["enclosure"])
+ atom_enclosures =
+ try_xpaths_all(self.root_node, [
+ "atom10:link[@rel='enclosure']",
+ "atom03:link[@rel='enclosure']",
+ "atom:link[@rel='enclosure']",
+ "link[@rel='enclosure']"
+ ])
+ media_content_enclosures =
+ try_xpaths_all(self.root_node, ["media:content"])
+ media_group_enclosures =
+ try_xpaths_all(self.root_node, ["media:group"])
+
+ # Parse RSS-type enclosures. Thanks to a few buggy enclosures
+ # implementations, sometimes these also manage to show up in atom
+ # files.
for enclosure_node in rss_enclosures
enclosure = Enclosure.new
- enclosure.url = FeedTools.unescape_entities(enclosure_node.attributes["url"].to_s)
+ enclosure.url = FeedTools.unescape_entities(
+ enclosure_node.attributes["url"].to_s)
enclosure.type = enclosure_node.attributes["type"].to_s
enclosure.file_size = enclosure_node.attributes["length"].to_i
enclosure.credits = []
enclosure.explicit = false
@enclosures << enclosure
end
- # Parse atom-type enclosures. If there are repeats of the same enclosure object,
- # we merge the two together.
+ # Parse atom-type enclosures. If there are repeats of the same
+ # enclosure object, we merge the two together.
for enclosure_node in atom_enclosures
- enclosure_url = FeedTools.unescape_entities(enclosure_node.attributes["href"].to_s)
+ enclosure_url = FeedTools.unescape_entities(
+ enclosure_node.attributes["href"].to_s)
enclosure = nil
new_enclosure = false
for existing_enclosure in @enclosures
if existing_enclosure.url == enclosure_url
enclosure = existing_enclosure
@@ -820,17 +816,19 @@
if new_enclosure
@enclosures << enclosure
end
end
- # Creates an anonymous method to parse content objects from the media module. We
- # do this to avoid excessive duplication of code since we have to do identical
- # processing for content objects within group objects.
+ # Creates an anonymous method to parse content objects from the media
+ # module. We do this to avoid excessive duplication of code since we
+ # have to do identical processing for content objects within group
+ # objects.
parse_media_content = lambda do |media_content_nodes|
affected_enclosures = []
for enclosure_node in media_content_nodes
- enclosure_url = FeedTools.unescape_entities(enclosure_node.attributes["url"].to_s)
+ enclosure_url = FeedTools.unescape_entities(
+ enclosure_node.attributes["url"].to_s)
enclosure = nil
new_enclosure = false
for existing_enclosure in @enclosures
if existing_enclosure.url == enclosure_url
enclosure = existing_enclosure
@@ -847,77 +845,88 @@
enclosure.duration = enclosure_node.attributes["duration"].to_s
enclosure.height = enclosure_node.attributes["height"].to_i
enclosure.width = enclosure_node.attributes["width"].to_i
enclosure.bitrate = enclosure_node.attributes["bitrate"].to_i
enclosure.framerate = enclosure_node.attributes["framerate"].to_i
- enclosure.expression = enclosure_node.attributes["expression"].to_s
+ enclosure.expression =
+ enclosure_node.attributes["expression"].to_s
enclosure.is_default =
(enclosure_node.attributes["isDefault"].to_s.downcase == "true")
- if XPath.first(enclosure_node, "media:thumbnail/@url").to_s != ""
+ enclosure_thumbnail_url = try_xpaths(enclosure_node,
+ ["media:thumbnail/@url"], :select_result_value => true)
+ if !enclosure_thumbnail_url.blank?
enclosure.thumbnail = EnclosureThumbnail.new(
- FeedTools.unescape_entities(XPath.first(enclosure_node, "media:thumbnail/@url").to_s),
- FeedTools.unescape_entities(XPath.first(enclosure_node, "media:thumbnail/@height").to_s),
- FeedTools.unescape_entities(XPath.first(enclosure_node, "media:thumbnail/@width").to_s)
+ FeedTools.unescape_entities(enclosure_thumbnail_url),
+ FeedTools.unescape_entities(
+ try_xpaths(enclosure_node, ["media:thumbnail/@height"],
+ :select_result_value => true)),
+ FeedTools.unescape_entities(
+ try_xpaths(enclosure_node, ["media:thumbnail/@width"],
+ :select_result_value => true))
)
- if enclosure.thumbnail.height == ""
- enclosure.thumbnail.height = nil
- end
- if enclosure.thumbnail.width == ""
- enclosure.thumbnail.width = nil
- end
end
enclosure.categories = []
- for category in XPath.match(enclosure_node, "media:category")
+ for category in try_xpaths_all(enclosure_node, ["media:category"])
enclosure.categories << FeedTools::Feed::Category.new
enclosure.categories.last.term =
- FeedTools.unescape_entities(category.text)
+ FeedTools.unescape_entities(category.inner_xml)
enclosure.categories.last.scheme =
- FeedTools.unescape_entities(category.attributes["scheme"].to_s)
+ FeedTools.unescape_entities(
+ category.attributes["scheme"].to_s)
enclosure.categories.last.label =
- FeedTools.unescape_entities(category.attributes["label"].to_s)
- if enclosure.categories.last.scheme == ""
+ FeedTools.unescape_entities(
+ category.attributes["label"].to_s)
+ if enclosure.categories.last.scheme.blank?
enclosure.categories.last.scheme = nil
end
- if enclosure.categories.last.label == ""
+ if enclosure.categories.last.label.blank?
enclosure.categories.last.label = nil
end
end
- if XPath.first(enclosure_node, "media:hash/text()").to_s != ""
+ enclosure_media_hash = try_xpaths(enclosure_node,
+ ["media:hash/text()"], :select_result_value => true)
+ if !enclosure_media_hash.nil?
enclosure.hash = EnclosureHash.new(
- FeedTools.sanitize_html(FeedTools.unescape_entities(XPath.first(
- enclosure_node, "media:hash/text()").to_s), :strip),
+ FeedTools.sanitize_html(FeedTools.unescape_entities(
+ enclosure_media_hash), :strip),
"md5"
)
end
- if XPath.first(enclosure_node, "media:player/@url").to_s != ""
+ enclosure_media_player_url = try_xpaths(enclosure_node,
+ ["media:player/@url"], :select_result_value => true)
+ if !enclosure_media_player_url.blank?
enclosure.player = EnclosurePlayer.new(
- FeedTools.unescape_entities(XPath.first(enclosure_node, "media:player/@url").to_s),
- FeedTools.unescape_entities(XPath.first(enclosure_node, "media:player/@height").to_s),
- FeedTools.unescape_entities(XPath.first(enclosure_node, "media:player/@width").to_s)
+ FeedTools.unescape_entities(enclosure_media_player_url),
+ FeedTools.unescape_entities(
+ try_xpaths(enclosure_node,
+ ["media:player/@height"], :select_result_value => true)),
+ FeedTools.unescape_entities(
+ try_xpaths(enclosure_node,
+ ["media:player/@width"], :select_result_value => true))
)
- if enclosure.player.height == ""
- enclosure.player.height = nil
- end
- if enclosure.player.width == ""
- enclosure.player.width = nil
- end
end
enclosure.credits = []
- for credit in XPath.match(enclosure_node, "media:credit")
+ for credit in try_xpaths_all(enclosure_node, ["media:credit"])
enclosure.credits << EnclosureCredit.new(
- FeedTools.unescape_entities(credit.text),
- FeedTools.unescape_entities(credit.attributes["role"].to_s.downcase)
+ FeedTools.unescape_entities(credit.inner_xml.to_s.strip),
+ FeedTools.unescape_entities(
+ credit.attributes["role"].to_s.downcase)
)
- if enclosure.credits.last.role == ""
+ if enclosure.credits.last.name.blank?
+ enclosure.credits.last.name = nil
+ end
+ if enclosure.credits.last.role.blank?
enclosure.credits.last.role = nil
end
end
- enclosure.explicit = (XPath.first(enclosure_node,
- "media:adult/text()").to_s.downcase == "true")
- if XPath.first(enclosure_node, "media:text/text()").to_s != ""
- enclosure.text = FeedTools.unescape_entities(XPath.first(enclosure_node,
- "media:text/text()").to_s)
+ enclosure.explicit = (try_xpaths(enclosure_node,
+ ["media:adult/text()"]).to_s.downcase == "true")
+ enclosure_media_text =
+ try_xpaths(enclosure_node, ["media:text/text()"])
+ if !enclosure_media_text.blank?
+ enclosure.text = FeedTools.unescape_entities(
+ enclosure_media_text)
end
affected_enclosures << enclosure
if new_enclosure
@enclosures << enclosure
end
@@ -931,95 +940,100 @@
media_groups = []
# Parse the group objects.
for media_group in media_group_enclosures
group_media_content_enclosures =
- XPath.match(media_group, "media:content")
+ try_xpaths_all(media_group, ["media:content"])
# Parse the content objects within the group objects.
affected_enclosures =
parse_media_content.call(group_media_content_enclosures)
# Now make sure that content objects inherit certain properties from
# the group objects.
for enclosure in affected_enclosures
- if enclosure.thumbnail.nil? &&
- XPath.first(media_group, "media:thumbnail/@url").to_s != ""
+ media_group_thumbnail = try_xpaths(media_group,
+ ["media:thumbnail/@url"], :select_result_value => true)
+ if enclosure.thumbnail.nil? && !media_group_thumbnail.blank?
enclosure.thumbnail = EnclosureThumbnail.new(
FeedTools.unescape_entities(
- XPath.first(media_group, "media:thumbnail/@url").to_s),
+ media_group_thumbnail),
FeedTools.unescape_entities(
- XPath.first(media_group, "media:thumbnail/@height").to_s),
+ try_xpaths(media_group, ["media:thumbnail/@height"],
+ :select_result_value => true)),
FeedTools.unescape_entities(
- XPath.first(media_group, "media:thumbnail/@width").to_s)
+ try_xpaths(media_group, ["media:thumbnail/@width"],
+ :select_result_value => true))
)
- if enclosure.thumbnail.height == ""
- enclosure.thumbnail.height = nil
- end
- if enclosure.thumbnail.width == ""
- enclosure.thumbnail.width = nil
- end
end
- if (enclosure.categories.nil? || enclosure.categories.size == 0)
+ if (enclosure.categories.blank?)
enclosure.categories = []
- for category in XPath.match(media_group, "media:category")
+ for category in try_xpaths_all(media_group, ["media:category"])
enclosure.categories << FeedTools::Feed::Category.new
enclosure.categories.last.term =
- FeedTools.unescape_entities(category.text)
+ FeedTools.unescape_entities(category.inner_xml)
enclosure.categories.last.scheme =
- FeedTools.unescape_entities(category.attributes["scheme"].to_s)
+ FeedTools.unescape_entities(
+ category.attributes["scheme"].to_s)
enclosure.categories.last.label =
- FeedTools.unescape_entities(category.attributes["label"].to_s)
- if enclosure.categories.last.scheme == ""
+ FeedTools.unescape_entities(
+ category.attributes["label"].to_s)
+ if enclosure.categories.last.scheme.blank?
enclosure.categories.last.scheme = nil
end
- if enclosure.categories.last.label == ""
+ if enclosure.categories.last.label.blank?
enclosure.categories.last.label = nil
end
end
end
- if enclosure.hash.nil? &&
- XPath.first(media_group, "media:hash/text()").to_s != ""
+ enclosure_media_group_hash = try_xpaths(enclosure_node,
+ ["media:hash/text()"], :select_result_value => true)
+ if enclosure.hash.nil? && !enclosure_media_group_hash.blank?
enclosure.hash = EnclosureHash.new(
- FeedTools.unescape_entities(XPath.first(media_group, "media:hash/text()").to_s),
+ FeedTools.sanitize_html(FeedTools.unescape_entities(
+ enclosure_media_group_hash), :strip),
"md5"
)
end
- if enclosure.player.nil? &&
- XPath.first(media_group, "media:player/@url").to_s != ""
+ enclosure_media_group_url = try_xpaths(media_group,
+ "media:player/@url", :select_result_value => true)
+ if enclosure.player.nil? && !enclosure_media_group_url.blank?
enclosure.player = EnclosurePlayer.new(
- FeedTools.unescape_entities(XPath.first(media_group, "media:player/@url").to_s),
- FeedTools.unescape_entities(XPath.first(media_group, "media:player/@height").to_s),
- FeedTools.unescape_entities(XPath.first(media_group, "media:player/@width").to_s)
+ FeedTools.unescape_entities(enclosure_media_group_url),
+ FeedTools.unescape_entities(
+ try_xpaths(media_group, ["media:player/@height"],
+ :select_result_value => true)),
+ FeedTools.unescape_entities(
+ try_xpaths(media_group, ["media:player/@width"],
+ :select_result_value => true))
)
- if enclosure.player.height == ""
- enclosure.player.height = nil
- end
- if enclosure.player.width == ""
- enclosure.player.width = nil
- end
end
if enclosure.credits.nil? || enclosure.credits.size == 0
enclosure.credits = []
- for credit in XPath.match(media_group, "media:credit")
+ for credit in try_xpaths_all(media_group, ["media:credit"])
enclosure.credits << EnclosureCredit.new(
- FeedTools.unescape_entities(credit.text),
- FeedTools.unescape_entities(credit.attributes["role"].to_s.downcase)
+ FeedTools.unescape_entities(credit.inner_xml),
+ FeedTools.unescape_entities(
+ credit.attributes["role"].to_s.downcase)
)
- if enclosure.credits.last.role == ""
+ if enclosure.credits.last.role.blank?
enclosure.credits.last.role = nil
end
end
end
if enclosure.explicit?.nil?
- enclosure.explicit = (XPath.first(media_group,
- "media:adult/text()").to_s.downcase == "true") ? true : false
+ enclosure.explicit = ((try_xpaths(media_group, [
+ "media:adult/text()"
+ ], :select_result_value => true).downcase == "true") ?
+ true : false)
end
- if enclosure.text.nil? &&
- XPath.first(media_group, "media:text/text()").to_s != ""
- enclosure.text = FeedTools.sanitize_html(FeedTools.unescape_entities(
- XPath.first(media_group, "media:text/text()").to_s), :strip)
+ enclosure_media_group_text = try_xpaths(media_group,
+ ["media:text/text()"], :select_result_value => true)
+ if enclosure.text.nil? && !enclosure_media_group_text.blank?
+ enclosure.text = FeedTools.sanitize_html(
+ FeedTools.unescape_entities(
+ enclosure_media_group_text), :strip)
end
end
# Keep track of the media groups
media_groups << affected_enclosures
@@ -1031,14 +1045,18 @@
enclosure.explicit = true
end
end
# Add all the itunes categories
- for itunes_category in XPath.match(root_node, "itunes:category")
+ itunes_categories =
+ try_xpaths_all(self.root_node, ["itunes:category"])
+ for itunes_category in itunes_categories
genre = "Podcasts"
category = itunes_category.attributes["text"].to_s
- subcategory = XPath.first(itunes_category, "itunes:category/@text").to_s
+ subcategory =
+ try_xpaths(itunes_category, ["itunes:category/@text"],
+ :select_result_value => true)
category_path = genre
if category != ""
category_path << "/" + category
end
if subcategory != ""
@@ -1059,11 +1077,11 @@
end
for enclosure in @enclosures
# Clean up any of those attributes that incorrectly have ""
# or 0 as their values
- if enclosure.type == ""
+ if enclosure.type.blank?
enclosure.type = nil
end
if enclosure.file_size == 0
enclosure.file_size = nil
end
@@ -1080,16 +1098,17 @@
enclosure.bitrate = nil
end
if enclosure.framerate == 0
enclosure.framerate = nil
end
- if enclosure.expression == "" || enclosure.expression.nil?
+ if enclosure.expression.blank?
enclosure.expression = "full"
end
- # If an enclosure is missing the text field, fall back on the itunes:summary field
- if enclosure.text.nil? || enclosure.text = ""
+ # If an enclosure is missing the text field, fall back on the
+ # itunes:summary field
+ if enclosure.text.blank?
enclosure.text = self.itunes_summary
end
# Make sure we don't have duplicate categories
unless enclosure.categories.nil?
@@ -1126,12 +1145,12 @@
end
@enclosures << default_enclosure
end
end
- # If we have a single enclosure, it's safe to inherit the itunes:duration field
- # if it's missing.
+ # If we have a single enclosure, it's safe to inherit the
+ # itunes:duration field if it's missing.
if @enclosures.size == 1
if @enclosures.first.duration.nil? || @enclosures.first.duration == 0
@enclosures.first.duration = self.itunes_duration
end
end
@@ -1145,52 +1164,33 @@
# Returns the feed item author
def author
if @author.nil?
@author = FeedTools::Feed::Author.new
- unless root_node.nil?
- author_node = XPath.first(root_node, "atom10:author",
- FEED_TOOLS_NAMESPACES)
- if author_node.nil?
- author_node = XPath.first(root_node, "atom03:author",
- FEED_TOOLS_NAMESPACES)
- end
- if author_node.nil?
- author_node = XPath.first(root_node, "atom:author")
- end
- if author_node.nil?
- author_node = XPath.first(root_node, "author")
- end
- if author_node.nil?
- author_node = XPath.first(root_node, "managingEditor")
- end
- if author_node.nil?
- author_node = XPath.first(root_node, "dc:author",
- FEED_TOOLS_NAMESPACES)
- end
- if author_node.nil?
- author_node = XPath.first(root_node, "dc:author")
- end
- if author_node.nil?
- author_node = XPath.first(root_node, "dc:creator",
- FEED_TOOLS_NAMESPACES)
- end
- if author_node.nil?
- author_node = XPath.first(root_node, "dc:creator")
- end
- end
+ author_node = try_xpaths(self.root_node, [
+ "atom10:author",
+ "atom03:author",
+ "atom:author",
+ "author",
+ "managingEditor",
+ "dc:author",
+ "dc:creator",
+ "creator"
+ ])
unless author_node.nil?
@author.raw = FeedTools.unescape_entities(
- XPath.first(author_node, "text()").to_s)
- @author.raw = nil if @author.raw == ""
+ XPath.first(author_node, "text()").to_s).strip
+ @author.raw = nil if @author.raw.blank?
unless @author.raw.nil?
raw_scan = @author.raw.scan(
/(.*)\((\b[A-Z0-9._%-\+]+@[A-Z0-9._%-]+\.[A-Z]{2,4}\b)\)/i)
if raw_scan.nil? || raw_scan.size == 0
raw_scan = @author.raw.scan(
/(\b[A-Z0-9._%-\+]+@[A-Z0-9._%-]+\.[A-Z]{2,4}\b)\s*\((.*)\)/i)
- author_raw_pair = raw_scan.first.reverse unless raw_scan.size == 0
+ unless raw_scan.size == 0
+ author_raw_pair = raw_scan.first.reverse
+ end
else
author_raw_pair = raw_scan.first
end
if raw_scan.nil? || raw_scan.size == 0
email_scan = @author.raw.scan(
@@ -1203,46 +1203,48 @@
@author.name = author_raw_pair.first.strip
@author.email = author_raw_pair.last.strip
else
unless @author.raw.include?("@")
# We can be reasonably sure we are looking at something
- # that the creator didn't intend to contain an email address if
- # it got through the preceeding regexes and it doesn't
+ # that the creator didn't intend to contain an email address
+ # if it got through the preceeding regexes and it doesn't
# contain the tell-tale '@' symbol.
@author.name = @author.raw
end
end
end
- @author.name = "" if @author.name.nil?
- if @author.name == ""
+ if @author.name.blank?
@author.name = FeedTools.unescape_entities(
- XPath.first(author_node, "name/text()").to_s)
+ try_xpaths(author_node, [
+ "name/text()",
+ "@name"
+ ], :select_result_value => true)
+ )
end
- if @author.name == ""
- @author.name = FeedTools.unescape_entities(
- XPath.first(author_node, "@name").to_s)
- end
- if @author.email == ""
+ if @author.email.blank?
@author.email = FeedTools.unescape_entities(
- XPath.first(author_node, "email/text()").to_s)
+ try_xpaths(author_node, [
+ "email/text()",
+ "@email"
+ ], :select_result_value => true)
+ )
end
- if @author.email == ""
- @author.email = FeedTools.unescape_entities(
- XPath.first(author_node, "@email").to_s)
- end
- if @author.url == ""
+ if @author.url.blank?
@author.url = FeedTools.unescape_entities(
- XPath.first(author_node, "url/text()").to_s)
+ try_xpaths(author_node, [
+ "url/text()",
+ "uri/text()",
+ "@url",
+ "@uri",
+ "@href"
+ ], :select_result_value => true)
+ )
end
- if @author.url == ""
- @author.url = FeedTools.unescape_entities(
- XPath.first(author_node, "@url").to_s)
- end
- @author.name = nil if @author.name == ""
- @author.raw = nil if @author.raw == ""
- @author.email = nil if @author.email == ""
- @author.url = nil if @author.url == ""
+ @author.name = nil if @author.name.blank?
+ @author.raw = nil if @author.raw.blank?
+ @author.email = nil if @author.email.blank?
+ @author.url = nil if @author.url.blank?
end
# Fallback on the itunes module if we didn't find an author name
begin
@author.name = self.itunes_author if @author.name.nil?
rescue
@@ -1274,16 +1276,15 @@
if @publisher.nil?
@publisher = FeedTools::Feed::Author.new
# Set the author name
@publisher.raw = FeedTools.unescape_entities(
- XPath.first(root_node, "dc:publisher/text()").to_s)
- if @publisher.raw == ""
- @publisher.raw = FeedTools.unescape_entities(
- XPath.first(root_node, "webMaster/text()").to_s)
- end
- unless @publisher.raw == ""
+ try_xpaths(self.root_node, [
+ "dc:publisher/text()",
+ "webMaster/text()"
+ ], :select_result_value => true))
+ unless @publisher.raw.blank?
raw_scan = @publisher.raw.scan(
/(.*)\((\b[A-Z0-9._%-\+]+@[A-Z0-9._%-]+\.[A-Z]{2,4}\b)\)/i)
if raw_scan.nil? || raw_scan.size == 0
raw_scan = @publisher.raw.scan(
/(\b[A-Z0-9._%-\+]+@[A-Z0-9._%-]+\.[A-Z]{2,4}\b)\s*\((.*)\)/i)
@@ -1312,14 +1313,14 @@
@publisher.name = @publisher.raw
end
end
end
- @publisher.name = nil if @publisher.name == ""
- @publisher.raw = nil if @publisher.raw == ""
- @publisher.email = nil if @publisher.email == ""
- @publisher.url = nil if @publisher.url == ""
+ @publisher.name = nil if @publisher.name.blank?
+ @publisher.raw = nil if @publisher.raw.blank?
+ @publisher.email = nil if @publisher.email.blank?
+ @publisher.url = nil if @publisher.url.blank?
end
return @publisher
end
# Sets the feed publisher
@@ -1343,14 +1344,14 @@
#
# This inherits from any incorrectly placed channel-level itunes:author
# elements. They're actually amazingly common. People don't read specs.
def itunes_author
if @itunes_author.nil?
- @itunes_author = FeedTools.unescape_entities(XPath.first(root_node,
- "itunes:author/text()").to_s)
- @itunes_author = feed.itunes_author if @itunes_author == ""
- @itunes_author = nil if @itunes_author == ""
+ @itunes_author = FeedTools.unescape_entities(
+ try_xpaths(self.root_node,
+ ["itunes:author/text()"], :select_result_value => true))
+ @itunes_author = feed.itunes_author if @itunes_author.blank?
end
return @itunes_author
end
# Sets the contents of the itunes:author element
@@ -1359,18 +1360,19 @@
end
# Returns the number of seconds that the associated media runs for
def itunes_duration
if @itunes_duration.nil?
- raw_duration = FeedTools.unescape_entities(XPath.first(root_node,
- "itunes:duration/text()").to_s)
- if raw_duration != ""
+ raw_duration = FeedTools.unescape_entities(
+ try_xpaths(self.root_node,
+ ["itunes:duration/text()"], :select_result_value => true))
+ if !raw_duration.blank?
hms = raw_duration.split(":").map { |x| x.to_i }
if hms.size == 3
- @itunes_duration = hms[0].hour + hms[1].minute + hms[2]
+ @itunes_duration = hms[0].hours + hms[1].minutes + hms[2]
elsif hms.size == 2
- @itunes_duration = hms[0].minute + hms[1]
+ @itunes_duration = hms[0].minutes + hms[1]
elsif hms.size == 1
@itunes_duration = hms[0]
end
end
end
@@ -1386,44 +1388,56 @@
def time(options = {})
validate_options([ :estimate_timestamp ],
options.keys)
options = { :estimate_timestamp => true }.merge(options)
if @time.nil?
- unless root_node.nil?
- time_string = XPath.first(root_node, "pubDate/text()").to_s
- if time_string == ""
- time_string = XPath.first(root_node, "dc:date/text()").to_s
- end
- if time_string == ""
- time_string = XPath.first(root_node, "issued/text()").to_s
- end
- if time_string == ""
- time_string = XPath.first(root_node, "updated/text()").to_s
- end
- if time_string == ""
- time_string = XPath.first(root_node, "time/text()").to_s
- end
- end
+ time_string = try_xpaths(self.root_node, [
+ "atom10:updated/text()",
+ "atom03:updated/text()",
+ "atom:updated/text()",
+ "updated/text()",
+ "atom10:modified/text()",
+ "atom03:modified/text()",
+ "atom:modified/text()",
+ "modified/text()",
+ "time/text()",
+ "atom10:issued/text()",
+ "atom03:issued/text()",
+ "atom:issued/text()",
+ "issued/text()",
+ "atom10:published/text()",
+ "atom03:published/text()",
+ "atom:published/text()",
+ "published/text()",
+ "pubDate/text()",
+ "dc:date/text()",
+ "date/text()"
+ ], :select_result_value => true)
begin
- time_string = "" if time_string.nil?
- if time_string != ""
+ if !time_string.blank?
@time = Time.parse(time_string).gmtime
+ elsif FeedTools.configurations[:timestamp_estimation_enabled] &&
+ !self.title.nil? &&
+ (Time.parse(self.title) - Time.now).abs > 100
+ @time = Time.parse(self.title).gmtime
end
rescue
end
- if options[:estimate_timestamp]
- if @time.nil?
- begin
- @time = succ_time
+ if FeedTools.configurations[:timestamp_estimation_enabled]
+ if options[:estimate_timestamp]
+ if @time.nil?
+ begin
+ @time = succ_time
+ if @time.nil?
+ @time = prev_time
+ end
+ rescue
+ end
if @time.nil?
- @time = prev_time
+ @time = Time.now.gmtime
end
- rescue
end
- if @time.nil?
- @time = Time.now.gmtime
- end
end
end
end
return @time
end
@@ -1438,14 +1452,14 @@
begin
parent_feed = self.feed
if parent_feed.nil?
return nil
end
- if parent_feed.instance_variable_get("@items").nil?
+ if parent_feed.instance_variable_get("@entries").nil?
parent_feed.items
end
- unsorted_items = parent_feed.instance_variable_get("@items")
+ unsorted_items = parent_feed.instance_variable_get("@entries")
item_index = unsorted_items.index(self)
if item_index.nil?
return nil
end
if item_index <= 0
@@ -1455,23 +1469,23 @@
return (previous_item.time(:estimate_timestamp => false) + 1)
rescue
return nil
end
end
- #private :succ_time
+ private :succ_time
# Returns 1 second before the succeeding item's time.
def prev_time #:nodoc:
begin
parent_feed = self.feed
if parent_feed.nil?
return nil
end
- if parent_feed.instance_variable_get("@items").nil?
+ if parent_feed.instance_variable_get("@entries").nil?
parent_feed.items
end
- unsorted_items = parent_feed.instance_variable_get("@items")
+ unsorted_items = parent_feed.instance_variable_get("@entries")
item_index = unsorted_items.index(self)
if item_index.nil?
return nil
end
if item_index >= (unsorted_items.size - 1)
@@ -1481,22 +1495,26 @@
return (succeeding_item.time(:estimate_timestamp => false) - 1)
rescue
return nil
end
end
- #private :prev_time
-
+ private :prev_time
+
# Returns the feed item updated time
def updated
if @updated.nil?
- unless root_node.nil?
- updated_string = XPath.first(root_node, "updated/text()").to_s
- if updated_string == ""
- updated_string = XPath.first(root_node, "modified/text()").to_s
- end
- end
- if updated_string != nil && updated_string != ""
+ updated_string = try_xpaths(self.root_node, [
+ "atom10:updated/text()",
+ "atom03:updated/text()",
+ "atom:updated/text()",
+ "updated/text()",
+ "atom10:modified/text()",
+ "atom03:modified/text()",
+ "atom:modified/text()",
+ "modified/text()"
+ ], :select_result_value => true)
+ if !updated_string.blank?
@updated = Time.parse(updated_string).gmtime rescue nil
else
@updated = nil
end
end
@@ -1506,45 +1524,48 @@
# Sets the feed item updated time
def updated=(new_updated)
@updated = new_updated
end
- # Returns the feed item issued time
- def issued
- if @issued.nil?
- unless root_node.nil?
- issued_string = XPath.first(root_node, "issued/text()").to_s
- if issued_string == ""
- issued_string = XPath.first(root_node, "published/text()").to_s
- end
- if issued_string == ""
- issued_string = XPath.first(root_node, "pubDate/text()").to_s
- end
- if issued_string == ""
- issued_string = XPath.first(root_node, "dc:date/text()").to_s
- end
- end
- if issued_string != nil && issued_string != ""
- @issued = Time.parse(issued_string).gmtime rescue nil
+ # Returns the feed item published time
+ def published
+ if @published.nil?
+ published_string = try_xpaths(self.root_node, [
+ "atom10:issued/text()",
+ "atom03:issued/text()",
+ "atom:issued/text()",
+ "issued/text()",
+ "atom10:published/text()",
+ "atom03:published/text()",
+ "atom:published/text()",
+ "published/text()",
+ "pubDate/text()",
+ "dc:date/text()",
+ "date/text()"
+ ], :select_result_value => true)
+ if !published_string.blank?
+ @issued = Time.parse(published_string).gmtime rescue nil
else
@issued = nil
end
end
return @issued
end
- # Sets the feed item issued time
- def issued=(new_issued)
- @issued = new_issued
+ # Sets the feed item published time
+ def published=(new_published)
+ @published = new_published
end
# Returns the url for posting comments
def comments
if @comments.nil?
- @comments = FeedTools.normalize_url(
- XPath.first(root_node, "comments/text()").to_s)
- @comments = nil if @comments == ""
+ @comments = try_xpaths(self.root_node, ["comments/text()"],
+ :select_result_value => true)
+ if FeedTools.configurations[:url_normalization_enabled]
+ @comments = FeedTools.normalize_url(@comments)
+ end
end
return @comments
end
# Sets the url for posting comments
@@ -1554,14 +1575,14 @@
# The source that this post was based on
def source
if @source.nil?
@source = FeedTools::Feed::Link.new
- @source.url = XPath.first(root_node, "source/@url").to_s
- @source.url = nil if @source.url == ""
- @source.value = XPath.first(root_node, "source/text()").to_s
- @source.value = nil if @source.value == ""
+ @source.url = try_xpaths(self.root_node, ["source/@url"],
+ :select_result_value => true)
+ @source.value = try_xpaths(self.root_node, ["source/text()"],
+ :select_result_value => true)
end
return @source
end
# Returns the feed item tags
@@ -1573,55 +1594,70 @@
if root_node.nil?
return @tags
end
if @tags.nil? || @tags.size == 0
@tags = []
- tag_list = XPath.match(root_node, "dc:subject/rdf:Bag/rdf:li/text()")
- if tag_list.nil? || tag_list.size == 0
- tag_list = XPath.match(root_node,
- "dc:subject/rdf:Bag/rdf:li/text()", FEED_TOOLS_NAMESPACES)
- end
- if tag_list != nil && tag_list.size > 1
+ tag_list = try_xpaths_all(self.root_node,
+ ["dc:subject/rdf:Bag/rdf:li/text()"],
+ :select_result_value => true)
+ if tag_list != nil && tag_list.size > 0
for tag in tag_list
- @tags << tag.to_s.downcase.strip
+ @tags << tag.downcase.strip
end
end
end
if @tags.nil? || @tags.size == 0
# messy effort to find ourselves some tags, mainly for del.icio.us
@tags = []
- rdf_bag = XPath.match(root_node, "taxo:topics/rdf:Bag/rdf:li")
+ rdf_bag = try_xpaths_all(self.root_node,
+ ["taxo:topics/rdf:Bag/rdf:li"])
if rdf_bag != nil && rdf_bag.size > 0
for tag_node in rdf_bag
begin
- tag_url = XPath.first(root_node, "@resource").to_s
- tag_match = tag_url.scan(/\/(tag|tags)\/(\w+)/)
+ tag_url = try_xpaths(tag_node, ["@resource"],
+ :select_result_value => true)
+ tag_match = tag_url.scan(/\/(tag|tags)\/(\w+)$/)
if tag_match.size > 0
@tags << tag_match.first.last.downcase.strip
end
rescue
end
end
end
end
if @tags.nil? || @tags.size == 0
@tags = []
- tag_list = XPath.match(root_node, "category/text()")
+ tag_list = try_xpaths_all(self.root_node, ["category/text()"],
+ :select_result_value => true)
for tag in tag_list
@tags << tag.to_s.downcase.strip
end
end
if @tags.nil? || @tags.size == 0
@tags = []
- tag_list = XPath.match(root_node, "dc:subject/text()")
+ tag_list = try_xpaths_all(self.root_node, ["dc:subject/text()"],
+ :select_result_value => true)
for tag in tag_list
@tags << tag.to_s.downcase.strip
end
end
- if @tags.nil? || @tags.size == 0
+ if @tags.blank?
begin
- @tags = XPath.first(root_node, "itunes:keywords/text()").to_s.downcase.split(" ")
+ itunes_keywords_string = try_xpaths(self.root_node, [
+ "itunes:keywords/text()"
+ ], :select_result_value => true)
+ unless itunes_keywords_string.blank?
+ @tags = itunes_keywords_string.downcase.split(",")
+ if @tags.size == 1
+ @tags = itunes_keywords_string.downcase.split(" ")
+ @tags = @tags.map { |tag| tag.chomp(",") }
+ end
+ if @tags.size == 1
+ @tags = itunes_keywords_string.downcase.split(",")
+ end
+ @tags = @tags.map { |tag| tag.strip }
+ end
rescue
@tags = []
end
end
if @tags.nil?
@@ -1640,16 +1676,15 @@
# Returns true if this feed item contains explicit material. If the whole
# feed has been marked as explicit, this will return true even if the item
# isn't explicitly marked as explicit.
def explicit?
if @explicit.nil?
- if XPath.first(root_node,
- "media:adult/text()").to_s.downcase == "true" ||
- XPath.first(root_node,
- "itunes:explicit/text()").to_s.downcase == "yes" ||
- XPath.first(root_node,
- "itunes:explicit/text()").to_s.downcase == "true" ||
+ explicit_string = try_xpaths(self.root_node, [
+ "media:adult/text()",
+ "itunes:explicit/text()"
+ ], :select_result_value => true)
+ if explicit_string == "true" || explicit_string == "yes" ||
feed.explicit?
@explicit = true
else
@explicit = false
end
@@ -1667,23 +1702,25 @@
def build_xml_hook(feed_type, version, xml_builder)
return nil
end
# Generates xml based on the content of the feed item
- def build_xml(feed_type=(self.feed.feed_type or "rss"), version=nil,
- xml_builder=Builder::XmlMarkup.new(:indent => 2))
+ def build_xml(feed_type=(self.feed.feed_type or "atom"), version=nil,
+ xml_builder=Builder::XmlMarkup.new(
+ :indent => 2, :escape_attrs => false))
if feed_type == "rss" && (version == nil || version == 0.0)
version = 1.0
elsif feed_type == "atom" && (version == nil || version == 0.0)
version = 1.0
end
if feed_type == "rss" && (version == 0.9 || version == 1.0 || version == 1.1)
# RDF-based rss format
if link.nil?
raise "Cannot generate an rdf-based feed item with a nil link field."
end
- return xml_builder.item("rdf:about" => CGI.escapeHTML(link)) do
+ return xml_builder.item("rdf:about" =>
+ FeedTools.escape_entities(link)) do
unless title.nil? || title == ""
xml_builder.title(title)
else
xml_builder.title
end
@@ -1706,84 +1743,95 @@
for tag in tags
xml_builder.tag!("rdf:li", tag)
end
end
end
- xml_builder.tag!("itunes:keywords", tags.join(" "))
+ if self.feed.podcast?
+ xml_builder.tag!("itunes:keywords", tags.join(", "))
+ end
end
build_xml_hook(feed_type, version, xml_builder)
end
elsif feed_type == "rss"
# normal rss format
return xml_builder.item do
- unless title.nil? || title == ""
- xml_builder.title(title)
+ unless self.title.blank?
+ xml_builder.title(self.title)
end
- unless link.nil? || link == ""
+ unless self.link.blank?
xml_builder.link(link)
end
- unless description.nil? || description == ""
- xml_builder.description(description)
+ unless self.description.blank?
+ xml_builder.description(self.description)
end
- unless time.nil?
- xml_builder.pubDate(time.rfc822)
+ unless self.time.nil?
+ xml_builder.pubDate(self.time.rfc822)
end
+ unless self.guid.blank?
+ if FeedTools.is_uri?(self.guid)
+ xml_builder.guid(self.guid, "isPermaLink" => "true")
+ else
+ xml_builder.guid(self.guid, "isPermaLink" => "false")
+ end
+ else
+ unless self.link.blank?
+ xml_builder.guid(self.link, "isPermaLink" => "true")
+ end
+ end
unless tags.nil? || tags.size == 0
xml_builder.tag!("taxo:topics") do
xml_builder.tag!("rdf:Bag") do
for tag in tags
xml_builder.tag!("rdf:li", tag)
end
end
end
- xml_builder.tag!("itunes:keywords", tags.join(" "))
- end
- build_xml_hook(feed_type, version, xml_builder)
- end
- elsif feed_type == "atom" && version == 0.3
- # normal atom format
- return xml_builder.entry("xmlns" =>
- FEED_TOOLS_NAMESPACES['atom03']) do
- unless title.nil? || title == ""
- xml_builder.title(title,
- "mode" => "escaped",
- "type" => "text/html")
- end
- xml_builder.author do
- unless self.author.nil? || self.author.name.nil?
- xml_builder.name(self.author.name)
- else
- xml_builder.name("n/a")
+ if self.feed.podcast?
+ xml_builder.tag!("itunes:keywords", tags.join(", "))
end
- unless self.author.nil? || self.author.email.nil?
- xml_builder.email(self.author.email)
- end
- unless self.author.nil? || self.author.url.nil?
- xml_builder.url(self.author.url)
- end
end
- unless link.nil? || link == ""
- xml_builder.link("href" => link,
- "rel" => "alternate",
- "type" => "text/html",
- "title" => title)
- end
- unless description.nil? || description == ""
- xml_builder.content(description,
- "mode" => "escaped",
- "type" => "text/html")
- end
- unless time.nil?
- xml_builder.issued(time.iso8601)
- end
- unless tags.nil? || tags.size == 0
- for tag in tags
- xml_builder.category(tag)
+ unless self.enclosures.blank? || self.enclosures.size == 0
+ for enclosure in self.enclosures
+ attribute_hash = {}
+ next if enclosure.url.blank?
+ begin
+ if enclosure.file_size.blank? || enclosure.file_size.to_i == 0
+ # We can't use this enclosure because it's missing the
+ # required file size. Check alternate versions for
+ # file_size.
+ if !enclosure.versions.blank? && enclosure.versions.size > 0
+ for alternate in enclosure.versions
+ if alternate.file_size != nil &&
+ alternate.file_size.to_i > 0
+ enclosure = alternate
+ break
+ end
+ end
+ end
+ end
+ rescue
+ end
+ attribute_hash["url"] = FeedTools.normalize_url(enclosure.url)
+ if enclosure.type != nil
+ attribute_hash["type"] = enclosure.type
+ end
+ if enclosure.file_size != nil && enclosure.file_size.to_i > 0
+ attribute_hash["length"] = enclosure.file_size.to_s
+ else
+ # We couldn't find an alternate and the problem is still
+ # there. Give up and go on.
+ xml_builder.comment!(
+ "*** Enclosure failed to include file size. Ignoring. ***")
+ next
+ end
+ xml_builder.enclosure(attribute_hash)
end
end
build_xml_hook(feed_type, version, xml_builder)
end
+ elsif feed_type == "atom" && version == 0.3
+ raise "Atom 0.3 is obsolete."
elsif feed_type == "atom" && version == 1.0
# normal atom format
return xml_builder.entry("xmlns" =>
FEED_TOOLS_NAMESPACES['atom10']) do
unless title.nil? || title == ""
@@ -1798,24 +1846,24 @@
end
unless self.author.nil? || self.author.email.nil?
xml_builder.email(self.author.email)
end
unless self.author.nil? || self.author.url.nil?
- xml_builder.url(self.author.url)
+ xml_builder.uri(self.author.url)
end
end
unless link.nil? || link == ""
- xml_builder.link("href" => link,
+ xml_builder.link("href" => FeedTools.escape_entities(self.link),
"rel" => "alternate",
"type" => "text/html",
- "title" => title)
+ "title" => FeedTools.escape_entities(title))
end
- unless description.nil? || description == ""
+ if !description.blank?
xml_builder.content(description,
"type" => "html")
- else
- xml_builder.content(FeedTools.no_content_string,
+ elsif !FeedTools.configurations[:no_content_string].blank?
+ xml_builder.content(FeedTools.configurations[:no_content_string],
"type" => "html")
end
if self.updated != nil
xml_builder.updated(self.updated.iso8601)
elsif self.time != nil
@@ -1849,28 +1897,39 @@
unless self.tags.nil? || self.tags.size == 0
for tag in self.tags
xml_builder.category("term" => tag)
end
end
+ unless self.enclosures.blank? || self.enclosures.size == 0
+ for enclosure in self.enclosures
+ attribute_hash = {}
+ next if enclosure.url.blank?
+ attribute_hash["rel"] = "enclosure"
+ attribute_hash["href"] = FeedTools.normalize_url(enclosure.url)
+ if enclosure.type != nil
+ attribute_hash["type"] = enclosure.type
+ end
+ if enclosure.file_size != nil && enclosure.file_size.to_i > 0
+ attribute_hash["length"] = enclosure.file_size.to_s
+ end
+ xml_builder.link(attribute_hash)
+ end
+ end
build_xml_hook(feed_type, version, xml_builder)
end
+ else
+ raise "Unsupported feed format/version."
end
end
- alias_method :tagline, :description
- alias_method :tagline=, :description=
- alias_method :subtitle, :description
- alias_method :subtitle=, :description=
- alias_method :summary, :description
- alias_method :summary=, :description=
- alias_method :abstract, :description
- alias_method :abstract=, :description=
- alias_method :content, :description
- alias_method :content=, :description=
+ alias_method :summary, :content
+ alias_method :summary=, :content=
+ alias_method :abstract, :content
+ alias_method :abstract=, :content=
+ alias_method :description, :content
+ alias_method :description=, :content=
alias_method :guid, :id
alias_method :guid=, :id=
- alias_method :published, :issued
- alias_method :published=, :issued=
# Returns a simple representation of the feed item object's state.
def inspect
return "#<FeedTools::FeedItem:0x#{self.object_id.to_s(16)} " +
"LINK:#{self.link}>"