lib/feed_tools/feed_item.rb in feedtools-0.2.26 vs lib/feed_tools/feed_item.rb in feedtools-0.2.27
- old
+ new
@@ -35,11 +35,25 @@
@xml_document = nil
@root_node = nil
@title = nil
@id = nil
@time = Time.now.gmtime
+ @version = FeedTools::FEED_TOOLS_VERSION::STRING
end
+
+ # Breaks any references that the feed entry may be keeping around, thus
+ # making the job of the garbage collector much, much easier. Call this
+ # method prior to feed entries going out of scope to prevent memory leaks.
+ def dispose()
+ @feed_data = nil
+ @feed_data_type = nil
+ @xml_document = nil
+ @root_node = nil
+ @title = nil
+ @id = nil
+ @time = nil
+ end
# Returns the parent feed of this feed item
# Warning, this method may be slow if you have a
# large number of FeedTools::Feed objects. Can't
# use a direct reference to the parent because it plays
@@ -67,10 +81,62 @@
end
end
return parent_feed
end
+ # Does a full parse of the feed item.
+ def full_parse
+ self.configurations
+
+ self.encoding
+ self.xml_document
+ self.root_node
+
+ self.feed_type
+ self.feed_version
+
+ self.id
+ self.title
+ self.content
+ self.summary
+ self.links
+ self.link
+ self.comments
+ self.time
+ self.updated
+ self.published
+ self.source
+ self.categories
+ self.tags
+ self.images
+ self.rights
+ self.author
+ self.publisher
+
+ self.itunes_summary
+ self.itunes_subtitle
+ self.itunes_image_link
+ self.itunes_author
+ self.itunes_duration
+
+ self.media_text
+ self.media_thumbnail_link
+
+ self.explicit?
+ end
+
+ # Returns a duplicate object suitable for serialization
+ def serializable
+ self.full_parse()
+ feed_item_to_dump = self.dup
+ feed_item_to_dump.author
+ feed_item_to_dump.publisher
+ feed_item_to_dump.instance_variable_set("@xml_document", nil)
+ feed_item_to_dump.instance_variable_set("@root_node", nil)
+ return feed_item_to_dump
+ end
+
# Returns the load options for this feed.
def configurations
if @configurations.blank?
parent_feed = self.feed
if parent_feed != nil
@@ -88,11 +154,16 @@
end
# Returns the feed item's encoding.
def encoding
if @encoding.nil?
- @encoding = self.feed.encoding
+ parent_feed = self.feed
+ if parent_feed != nil
+ @encoding = parent_feed.encoding
+ else
+ @encoding = nil
+ end
end
return @encoding
end
# Returns the feed item's raw data.
@@ -112,39 +183,44 @@
end
# Sets the feed item's data type.
def feed_data_type=(new_feed_data_type)
@feed_data_type = new_feed_data_type
+ if self.feed_data_type != :xml
+ @xml_document = nil
+ end
end
# Returns a REXML Document of the feed_data
def xml_document
- if self.feed_data_type != :xml
- @xml_document = nil
- else
- if @xml_document.nil?
+ if @xml_document.nil?
+ return nil if self.feed_data.blank?
+ if self.feed_data_type != :xml
+ @xml_document = nil
+ else
# TODO: :ignore_whitespace_nodes => :all
# Add that?
# ======================================
@xml_document = REXML::Document.new(self.feed_data)
end
end
return @xml_document
end
- # Returns the first node within the root_node that matches the xpath query.
+ # Returns the first node within the root_node that matches the xpath
+ # query.
def find_node(xpath, select_result_value=false)
- if feed.feed_data_type != :xml
+ if self.feed_data_type != :xml
raise "The feed data type is not xml."
end
return FeedTools::XmlHelper.try_xpaths(self.root_node, [xpath],
:select_result_value => select_result_value)
end
# Returns all nodes within the root_node that match the xpath query.
def find_all_nodes(xpath, select_result_value=false)
- if feed.feed_data_type != :xml
+ if self.feed_data_type != :xml
raise "The feed data type is not xml."
end
return FeedTools::XmlHelper.try_xpaths_all(self.root_node, [xpath],
:select_result_value => select_result_value)
end
@@ -189,10 +265,14 @@
# Returns the feed item's unique id
def id
if @id.nil?
@id = FeedTools::XmlHelper.try_xpaths(self.root_node, [
+ "atom10:id/@gr:original-id",
+ "atom03:id/@gr:original-id",
+ "atom:id/@gr:original-id",
+ "id/@gr:original-id",
"atom10:id/text()",
"atom03:id/text()",
"atom:id/text()",
"id/text()",
"guid/text()"
@@ -213,14 +293,15 @@
title_node = FeedTools::XmlHelper.try_xpaths(self.root_node, [
"atom10:title",
"atom03:title",
"atom:title",
"title",
- "dc:title"
+ "dc:title",
+ "headline"
])
@title = FeedTools::HtmlHelper.process_text_construct(title_node,
- self.feed_type, self.feed_version)
+ self.feed_type, self.feed_version, [self.base_uri])
if self.feed_type == "atom" ||
self.configurations[:always_strip_wrapper_elements]
@title = FeedTools::HtmlHelper.strip_wrapper_element(@title)
end
if !@title.blank? && self.configurations[:strip_comment_count]
@@ -249,10 +330,11 @@
repair_entities = false
content_node = FeedTools::XmlHelper.try_xpaths(self.root_node, [
"atom10:content",
"atom03:content",
"atom:content",
+ "body/datacontent",
"xhtml:body",
"body",
"xhtml:div",
"div",
"p:payload",
@@ -271,22 +353,22 @@
"abstract",
"blurb",
"info"
])
@content = FeedTools::HtmlHelper.process_text_construct(content_node,
- self.feed_type, self.feed_version)
+ self.feed_type, self.feed_version, [self.base_uri])
if self.feed_type == "atom" ||
self.configurations[:always_strip_wrapper_elements]
@content = FeedTools::HtmlHelper.strip_wrapper_element(@content)
end
- if @content.blank?
+ if @content.nil?
@content = self.media_text
end
- if @content.blank?
+ if @content.nil?
@content = self.itunes_summary
end
- if @content.blank?
+ if @content.nil?
@content = self.itunes_subtitle
end
end
return @content
end
@@ -321,14 +403,15 @@
"encoded",
"atom10:content",
"atom03:content",
"atom:content",
"content",
- "info"
+ "info",
+ "body/datacontent"
])
@summary = FeedTools::HtmlHelper.process_text_construct(summary_node,
- self.feed_type, self.feed_version)
+ self.feed_type, self.feed_version, [self.base_uri])
if self.feed_type == "atom" ||
self.configurations[:always_strip_wrapper_elements]
@summary = FeedTools::HtmlHelper.strip_wrapper_element(@summary)
end
if @summary.blank?
@@ -347,147 +430,10 @@
# Sets the feed item summary
def summary=(new_summary)
@summary = new_summary
end
- # Returns the contents of the itunes:summary element
- def itunes_summary
- if @itunes_summary.nil?
- @itunes_summary = FeedTools::XmlHelper.try_xpaths(self.root_node, [
- "itunes:summary/text()"
- ], :select_result_value => true)
- unless @itunes_summary.blank?
- @itunes_summary = FeedTools::HtmlHelper.unescape_entities(@itunes_summary)
- @itunes_summary = FeedTools::HtmlHelper.sanitize_html(@itunes_summary)
- @itunes_summary.strip!
- else
- @itunes_summary = nil
- end
- end
- return @itunes_summary
- end
-
- # Sets the contents of the itunes:summary element
- def itunes_summary=(new_itunes_summary)
- @itunes_summary = new_itunes_summary
- end
-
- # Returns the contents of the itunes:subtitle element
- def itunes_subtitle
- if @itunes_subtitle.nil?
- @itunes_subtitle = FeedTools::XmlHelper.try_xpaths(self.root_node, [
- "itunes:subtitle/text()"
- ], :select_result_value => true)
- unless @itunes_subtitle.blank?
- @itunes_subtitle = FeedTools::HtmlHelper.unescape_entities(@itunes_subtitle)
- @itunes_subtitle = FeedTools::HtmlHelper.sanitize_html(@itunes_subtitle)
- @itunes_subtitle.strip!
- else
- @itunes_subtitle = nil
- end
- end
- return @itunes_subtitle
- end
-
- # Sets the contents of the itunes:subtitle element
- def itunes_subtitle=(new_itunes_subtitle)
- @itunes_subtitle = new_itunes_subtitle
- end
-
- # Returns the contents of the media:text element
- def media_text
- if @media_text.nil?
- @media_text = FeedTools::XmlHelper.try_xpaths(self.root_node, [
- "media:text/text()"
- ], :select_result_value => true)
- unless @media_text.blank?
- @media_text = FeedTools::HtmlHelper.unescape_entities(@media_text)
- @media_text = FeedTools::HtmlHelper.sanitize_html(@media_text)
- @media_text.strip!
- else
- @media_text = nil
- end
- end
- return @media_text
- end
-
- # Sets the contents of the media:text element
- def media_text=(new_media_text)
- @media_text = new_media_text
- end
-
- # Returns the feed item link
- def link
- if @link.nil?
- max_score = 0
- for link_object in self.links.reverse
- score = 0
- if FeedTools::HtmlHelper.html_type?(link_object.type)
- score = score + 2
- elsif link_object.type != nil
- score = score - 1
- end
- if FeedTools::HtmlHelper.xml_type?(link_object.type)
- score = score + 1
- end
- if link_object.rel == "alternate"
- score = score + 1
- end
- if link_object.rel == "self"
- score = score - 1
- end
- if score >= max_score
- max_score = score
- @link = link_object.href
- end
- end
- if @link.blank?
- @link = FeedTools::XmlHelper.try_xpaths(self.root_node, [
- "@href",
- "@rdf:about",
- "@about"
- ], :select_result_value => true)
- end
- if @link.blank?
- if FeedTools::UriHelper.is_uri?(self.id) &&
- (self.id =~ /^http/)
- @link = self.id
- end
- end
- if !@link.blank?
- @link = FeedTools::HtmlHelper.unescape_entities(@link)
- end
- @link = self.comments if @link.blank?
- @link = nil if @link.blank?
- begin
- if !(@link =~ /^file:/) &&
- !FeedTools::UriHelper.is_uri?(@link)
- stored_base_uri =
- FeedTools::GenericHelper.recursion_trap(:feed_link) do
- self.feed.base_uri if self.feed != nil
- end
- root_base_uri = nil
- unless self.root_node.nil?
- root_base_uri = self.root_node.base_uri
- end
- @link = FeedTools::UriHelper.resolve_relative_uri(
- @link, [root_base_uri,stored_base_uri])
- end
- rescue
- end
- if self.configurations[:url_normalization_enabled]
- @link = FeedTools::UriHelper.normalize_url(@link)
- end
- end
- return @link
- end
-
- # Sets the feed item link
- def link=(new_link)
- @link = new_link
- end
-
# Returns the links collection
def links
if @links.nil?
@links = []
link_nodes =
@@ -505,21 +451,22 @@
link_object.href = FeedTools::XmlHelper.try_xpaths(link_node, [
"@atom10:href",
"@atom03:href",
"@atom:href",
"@href",
+ "@url",
"text()"
], :select_result_value => true)
if link_object.href.nil? && link_node.base_uri != nil
link_object.href = ""
end
begin
if !(link_object.href =~ /^file:/) &&
!FeedTools::UriHelper.is_uri?(link_object.href)
stored_base_uri =
FeedTools::GenericHelper.recursion_trap(:feed_link) do
- self.feed.base_uri if self.feed != nil
+ self.base_uri if self.feed != nil
end
link_object.href = FeedTools::UriHelper.resolve_relative_uri(
link_object.href,
[link_node.base_uri, stored_base_uri])
end
@@ -586,39 +533,237 @@
link_object.length = nil
end
end
@links << link_object
end
+ if @links.empty? && self.enclosures.size > 0
+ # If there's seriously nothing to link to, but there are enclosures
+ # available, then add a link to the first one.
+ enclosure_link = self.enclosures[0]
+ link_object = FeedTools::Link.new
+ link_object.href = enclosure_link.url
+ link_object.type = enclosure_link.type
+ @links << link_object
+ end
end
return @links
end
# Sets the links collection
def links=(new_links)
@links = new_links
end
-
+
+ # Returns the feed item link
+ def link
+ if @link.nil?
+ max_score = 0
+ for link_object in self.links.reverse
+ score = 0
+ if FeedTools::HtmlHelper.html_type?(link_object.type)
+ score = score + 2
+ elsif link_object.type != nil
+ score = score - 1
+ end
+ if FeedTools::HtmlHelper.xml_type?(link_object.type)
+ score = score + 1
+ end
+ if link_object.type =~ /^video/ && self.links.size == 1
+ score = score + 1
+ elsif link_object.type =~ /^audio/ && self.links.size == 1
+ score = score + 1
+ end
+ if link_object.rel == "alternate"
+ score = score + 1
+ end
+ if link_object.rel == "self"
+ score = score - 1
+ end
+ if score >= max_score
+ max_score = score
+ @link = link_object.href
+ end
+ end
+ if @link.blank?
+ @link = FeedTools::XmlHelper.try_xpaths(self.root_node, [
+ "@href",
+ "@rdf:about",
+ "@about"
+ ], :select_result_value => true)
+ end
+ if @link.blank?
+ if FeedTools::UriHelper.is_uri?(self.id) &&
+ (self.id =~ /^http/)
+ @link = self.id
+ end
+ end
+ if !@link.blank?
+ @link = FeedTools::HtmlHelper.unescape_entities(@link)
+ end
+ @link = self.comments if @link.blank?
+ @link = nil if @link.blank?
+ begin
+ if !(@link =~ /^file:/) &&
+ !FeedTools::UriHelper.is_uri?(@link)
+ stored_base_uri =
+ FeedTools::GenericHelper.recursion_trap(:feed_link) do
+ self.base_uri if self.feed != nil
+ end
+ root_base_uri = nil
+ unless self.root_node.nil?
+ root_base_uri = self.root_node.base_uri
+ end
+ @link = FeedTools::UriHelper.resolve_relative_uri(
+ @link, [root_base_uri,stored_base_uri])
+ end
+ rescue
+ end
+ if self.configurations[:url_normalization_enabled]
+ @link = FeedTools::UriHelper.normalize_url(@link)
+ end
+ end
+ return @link
+ end
+
+ # Sets the feed item link
+ def link=(new_link)
+ @link = new_link
+ end
+
+ # Returns the parent feed's base_uri if any.
+ def base_uri
+ parent_feed = self.feed
+ if parent_feed != nil
+ return parent_feed.base_uri
+ else
+ return nil
+ end
+ end
+
+ # Returns the url for posting comments
+ def comments
+ if @comments.nil?
+ @comments = FeedTools::XmlHelper.try_xpaths(
+ self.root_node, ["comments/text()"],
+ :select_result_value => true)
+ begin
+ if !(@comments =~ /^file:/) &&
+ !FeedTools::UriHelper.is_uri?(@comments)
+ root_base_uri = nil
+ unless self.root_node.nil?
+ root_base_uri = self.root_node.base_uri
+ end
+ @comments = FeedTools::UriHelper.resolve_relative_uri(
+ @comments, [root_base_uri, self.base_uri])
+ end
+ rescue
+ end
+ if self.configurations[:url_normalization_enabled]
+ @comments = FeedTools::UriHelper.normalize_url(@comments)
+ end
+ end
+ return @comments
+ end
+
+ # Sets the url for posting comments
+ def comments=(new_comments)
+ @comments = new_comments
+ end
+
+ # Returns the contents of the itunes:summary element
+ def itunes_summary
+ if @itunes_summary.nil?
+ @itunes_summary = FeedTools::XmlHelper.try_xpaths(self.root_node, [
+ "itunes:summary/text()"
+ ], :select_result_value => true)
+ unless @itunes_summary.blank?
+ @itunes_summary =
+ FeedTools::HtmlHelper.unescape_entities(@itunes_summary)
+ @itunes_summary =
+ FeedTools::HtmlHelper.sanitize_html(@itunes_summary)
+ @itunes_summary.strip!
+ else
+ @itunes_summary = nil
+ end
+ end
+ return @itunes_summary
+ end
+
+ # Sets the contents of the itunes:summary element
+ def itunes_summary=(new_itunes_summary)
+ @itunes_summary = new_itunes_summary
+ end
+
+ # Returns the contents of the itunes:subtitle element
+ def itunes_subtitle
+ if @itunes_subtitle.nil?
+ @itunes_subtitle = FeedTools::XmlHelper.try_xpaths(self.root_node, [
+ "itunes:subtitle/text()"
+ ], :select_result_value => true)
+ unless @itunes_subtitle.blank?
+ @itunes_subtitle =
+ FeedTools::HtmlHelper.unescape_entities(@itunes_subtitle)
+ @itunes_subtitle =
+ FeedTools::HtmlHelper.sanitize_html(@itunes_subtitle)
+ @itunes_subtitle.strip!
+ else
+ @itunes_subtitle = nil
+ end
+ end
+ return @itunes_subtitle
+ end
+
+ # Sets the contents of the itunes:subtitle element
+ def itunes_subtitle=(new_itunes_subtitle)
+ @itunes_subtitle = new_itunes_subtitle
+ end
+
+ # Returns the contents of the media:text element
+ def media_text
+ if @media_text.nil?
+ @media_text = FeedTools::XmlHelper.try_xpaths(self.root_node, [
+ "media:text/text()"
+ ], :select_result_value => true)
+ unless @media_text.blank?
+ @media_text = FeedTools::HtmlHelper.unescape_entities(@media_text)
+ @media_text = FeedTools::HtmlHelper.sanitize_html(@media_text)
+ @media_text.strip!
+ else
+ @media_text = nil
+ end
+ end
+ return @media_text
+ end
+
+ # Sets the contents of the media:text element
+ def media_text=(new_media_text)
+ @media_text = new_media_text
+ end
+
# Returns a list of the feed item's categories
def categories
if @categories.nil?
@categories = []
category_nodes = FeedTools::XmlHelper.try_xpaths_all(self.root_node, [
"category",
"dc:subject"
])
for category_node in category_nodes
category = FeedTools::Category.new
- category.term = FeedTools::XmlHelper.try_xpaths(category_node, ["@term", "text()"],
+ category.term = FeedTools::XmlHelper.try_xpaths(
+ category_node, ["@term", "text()"],
:select_result_value => true)
category.term.strip! unless category.term.nil?
- category.label = FeedTools::XmlHelper.try_xpaths(category_node, ["@label"],
+ category.label = FeedTools::XmlHelper.try_xpaths(
+ category_node, ["@label"],
:select_result_value => true)
category.label.strip! unless category.label.nil?
- category.scheme = FeedTools::XmlHelper.try_xpaths(category_node, [
- "@scheme",
- "@domain"
- ], :select_result_value => true)
+ category.scheme = FeedTools::XmlHelper.try_xpaths(
+ category_node, [
+ "@scheme",
+ "@domain"
+ ], :select_result_value => true)
category.scheme.strip! unless category.scheme.nil?
@categories << category
end
end
return @categories
@@ -639,21 +784,22 @@
image = FeedTools::Image.new
image.href = FeedTools::XmlHelper.try_xpaths(image_node, [
"url/text()",
"@rdf:resource",
"@href",
+ "@url",
"text()"
], :select_result_value => true)
if image.href.nil? && image_node.base_uri != nil
image.href = ""
end
begin
if !(image.href =~ /^file:/) &&
!FeedTools::UriHelper.is_uri?(image.href)
stored_base_uri =
FeedTools::GenericHelper.recursion_trap(:feed_link) do
- self.feed.base_uri if self.feed != nil
+ self.base_uri if self.feed != nil
end
image.href = FeedTools::UriHelper.resolve_relative_uri(
image.href, [image_node.base_uri, stored_base_uri])
end
rescue
@@ -705,11 +851,12 @@
@itunes_image_link = FeedTools::XmlHelper.try_xpaths(self.root_node, [
"itunes:image/@href",
"itunes:link[@rel='image']/@href"
], :select_result_value => true)
if self.configurations[:url_normalization_enabled]
- @itunes_image_link = FeedTools::UriHelper.normalize_url(@itunes_image_link)
+ @itunes_image_link =
+ FeedTools::UriHelper.normalize_url(@itunes_image_link)
end
end
return @itunes_image_link
end
@@ -719,15 +866,17 @@
end
# Returns the feed item media thumbnail link
def media_thumbnail_link
if @media_thumbnail_link.nil?
- @media_thumbnail_link = FeedTools::XmlHelper.try_xpaths(self.root_node, [
- "media:thumbnail/@url"
- ], :select_result_value => true)
+ @media_thumbnail_link = FeedTools::XmlHelper.try_xpaths(
+ self.root_node, [
+ "media:thumbnail/@url"
+ ], :select_result_value => true)
if self.configurations[:url_normalization_enabled]
- @media_thumbnail_link = FeedTools::UriHelper.normalize_url(@media_thumbnail_link)
+ @media_thumbnail_link =
+ FeedTools::UriHelper.normalize_url(@media_thumbnail_link)
end
end
return @media_thumbnail_link
end
@@ -748,11 +897,11 @@
"copyrights",
"dc:rights",
"rights"
])
@rights = FeedTools::HtmlHelper.process_text_construct(rights_node,
- self.feed_type, self.feed_version)
+ self.feed_type, self.feed_version, [self.base_uri])
if self.feed_type == "atom" ||
self.configurations[:always_strip_wrapper_elements]
@rights = FeedTools::HtmlHelper.strip_wrapper_element(@rights)
end
end
@@ -762,16 +911,28 @@
# Sets the feed item's rights information
def rights=(new_rights)
@rights = new_rights
end
- def license #:nodoc:
- raise "Not implemented yet."
+ # Returns the first license link for the feed item.
+ def license
+ return self.licenses.first
end
+
+ # Returns all licenses linked from this feed item.
+ def licenses
+ if @licenses.nil?
+ @licenses = self.links.select do |link|
+ link.rel == "license"
+ end
+ end
+ return @licenses
+ end
- def license=(new_license) #:nodoc:
- raise "Not implemented yet."
+ # Sets the feed item's licenses.
+ def licenses=(new_licenses)
+ @licenses = new_licenses
end
# Returns all feed item enclosures
def enclosures
if @enclosures.nil?
@@ -791,10 +952,13 @@
FeedTools::XmlHelper.try_xpaths_all(self.root_node,
["media:content"])
media_group_enclosures =
FeedTools::XmlHelper.try_xpaths_all(self.root_node, ["media:group"])
+ bogus_enclosures =
+ FeedTools::XmlHelper.try_xpaths_all(self.root_node, ["video"])
+
# TODO: Implement this
bittorrent_enclosures =
FeedTools::XmlHelper.try_xpaths_all(self.root_node,
["bitTorrent:torrent"])
@@ -837,10 +1001,37 @@
enclosure.explicit = false
if new_enclosure
@enclosures << enclosure
end
end
+
+ # Parse nonstandard <video> element enclosures. If one repeats the url
+ # of an enclosure we already have, we merge the two together.
+ for enclosure_node in bogus_enclosures
+ enclosure_url = FeedTools::HtmlHelper.unescape_entities(
+ enclosure_node.attributes["url"].to_s)
+ enclosure = nil
+ new_enclosure = false
+ for existing_enclosure in @enclosures
+ if existing_enclosure.url == enclosure_url
+ enclosure = existing_enclosure
+ break
+ end
+ end
+ if enclosure.nil?
+ new_enclosure = true
+ enclosure = FeedTools::Enclosure.new
+ end
+ enclosure.url = enclosure_url
+ if File.extname(enclosure_url) == ".wmv"
+ enclosure.type = "video/x-ms-wmv"
+ end
+ enclosure.explicit = false
+ if new_enclosure
+ @enclosures << enclosure
+ end
+ end
# Creates an anonymous method to parse content objects from the media
# module. We do this to avoid excessive duplication of code since we
# have to do identical processing for content objects within group
# objects.
@@ -871,25 +1062,30 @@
enclosure.framerate = enclosure_node.attributes["framerate"].to_i
enclosure.expression =
enclosure_node.attributes["expression"].to_s
enclosure.is_default =
(enclosure_node.attributes["isDefault"].to_s.downcase == "true")
- enclosure_thumbnail_url = FeedTools::XmlHelper.try_xpaths(enclosure_node,
- ["media:thumbnail/@url"], :select_result_value => true)
+ enclosure_thumbnail_url =
+ FeedTools::XmlHelper.try_xpaths(enclosure_node,
+ ["media:thumbnail/@url"], :select_result_value => true)
if !enclosure_thumbnail_url.blank?
enclosure.thumbnail = FeedTools::EnclosureThumbnail.new(
- FeedTools::HtmlHelper.unescape_entities(enclosure_thumbnail_url),
FeedTools::HtmlHelper.unescape_entities(
- FeedTools::XmlHelper.try_xpaths(enclosure_node, ["media:thumbnail/@height"],
+ enclosure_thumbnail_url),
+ FeedTools::HtmlHelper.unescape_entities(
+ FeedTools::XmlHelper.try_xpaths(enclosure_node,
+ ["media:thumbnail/@height"],
:select_result_value => true)),
FeedTools::HtmlHelper.unescape_entities(
- FeedTools::XmlHelper.try_xpaths(enclosure_node, ["media:thumbnail/@width"],
+ FeedTools::XmlHelper.try_xpaths(enclosure_node,
+ ["media:thumbnail/@width"],
:select_result_value => true))
)
end
enclosure.categories = []
- for category in FeedTools::XmlHelper.try_xpaths_all(enclosure_node, ["media:category"])
+ for category in FeedTools::XmlHelper.try_xpaths_all(
+ enclosure_node, ["media:category"])
enclosure.categories << FeedTools::Category.new
enclosure.categories.last.term =
FeedTools::HtmlHelper.unescape_entities(category.inner_xml)
enclosure.categories.last.scheme =
FeedTools::HtmlHelper.unescape_entities(
@@ -902,50 +1098,58 @@
end
if enclosure.categories.last.label.blank?
enclosure.categories.last.label = nil
end
end
- enclosure_media_hash = FeedTools::XmlHelper.try_xpaths(enclosure_node,
- ["media:hash/text()"], :select_result_value => true)
+ enclosure_media_hash =
+ FeedTools::XmlHelper.try_xpaths(enclosure_node,
+ ["media:hash/text()"], :select_result_value => true)
if !enclosure_media_hash.nil?
enclosure.hash = FeedTools::EnclosureHash.new(
- FeedTools::HtmlHelper.sanitize_html(FeedTools::HtmlHelper.unescape_entities(
- enclosure_media_hash), :strip),
+ FeedTools::HtmlHelper.sanitize_html(
+ FeedTools::HtmlHelper.unescape_entities(
+ enclosure_media_hash), :strip),
"md5"
)
end
- enclosure_media_player_url = FeedTools::XmlHelper.try_xpaths(enclosure_node,
- ["media:player/@url"], :select_result_value => true)
+ enclosure_media_player_url =
+ FeedTools::XmlHelper.try_xpaths(enclosure_node,
+ ["media:player/@url"], :select_result_value => true)
if !enclosure_media_player_url.blank?
enclosure.player = FeedTools::EnclosurePlayer.new(
- FeedTools::HtmlHelper.unescape_entities(enclosure_media_player_url),
FeedTools::HtmlHelper.unescape_entities(
+ enclosure_media_player_url),
+ FeedTools::HtmlHelper.unescape_entities(
FeedTools::XmlHelper.try_xpaths(enclosure_node,
["media:player/@height"], :select_result_value => true)),
FeedTools::HtmlHelper.unescape_entities(
FeedTools::XmlHelper.try_xpaths(enclosure_node,
["media:player/@width"], :select_result_value => true))
)
end
enclosure.credits = []
- for credit in FeedTools::XmlHelper.try_xpaths_all(enclosure_node, ["media:credit"])
+ for credit in FeedTools::XmlHelper.try_xpaths_all(
+ enclosure_node, ["media:credit"])
enclosure.credits << FeedTools::EnclosureCredit.new(
- FeedTools::HtmlHelper.unescape_entities(credit.inner_xml.to_s.strip),
FeedTools::HtmlHelper.unescape_entities(
+ credit.inner_xml.to_s.strip),
+ FeedTools::HtmlHelper.unescape_entities(
credit.attributes["role"].to_s.downcase)
)
if enclosure.credits.last.name.blank?
enclosure.credits.last.name = nil
end
if enclosure.credits.last.role.blank?
enclosure.credits.last.role = nil
end
end
- enclosure.explicit = (FeedTools::XmlHelper.try_xpaths(enclosure_node,
- ["media:adult/text()"]).to_s.downcase == "true")
+ enclosure.explicit =
+ (FeedTools::XmlHelper.try_xpaths(enclosure_node,
+ ["media:adult/text()"]).to_s.downcase == "true")
enclosure_media_text =
- FeedTools::XmlHelper.try_xpaths(enclosure_node, ["media:text/text()"])
+ FeedTools::XmlHelper.try_xpaths(enclosure_node,
+ ["media:text/text()"])
if !enclosure_media_text.blank?
enclosure.text = FeedTools::HtmlHelper.unescape_entities(
enclosure_media_text)
end
affected_enclosures << enclosure
@@ -962,36 +1166,41 @@
media_groups = []
# Parse the group objects.
for media_group in media_group_enclosures
group_media_content_enclosures =
- FeedTools::XmlHelper.try_xpaths_all(media_group, ["media:content"])
+ FeedTools::XmlHelper.try_xpaths_all(media_group,
+ ["media:content"])
# Parse the content objects within the group objects.
affected_enclosures =
parse_media_content.call(group_media_content_enclosures)
# Now make sure that content objects inherit certain properties from
# the group objects.
for enclosure in affected_enclosures
- media_group_thumbnail = FeedTools::XmlHelper.try_xpaths(media_group,
- ["media:thumbnail/@url"], :select_result_value => true)
+ media_group_thumbnail =
+ FeedTools::XmlHelper.try_xpaths(media_group,
+ ["media:thumbnail/@url"], :select_result_value => true)
if enclosure.thumbnail.nil? && !media_group_thumbnail.blank?
enclosure.thumbnail = FeedTools::EnclosureThumbnail.new(
FeedTools::HtmlHelper.unescape_entities(
media_group_thumbnail),
FeedTools::HtmlHelper.unescape_entities(
- FeedTools::XmlHelper.try_xpaths(media_group, ["media:thumbnail/@height"],
+ FeedTools::XmlHelper.try_xpaths(media_group,
+ ["media:thumbnail/@height"],
:select_result_value => true)),
FeedTools::HtmlHelper.unescape_entities(
- FeedTools::XmlHelper.try_xpaths(media_group, ["media:thumbnail/@width"],
+ FeedTools::XmlHelper.try_xpaths(media_group,
+ ["media:thumbnail/@width"],
:select_result_value => true))
)
end
if (enclosure.categories.blank?)
enclosure.categories = []
- for category in FeedTools::XmlHelper.try_xpaths_all(media_group, ["media:category"])
+ for category in FeedTools::XmlHelper.try_xpaths_all(
+ media_group, ["media:category"])
enclosure.categories << FeedTools::Category.new
enclosure.categories.last.term =
FeedTools::HtmlHelper.unescape_entities(category.inner_xml)
enclosure.categories.last.scheme =
FeedTools::HtmlHelper.unescape_entities(
@@ -1005,35 +1214,46 @@
if enclosure.categories.last.label.blank?
enclosure.categories.last.label = nil
end
end
end
- enclosure_media_group_hash = FeedTools::XmlHelper.try_xpaths(enclosure_node,
- ["media:hash/text()"], :select_result_value => true)
+ enclosure_media_group_hash =
+ FeedTools::XmlHelper.try_xpaths(enclosure_node,
+ ["media:hash/text()"], :select_result_value => true)
if enclosure.hash.nil? && !enclosure_media_group_hash.blank?
enclosure.hash = FeedTools::EnclosureHash.new(
- FeedTools::HtmlHelper.sanitize_html(FeedTools::HtmlHelper.unescape_entities(
- enclosure_media_group_hash), :strip),
+ FeedTools::HtmlHelper.sanitize_html(
+ FeedTools::HtmlHelper.unescape_entities(
+ enclosure_media_group_hash), :strip),
"md5"
)
end
- enclosure_media_group_url = FeedTools::XmlHelper.try_xpaths(media_group,
- "media:player/@url", :select_result_value => true)
+ enclosure_media_group_url = FeedTools::XmlHelper.try_xpaths(
+ media_group,
+ "media:player/@url",
+ :select_result_value => true
+ )
if enclosure.player.nil? && !enclosure_media_group_url.blank?
enclosure.player = FeedTools::EnclosurePlayer.new(
- FeedTools::HtmlHelper.unescape_entities(enclosure_media_group_url),
FeedTools::HtmlHelper.unescape_entities(
- FeedTools::XmlHelper.try_xpaths(media_group, ["media:player/@height"],
+ enclosure_media_group_url),
+ FeedTools::HtmlHelper.unescape_entities(
+ FeedTools::XmlHelper.try_xpaths(media_group,
+ ["media:player/@height"],
:select_result_value => true)),
FeedTools::HtmlHelper.unescape_entities(
- FeedTools::XmlHelper.try_xpaths(media_group, ["media:player/@width"],
- :select_result_value => true))
+ FeedTools::XmlHelper.try_xpaths(media_group,
+ ["media:player/@width"],
+ :select_result_value => true
+ )
+ )
)
end
if enclosure.credits.nil? || enclosure.credits.size == 0
enclosure.credits = []
- for credit in FeedTools::XmlHelper.try_xpaths_all(media_group, ["media:credit"])
+ for credit in FeedTools::XmlHelper.try_xpaths_all(
+ media_group, ["media:credit"])
enclosure.credits << FeedTools::EnclosureCredit.new(
FeedTools::HtmlHelper.unescape_entities(credit.inner_xml),
FeedTools::HtmlHelper.unescape_entities(
credit.attributes["role"].to_s.downcase)
)
@@ -1041,17 +1261,19 @@
enclosure.credits.last.role = nil
end
end
end
if enclosure.explicit?.nil?
- enclosure.explicit = ((FeedTools::XmlHelper.try_xpaths(media_group, [
- "media:adult/text()"
- ], :select_result_value => true).downcase == "true") ?
- true : false)
+ enclosure.explicit =
+ ((FeedTools::XmlHelper.try_xpaths(media_group, [
+ "media:adult/text()"
+ ], :select_result_value => true).downcase == "true") ?
+ true : false)
end
- enclosure_media_group_text = FeedTools::XmlHelper.try_xpaths(media_group,
- ["media:text/text()"], :select_result_value => true)
+ enclosure_media_group_text =
+ FeedTools::XmlHelper.try_xpaths(media_group,
+ ["media:text/text()"], :select_result_value => true)
if enclosure.text.nil? && !enclosure_media_group_text.blank?
enclosure.text = FeedTools::HtmlHelper.sanitize_html(
FeedTools::HtmlHelper.unescape_entities(
enclosure_media_group_text), :strip)
end
@@ -1068,16 +1290,18 @@
end
end
# Add all the itunes categories
itunes_categories =
- FeedTools::XmlHelper.try_xpaths_all(self.root_node, ["itunes:category"])
+ FeedTools::XmlHelper.try_xpaths_all(self.root_node,
+ ["itunes:category"])
for itunes_category in itunes_categories
genre = "Podcasts"
category = itunes_category.attributes["text"].to_s
subcategory =
- FeedTools::XmlHelper.try_xpaths(itunes_category, ["itunes:category/@text"],
+ FeedTools::XmlHelper.try_xpaths(itunes_category,
+ ["itunes:category/@text"],
:select_result_value => true)
category_path = genre
if !category.blank?
category_path << "/" + category
end
@@ -1134,10 +1358,18 @@
# Make sure we don't have duplicate categories
unless enclosure.categories.nil?
enclosure.categories.uniq!
end
+
+ # Normalize enclosure URIs
+ if !enclosure.href.blank?
+ enclosure.href =
+ FeedTools::UriHelper.normalize_url(enclosure.href)
+ else
+ enclosure.href = nil
+ end
end
# And finally, now things get complicated. This is where we make
# sure that the enclosures method only returns either default
# enclosures or enclosures with only one version. Any enclosures
@@ -1300,17 +1532,30 @@
@author.url, [author_node.base_uri, self.base_uri])
end
rescue
end
end
+ if FeedTools::XmlHelper.try_xpaths(author_node,
+ ["@gr:unknown-author"], :select_result_value => true) == "true"
+ if @author.name == "(author unknown)"
+ @author.name = nil
+ end
+ end
end
# Fall back on the itunes module if we didn't find an author name
begin
@author.name = self.itunes_author if @author.name.nil?
rescue
@author.name = nil
end
+ if @author.name.blank? && @author.email.blank? &&
+ @author.href.blank?
+ parent_feed = self.feed
+ if parent_feed != nil
+ @author = parent_feed.author.dup
+ end
+ end
end
return @author
end
# Sets the feed item author
@@ -1390,10 +1635,17 @@
@publisher.url, [root_base_uri, self.base_uri])
end
rescue
end
end
+ if @publisher.name.blank? && @publisher.email.blank? &&
+ @publisher.href.blank?
+ parent_feed = self.feed
+ if parent_feed != nil
+ @publisher = parent_feed.publisher.dup
+ end
+ end
end
return @publisher
end
# Sets the feed publisher
@@ -1420,11 +1672,16 @@
def itunes_author
# Memoized in @itunes_author: the lookup below only runs while it is nil.
if @itunes_author.nil?
# Read the text of the item's itunes:author element and unescape entities.
@itunes_author = FeedTools::HtmlHelper.unescape_entities(
FeedTools::XmlHelper.try_xpaths(self.root_node,
["itunes:author/text()"], :select_result_value => true))
# When the item has no value of its own, fall back to the parent feed's
# itunes_author. The added (+) lines check that a parent feed exists
# before calling it; the removed (-) line called feed.itunes_author
# without that check.
- @itunes_author = feed.itunes_author if @itunes_author.blank?
+ if @itunes_author.blank?
+ parent_feed = self.feed
+ if parent_feed != nil
+ @itunes_author = parent_feed.itunes_author
+ end
+ end
end
return @itunes_author
end
# Sets the contents of the itunes:author element
@@ -1482,11 +1739,12 @@
"atom03:published/text()",
"atom:published/text()",
"published/text()",
"dc:date/text()",
"pubDate/text()",
- "date/text()"
+ "date/text()",
+ "lastupdated/text()"
], :select_result_value => true)
begin
if !time_string.blank?
@time = Time.parse(time_string).gmtime
elsif self.configurations[:timestamp_estimation_enabled] &&
@@ -1583,11 +1841,12 @@
"updated/text()",
"atom10:modified/text()",
"atom03:modified/text()",
"atom:modified/text()",
"modified/text()",
- "lastBuildDate/text()"
+ "lastBuildDate/text()",
+ "lastupdated/text()"
], :select_result_value => true)
if !updated_string.blank?
@updated = Time.parse(updated_string).gmtime rescue nil
else
@updated = nil
@@ -1628,40 +1887,12 @@
# Sets the feed item published time
def published=(new_published)
# Stores the argument unchanged; no parsing or conversion happens here.
@published = new_published
end
-
- # Returns the url for posting comments
- def comments
- if @comments.nil?
- @comments = FeedTools::XmlHelper.try_xpaths(self.root_node, ["comments/text()"],
- :select_result_value => true)
- begin
- if !(@comments =~ /^file:/) &&
- !FeedTools::UriHelper.is_uri?(@comments)
- root_base_uri = nil
- unless self.root_node.nil?
- root_base_uri = self.root_node.base_uri
- end
- @comments = FeedTools::UriHelper.resolve_relative_uri(
- @comments, [root_base_uri, self.base_uri])
- end
- rescue
- end
- if self.configurations[:url_normalization_enabled]
- @comments = FeedTools::UriHelper.normalize_url(@comments)
- end
- end
- return @comments
- end
- # Sets the url for posting comments
- def comments=(new_comments)
- @comments = new_comments
- end
-
+ # TODO: FIX ME! This code is completely wrong.
# The source that this post was based on
def source
if @source.nil?
@source = FeedTools::Link.new
@source.href = FeedTools::XmlHelper.try_xpaths(
@@ -1700,11 +1931,12 @@
rdf_bag = FeedTools::XmlHelper.try_xpaths_all(self.root_node,
["taxo:topics/rdf:Bag/rdf:li"])
if rdf_bag != nil && rdf_bag.size > 0
for tag_node in rdf_bag
begin
- tag_url = FeedTools::XmlHelper.try_xpaths(tag_node, ["@resource"],
+ tag_url = FeedTools::XmlHelper.try_xpaths(tag_node,
+ ["@resource"],
:select_result_value => true)
tag_match = tag_url.scan(/\/(tag|tags)\/(\w+)$/)
if tag_match.size > 0
@tags << tag_match.first.last.downcase.strip
end
@@ -1713,29 +1945,32 @@
end
end
end
if @tags.nil? || @tags.size == 0
@tags = []
- tag_list = FeedTools::XmlHelper.try_xpaths_all(self.root_node, ["category/text()"],
+ tag_list = FeedTools::XmlHelper.try_xpaths_all(self.root_node,
+ ["category/text()"],
:select_result_value => true)
for tag in tag_list
@tags << tag.to_s.downcase.strip
end
end
if @tags.nil? || @tags.size == 0
@tags = []
- tag_list = FeedTools::XmlHelper.try_xpaths_all(self.root_node, ["dc:subject/text()"],
+ tag_list = FeedTools::XmlHelper.try_xpaths_all(self.root_node,
+ ["dc:subject/text()"],
:select_result_value => true)
for tag in tag_list
@tags << tag.to_s.downcase.strip
end
end
if @tags.blank?
begin
- itunes_keywords_string = FeedTools::XmlHelper.try_xpaths(self.root_node, [
- "itunes:keywords/text()"
- ], :select_result_value => true)
+ itunes_keywords_string =
+ FeedTools::XmlHelper.try_xpaths(self.root_node, [
+ "itunes:keywords/text()"
+ ], :select_result_value => true)
unless itunes_keywords_string.blank?
@tags = itunes_keywords_string.downcase.split(",")
if @tags.size == 1
@tags = itunes_keywords_string.downcase.split(" ")
@tags = @tags.map { |tag| tag.chomp(",") }
@@ -1769,13 +2004,15 @@
if @explicit.nil?
explicit_string = FeedTools::XmlHelper.try_xpaths(self.root_node, [
"media:adult/text()",
"itunes:explicit/text()"
], :select_result_value => true)
- if explicit_string == "true" || explicit_string == "yes" ||
- feed.explicit?
+ parent_feed = self.feed
+ if explicit_string == "true" || explicit_string == "yes"
@explicit = true
+ elsif parent_feed != nil && parent_feed.explicit?
+ @explicit = true
else
@explicit = false
end
end
return @explicit
@@ -1784,34 +2021,56 @@
# Sets whether or not the feed contains explicit material
def explicit=(new_explicit)
  # Collapse any truthy/falsy argument into a strict true or false so that
  # @explicit never holds an arbitrary object (or nil) after assignment.
  @explicit = !!new_explicit
end
- # A hook method that is called during the feed generation process. Overriding this method
- # will enable additional content to be inserted into the feed.
+ # A hook method that is called during the feed generation process.
+ # Overriding this method will enable additional content to be inserted
+ # into the feed.
def build_xml_hook(feed_type, version, xml_builder)
  # Default implementation: ignores all three arguments and contributes
  # nothing. Subclasses override this to emit extra content.
  nil
end
# Generates xml based on the content of the feed item
def build_xml(feed_type=(self.feed.feed_type or "atom"), version=nil,
xml_builder=Builder::XmlMarkup.new(
:indent => 2, :escape_attrs => false))
+
+ parent_feed = self.feed
+ if parent_feed.find_node(
+ "access:restriction/@relationship").to_s == "deny"
+ raise StandardError,
+ "Operation not permitted. This feed denies redistribution."
+ elsif parent_feed.find_node("@indexing:index").to_s == "no"
+ raise StandardError,
+ "Operation not permitted. This feed denies redistribution."
+ end
+ if self.find_node(
+ "access:restriction/@relationship").to_s == "deny"
+ raise StandardError,
+ "Operation not permitted. This feed item denies redistribution."
+ end
+
+ self.full_parse()
+
if feed_type == "rss" && (version == nil || version == 0.0)
version = 1.0
elsif feed_type == "atom" && (version == nil || version == 0.0)
version = 1.0
end
- if feed_type == "rss" && (version == 0.9 || version == 1.0 || version == 1.1)
+ if feed_type == "rss" &&
+ (version == 0.9 || version == 1.0 || version == 1.1)
# RDF-based rss format
if link.nil?
- raise "Cannot generate an rdf-based feed item with a nil link field."
+ raise "Cannot generate an rdf-based feed item with a " +
+ "nil link field."
end
return xml_builder.item("rdf:about" =>
FeedTools::HtmlHelper.escape_entities(link)) do
unless self.title.blank?
- xml_builder.title(FeedTools::HtmlHelper.strip_html_tags(self.title))
+ xml_builder.title(
+ FeedTools::HtmlHelper.strip_html_tags(self.title))
else
xml_builder.title
end
unless self.link.blank?
xml_builder.link(self.link)
@@ -1849,11 +2108,12 @@
end
elsif feed_type == "rss"
# normal rss format
return xml_builder.item do
unless self.title.blank?
- xml_builder.title(FeedTools::HtmlHelper.strip_html_tags(self.title))
+ xml_builder.title(
+ FeedTools::HtmlHelper.strip_html_tags(self.title))
end
unless self.link.blank?
xml_builder.link(self.link)
end
unless self.author.nil? || self.author.name.nil?
@@ -1874,15 +2134,16 @@
if !self.published.nil?
xml_builder.pubDate(self.published.rfc822)
elsif !self.time.nil?
xml_builder.pubDate(self.time.rfc822)
end
- unless self.copyright.blank?
- xml_builder.tag!("dc:rights", self.copyright)
+ unless self.rights.blank?
+ xml_builder.tag!("dc:rights", self.rights)
end
unless self.guid.blank?
- if FeedTools::UriHelper.is_uri?(self.guid) && (self.guid =~ /^http/)
+ if FeedTools::UriHelper.is_uri?(self.guid) &&
+ (self.guid =~ /^http/)
xml_builder.guid(self.guid, "isPermaLink" => "true")
else
xml_builder.guid(self.guid, "isPermaLink" => "false")
end
else
@@ -1917,11 +2178,12 @@
end
end
end
rescue
end
- attribute_hash["url"] = FeedTools::UriHelper.normalize_url(enclosure.url)
+ attribute_hash["url"] =
+ FeedTools::UriHelper.normalize_url(enclosure.url)
if enclosure.type != nil
attribute_hash["type"] = enclosure.type
end
if enclosure.file_size != nil && enclosure.file_size.to_i > 0
attribute_hash["length"] = enclosure.file_size.to_s
@@ -1991,22 +2253,24 @@
xml_builder.rights(self.rights)
end
if self.id != nil
unless FeedTools::UriHelper.is_uri? self.id
if self.time != nil && self.link != nil
- xml_builder.id(FeedTools::UriHelper.build_tag_uri(self.link, self.time))
+ xml_builder.id(FeedTools::UriHelper.build_tag_uri(
+ self.link, self.time))
elsif self.link != nil
xml_builder.id(FeedTools.build_urn_uuid_uri(self.link))
else
raise "The unique id must be a URI. " +
"(Attempted to generate id, but failed.)"
end
else
xml_builder.id(self.id)
end
elsif self.time != nil && self.link != nil
- xml_builder.id(FeedTools::UriHelper.build_tag_uri(self.link, self.time))
+ xml_builder.id(FeedTools::UriHelper.build_tag_uri(
+ self.link, self.time))
else
raise "Cannot build feed, missing feed unique id."
end
unless self.tags.nil? || self.tags.size == 0
for tag in self.tags
@@ -2016,10 +2280,11 @@
unless self.enclosures.blank? || self.enclosures.size == 0
for enclosure in self.enclosures
attribute_hash = {}
next if enclosure.url.blank?
attribute_hash["rel"] = "enclosure"
- attribute_hash["href"] = FeedTools::UriHelper.normalize_url(enclosure.url)
+ attribute_hash["href"] =
+ FeedTools::UriHelper.normalize_url(enclosure.url)
if enclosure.type != nil
attribute_hash["type"] = enclosure.type
end
if enclosure.file_size != nil && enclosure.file_size.to_i > 0
attribute_hash["length"] = enclosure.file_size.to_s