lib/feed_tools/feed_item.rb in feedtools-0.2.22 vs lib/feed_tools/feed_item.rb in feedtools-0.2.23
- old
+ new
@@ -19,144 +19,22 @@
# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#++
+require 'feed_tools/feed_structures'
+
module FeedTools
# The <tt>FeedTools::FeedItem</tt> class represents the structure of
# a single item within a web feed.
class FeedItem
- # :stopdoc:
- include REXML
- include GenericHelper
- private :validate_options
- # :startdoc:
-
- # This class stores information about a feed item's file enclosures.
- class Enclosure
- # The url for the enclosure
- attr_accessor :url
- # The MIME type of the file referenced by the enclosure
- attr_accessor :type
- # The size of the file referenced by the enclosure
- attr_accessor :file_size
- # The total play time of the file referenced by the enclosure
- attr_accessor :duration
- # The height in pixels of the enclosed media
- attr_accessor :height
- # The width in pixels of the enclosed media
- attr_accessor :width
- # The bitrate of the enclosed media
- attr_accessor :bitrate
- # The framerate of the enclosed media
- attr_accessor :framerate
- # The thumbnail for this enclosure
- attr_accessor :thumbnail
- # The categories for this enclosure
- attr_accessor :categories
- # A hash of the enclosed file
- attr_accessor :hash
- # A website containing some kind of media player instead of a direct
- # link to the media file.
- attr_accessor :player
- # A list of credits for the enclosed media
- attr_accessor :credits
- # A text rendition of the enclosed media
- attr_accessor :text
- # A list of alternate version of the enclosed media file
- attr_accessor :versions
- # The default version of the enclosed media file
- attr_accessor :default_version
-
- # Returns true if this is the default enclosure
- def is_default?
- return @is_default
- end
-
- # Sets whether this is the default enclosure for the media group
- def is_default=(new_is_default)
- @is_default = new_is_default
- end
-
- # Returns true if the enclosure contains explicit material
- def explicit?
- return @explicit
- end
-
- # Sets the explicit attribute on the enclosure
- def explicit=(new_explicit)
- @explicit = new_explicit
- end
-
- # Determines if the object is a sample, or the full version of the
- # object, or if it is a stream.
- # Possible values are 'sample', 'full', 'nonstop'.
- def expression
- return @expression
- end
-
- # Sets the expression attribute on the enclosure.
- # Allowed values are 'sample', 'full', 'nonstop'.
- def expression=(new_expression)
- unless ['sample', 'full', 'nonstop'].include? new_expression.downcase
- raise ArgumentError,
- "Permitted values are 'sample', 'full', 'nonstop'."
- end
- @expression = new_expression.downcase
- end
-
- # Returns true if this enclosure contains audio content
- def audio?
- unless self.type.nil?
- return true if (self.type =~ /^audio/) != nil
- end
- # TODO: create a more complete list
- # =================================
- audio_extensions = ['mp3', 'm4a', 'm4p', 'wav', 'ogg', 'wma']
- audio_extensions.each do |extension|
- if (url =~ /#{extension}$/) != nil
- return true
- end
- end
- return false
- end
-
- # Returns true if this enclosure contains video content
- def video?
- unless self.type.nil?
- return true if (self.type =~ /^video/) != nil
- return true if self.type == "image/mov"
- end
- # TODO: create a more complete list
- # =================================
- video_extensions = ['mov', 'mp4', 'avi', 'wmv', 'asf']
- video_extensions.each do |extension|
- if (url =~ /#{extension}$/) != nil
- return true
- end
- end
- return false
- end
-
- alias_method :link, :url
- alias_method :link=, :url=
- end
-
- # TODO: Make these actual classes instead of structs
- # ==================================================
- EnclosureHash = Struct.new( "EnclosureHash", :hash, :type )
- EnclosurePlayer = Struct.new( "EnclosurePlayer", :url, :height, :width )
- EnclosureCredit = Struct.new( "EnclosureCredit", :name, :role )
- EnclosureThumbnail = Struct.new( "EnclosureThumbnail", :url, :height,
- :width )
-
# Initialize the feed item object
def initialize
super
@feed_data = nil
@feed_data_type = :xml
- @xml_doc = nil
+ @xml_document = nil
@root_node = nil
@title = nil
@id = nil
@time = Time.now.gmtime
end
@@ -189,10 +67,18 @@
end
end
return parent_feed
end
+ # Returns the feed item's encoding.
+ def encoding
+ if @encoding.nil?
+ @encoding = self.feed.encoding
+ end
+ return @encoding
+ end
+
# Returns the feed item's raw data.
def feed_data
return @feed_data
end
@@ -211,57 +97,84 @@
def feed_data_type=(new_feed_data_type)
@feed_data_type = new_feed_data_type
end
# Returns a REXML Document of the feed_data
- def xml
+ def xml_document
if self.feed_data_type != :xml
- @xml_doc = nil
+ @xml_document = nil
else
- if @xml_doc.nil?
+ if @xml_document.nil?
# TODO: :ignore_whitespace_nodes => :all
# Add that?
# ======================================
- @xml_doc = Document.new(self.feed_data)
+ @xml_document = REXML::Document.new(self.feed_data)
end
end
- return @xml_doc
+ return @xml_document
end
# Returns the first node within the root_node that matches the xpath query.
def find_node(xpath, select_result_value=false)
if feed.feed_data_type != :xml
raise "The feed data type is not xml."
end
- return try_xpaths(self.root_node, [xpath],
+ return FeedTools::XmlHelper.try_xpaths(self.root_node, [xpath],
:select_result_value => select_result_value)
end
# Returns all nodes within the root_node that match the xpath query.
def find_all_nodes(xpath, select_result_value=false)
if feed.feed_data_type != :xml
raise "The feed data type is not xml."
end
- return try_xpaths_all(self.root_node, [xpath],
+ return FeedTools::XmlHelper.try_xpaths_all(self.root_node, [xpath],
:select_result_value => select_result_value)
end
# Returns the root node of the feed item.
def root_node
if @root_node.nil?
- if xml.nil?
+ if self.xml_document.nil?
return nil
end
- @root_node = xml.root
+ @root_node = self.xml_document.root
end
return @root_node
end
+
+ # Sets the root node of the feed item.
+ #
+ # This allows namespace information to be inherited by the feed item
+ # from the feed itself. When creating individual nodes from scratch,
+ # the <tt>feed_data=</tt> method should be used instead.
+ def root_node=(new_root_node)
+ @root_node = new_root_node
+ end
+
+ # Returns the feed type of this item
+ def feed_type
+ if @feed_type.nil?
+ parent_feed = self.feed
+ @feed_type = parent_feed.feed_type unless parent_feed.nil?
+ end
+ return @feed_type
+ end
+
+ # Returns the feed version of this item
+ def feed_version
+ if @feed_version.nil?
+ parent_feed = self.feed
+ @feed_version = parent_feed.feed_version unless parent_feed.nil?
+ end
+ return @feed_version
+ end
# Returns the feed item's unique id
def id
if @id.nil?
- @id = try_xpaths(self.root_node, [
+ @id = FeedTools::XmlHelper.try_xpaths(self.root_node, [
"atom10:id/text()",
"atom03:id/text()",
"atom:id/text()",
"id/text()",
"guid/text()"
@@ -277,60 +190,33 @@
# Returns the feed item title
def title
if @title.nil?
repair_entities = false
- title_node = try_xpaths(self.root_node, [
+ title_node = FeedTools::XmlHelper.try_xpaths(self.root_node, [
"atom10:title",
"atom03:title",
"atom:title",
"title",
"dc:title"
])
- if title_node.nil?
- return nil
+ @title = FeedTools::HtmlHelper.process_text_construct(title_node,
+ self.feed_type, self.feed_version)
+ if self.feed_type == "atom" ||
+ FeedTools.configurations[:always_strip_wrapper_elements]
+ @title = FeedTools::HtmlHelper.strip_wrapper_element(@title)
end
- title_type = try_xpaths(title_node, "@type",
- :select_result_value => true)
- title_mode = try_xpaths(title_node, "@mode",
- :select_result_value => true)
- title_encoding = try_xpaths(title_node, "@encoding",
- :select_result_value => true)
-
- # Note that we're checking for misuse of type, mode and encoding here
- if title_type == "base64" || title_mode == "base64" ||
- title_encoding == "base64"
- @title = Base64.decode64(title_node.inner_xml.strip)
- elsif title_type == "xhtml" || title_mode == "xhtml" ||
- title_type == "xml" || title_mode == "xml" ||
- title_type == "application/xhtml+xml"
- @title = title_node.inner_xml
- elsif title_type == "escaped" || title_mode == "escaped"
- @title = FeedTools.unescape_entities(
- title_node.inner_xml)
- else
- @title = title_node.inner_xml
- repair_entities = true
- end
- unless @title.nil?
- @title = FeedTools.sanitize_html(@title, :strip)
- @title = FeedTools.unescape_entities(@title) if repair_entities
- @title = FeedTools.tidy_html(@title) unless repair_entities
- end
if !@title.blank? && FeedTools.configurations[:strip_comment_count]
# Some blogging tools include the number of comments in a post
# in the title... this is supremely ugly, and breaks any
# applications which expect the title to be static, so we're
# gonna strip them out.
#
# If for some incredibly weird reason you need the actual
# unstripped title, just use find_node("title/text()").to_s
@title = @title.strip.gsub(/\[\d*\]$/, "").strip
end
- @title.gsub!(/>\n</, "><")
- @title.gsub!(/\n/, " ")
- @title.strip!
@title = nil if @title.blank?
end
return @title
end
@@ -341,19 +227,23 @@
# Returns the feed item content
def content
if @content.nil?
repair_entities = false
- content_node = try_xpaths(self.root_node, [
+ content_node = FeedTools::XmlHelper.try_xpaths(self.root_node, [
"atom10:content",
"atom03:content",
"atom:content",
+ "xhtml:body",
+ "body",
+ "xhtml:div",
+ "div",
+ "p:payload",
+ "payload",
"content:encoded",
"content",
"fullitem",
- "xhtml:body",
- "body",
"encoded",
"description",
"tagline",
"subtitle",
"atom10:summary",
@@ -362,55 +252,25 @@
"summary",
"abstract",
"blurb",
"info"
])
- if content_node.nil?
- return nil
+ @content = FeedTools::HtmlHelper.process_text_construct(content_node,
+ self.feed_type, self.feed_version)
+ if self.feed_type == "atom" ||
+ FeedTools.configurations[:always_strip_wrapper_elements]
+ @content = FeedTools::HtmlHelper.strip_wrapper_element(@content)
end
- content_type = try_xpaths(content_node, "@type",
- :select_result_value => true)
- content_mode = try_xpaths(content_node, "@mode",
- :select_result_value => true)
- content_encoding = try_xpaths(content_node, "@encoding",
- :select_result_value => true)
-
- # Note that we're checking for misuse of type, mode and encoding here
- if !content_encoding.blank?
- @content =
- "[Embedded data objects are not currently supported.]"
- elsif content_node.cdatas.size > 0
- @content = content_node.cdatas.first.value
- elsif content_type == "base64" || content_mode == "base64" ||
- content_encoding == "base64"
- @content = Base64.decode64(content_node.inner_xml.strip)
- elsif content_type == "xhtml" || content_mode == "xhtml" ||
- content_type == "xml" || content_mode == "xml" ||
- content_type == "application/xhtml+xml"
- @content = content_node.inner_xml
- elsif content_type == "escaped" || content_mode == "escaped"
- @content = FeedTools.unescape_entities(
- content_node.inner_xml)
- else
- @content = content_node.inner_xml
- repair_entities = true
+ if @content.blank?
+ @content = self.media_text
end
if @content.blank?
@content = self.itunes_summary
end
if @content.blank?
@content = self.itunes_subtitle
end
-
- unless @content.blank?
- @content = FeedTools.sanitize_html(@content, :strip)
- @content = FeedTools.unescape_entities(@content) if repair_entities
- @content = FeedTools.tidy_html(@content)
- end
-
- @content = @content.strip unless @content.nil?
- @content = nil if @content.blank?
end
return @content
end
# Sets the feed item content
@@ -420,76 +280,50 @@
# Returns the feed item summary
def summary
if @summary.nil?
repair_entities = false
- summary_node = try_xpaths(self.root_node, [
+ summary_node = FeedTools::XmlHelper.try_xpaths(self.root_node, [
"atom10:summary",
"atom03:summary",
"atom:summary",
"summary",
"abstract",
"blurb",
"description",
"tagline",
"subtitle",
- "fullitem",
"xhtml:body",
"body",
+ "xhtml:div",
+ "div",
+ "p:payload",
+ "payload",
+ "fullitem",
"content:encoded",
"encoded",
"atom10:content",
"atom03:content",
"atom:content",
"content",
"info"
])
- if summary_node.nil?
- return nil
+ @summary = FeedTools::HtmlHelper.process_text_construct(summary_node,
+ self.feed_type, self.feed_version)
+ if self.feed_type == "atom" ||
+ FeedTools.configurations[:always_strip_wrapper_elements]
+ @summary = FeedTools::HtmlHelper.strip_wrapper_element(@summary)
end
- summary_type = try_xpaths(summary_node, "@type",
- :select_result_value => true)
- summary_mode = try_xpaths(summary_node, "@mode",
- :select_result_value => true)
- summary_encoding = try_xpaths(summary_node, "@encoding",
- :select_result_value => true)
-
- # Note that we're checking for misuse of type, mode and encoding here
- if !summary_encoding.blank?
- @summary =
- "[Embedded data objects are not currently supported.]"
- elsif summary_node.cdatas.size > 0
- @summary = summary_node.cdatas.first.value
- elsif summary_type == "base64" || summary_mode == "base64" ||
- summary_encoding == "base64"
- @summary = Base64.decode64(summary_node.inner_xml.strip)
- elsif summary_type == "xhtml" || summary_mode == "xhtml" ||
- summary_type == "xml" || summary_mode == "xml" ||
- summary_type == "application/xhtml+xml"
- @summary = summary_node.inner_xml
- elsif summary_type == "escaped" || summary_mode == "escaped"
- @summary = FeedTools.unescape_entities(
- summary_node.inner_xml)
- else
- @summary = summary_node.inner_xml
- repair_entities = true
+ if @summary.blank?
+ @summary = self.media_text
end
if @summary.blank?
@summary = self.itunes_summary
end
if @summary.blank?
@summary = self.itunes_subtitle
end
-
- unless @summary.blank?
- @summary = FeedTools.sanitize_html(@summary, :strip)
- @summary = FeedTools.unescape_entities(@summary) if repair_entities
- @summary = FeedTools.tidy_html(@summary)
- end
-
- @summary = @summary.strip unless @summary.nil?
- @summary = nil if @summary.blank?
end
return @summary
end
# Sets the feed item summary
@@ -498,16 +332,17 @@
end
# Returns the contents of the itunes:summary element
def itunes_summary
if @itunes_summary.nil?
- @itunes_summary = try_xpaths(self.root_node, [
+ @itunes_summary = FeedTools::XmlHelper.try_xpaths(self.root_node, [
"itunes:summary/text()"
- ])
+ ], :select_result_value => true)
unless @itunes_summary.blank?
- @itunes_summary = FeedTools.unescape_entities(@itunes_summary)
- @itunes_summary = FeedTools.sanitize_html(@itunes_summary)
+ @itunes_summary = FeedTools::HtmlHelper.unescape_entities(@itunes_summary)
+ @itunes_summary = FeedTools::HtmlHelper.sanitize_html(@itunes_summary)
+ @itunes_summary.strip!
else
@itunes_summary = nil
end
end
return @itunes_summary
@@ -519,16 +354,17 @@
end
# Returns the contents of the itunes:subtitle element
def itunes_subtitle
if @itunes_subtitle.nil?
- @itunes_subtitle = try_xpaths(self.root_node, [
+ @itunes_subtitle = FeedTools::XmlHelper.try_xpaths(self.root_node, [
"itunes:subtitle/text()"
- ])
+ ], :select_result_value => true)
unless @itunes_subtitle.blank?
- @itunes_subtitle = FeedTools.unescape_entities(@itunes_subtitle)
- @itunes_subtitle = FeedTools.sanitize_html(@itunes_subtitle)
+ @itunes_subtitle = FeedTools::HtmlHelper.unescape_entities(@itunes_subtitle)
+ @itunes_subtitle = FeedTools::HtmlHelper.sanitize_html(@itunes_subtitle)
+ @itunes_subtitle.strip!
else
@itunes_subtitle = nil
end
end
return @itunes_subtitle
@@ -540,18 +376,20 @@
end
# Returns the contents of the media:text element
def media_text
if @media_text.nil?
- @media_text = FeedTools.unescape_entities(XPath.first(root_node,
- "itunes:subtitle/text()").to_s)
- if @media_text == ""
+ @media_text = FeedTools::XmlHelper.try_xpaths(self.root_node, [
+ "media:text/text()"
+ ], :select_result_value => true)
+ unless @media_text.blank?
+ @media_text = FeedTools::HtmlHelper.unescape_entities(@media_text)
+ @media_text = FeedTools::HtmlHelper.sanitize_html(@media_text)
+ @media_text.strip!
+ else
@media_text = nil
end
- unless @media_text.nil?
- @media_text = FeedTools.sanitize_html(@media_text)
- end
end
return @media_text
end
# Sets the contents of the media:text element
@@ -560,119 +398,206 @@
end
# Returns the feed item link
def link
if @link.nil?
- @link = try_xpaths(self.root_node, [
- "atom10:link[@type='application/xhtml+xml']/@href",
- "atom10:link[@type='text/html']/@href",
- "atom10:link[@rel='alternate']/@href",
- "atom03:link[@type='application/xhtml+xml']/@href",
- "atom03:link[@type='text/html']/@href",
- "atom03:link[@rel='alternate']/@href",
- "atom:link[@type='application/xhtml+xml']/@href",
- "atom:link[@type='text/html']/@href",
- "atom:link[@rel='alternate']/@href",
- "link[@type='application/xhtml+xml']/@href",
- "link[@type='text/html']/@href",
- "link[@rel='alternate']/@href",
- "link/text()",
- "@rdf:about",
- "guid[@isPermaLink='true']/text()",
- "@href",
- "a/@href"
- ], :select_result_value => true)
+ max_score = 0
+ for link_object in self.links.reverse
+ score = 0
+ if FeedTools::HtmlHelper.html_type?(link_object.type)
+ score = score + 2
+ elsif link_object.type != nil
+ score = score - 1
+ end
+ if FeedTools::HtmlHelper.xml_type?(link_object.type)
+ score = score + 1
+ end
+ if link_object.rel == "alternate"
+ score = score + 1
+ end
+ if link_object.rel == "self"
+ score = score - 1
+ end
+ if score >= max_score
+ max_score = score
+ @link = link_object.href
+ end
+ end
if @link.blank?
- if FeedTools.is_uri?(self.guid) &&
- !(self.guid =~ /^urn:uuid:/) &&
- !(self.guid =~ /^tag:/)
- @link = self.guid
+ @link = FeedTools::XmlHelper.try_xpaths(self.root_node, [
+ "@href",
+ "@rdf:about",
+ "@about"
+ ], :select_result_value => true)
+ end
+ if @link.blank?
+ if FeedTools::UriHelper.is_uri?(self.id) &&
+ (self.id =~ /^http/)
+ @link = self.id
end
end
if !@link.blank?
- @link = FeedTools.unescape_entities(@link)
+ @link = FeedTools::HtmlHelper.unescape_entities(@link)
end
-# TODO: Actually implement proper relative url resolving instead of this crap
-# ===========================================================================
-#
-# if @link != "" && (@link =~ /http:\/\//) != 0 && (@link =~ /https:\/\//) != 0
-# if (feed.base[-1..-1] == "/" && @link[0..0] == "/")
-# @link = @link[1..-1]
-# end
-# # prepend the base to the link since they seem to have used a relative path
-# @link = feed.base + @link
-# end
- if @link.blank?
- link_node = try_xpaths(self.root_node, [
- "atom10:link",
- "atom03:link",
- "atom:link",
- "link"
- ])
- if link_node != nil
- if link_node.attributes['type'].to_s =~ /^image/ ||
- link_node.attributes['type'].to_s =~ /^application/ ||
- link_node.attributes['type'].to_s =~ /xml/ ||
- link_node.attributes['rel'].to_s =~ /self/
- for child in self.root_node
- if child.class == REXML::Element
- if child.name.downcase == "link"
- if child.attributes['type'].to_s =~ /^image/ ||
- child.attributes['type'].to_s =~ /^application/ ||
- child.attributes['type'].to_s =~ /xml/ ||
- child.attributes['rel'].to_s =~ /self/
- @link = nil
- next
- else
- @link = child.attributes['href'].to_s
- if @link.blank?
- @link = child.inner_xml
- end
- if @link.blank?
- next
- end
- break
- end
- end
- end
+ @link = self.comments if @link.blank?
+ @link = nil if @link.blank?
+ begin
+ if !(@link =~ /^file:/) &&
+ !FeedTools::UriHelper.is_uri?(@link)
+ stored_base_uri =
+ FeedTools::GenericHelper.recursion_trap(:feed_link) do
+ self.feed.base_uri if self.feed != nil
end
- else
- @link = link_node.attributes['href'].to_s
+ root_base_uri = nil
+ unless self.root_node.nil?
+ root_base_uri = self.root_node.base_uri
end
+ @link = FeedTools::UriHelper.resolve_relative_uri(
+ @link, [root_base_uri,stored_base_uri])
end
+ rescue
end
- @link = self.comments if @link.blank?
- @link = nil if @link.blank?
if FeedTools.configurations[:url_normalization_enabled]
- @link = FeedTools.normalize_url(@link)
+ @link = FeedTools::UriHelper.normalize_url(@link)
end
end
return @link
end
# Sets the feed item link
def link=(new_link)
@link = new_link
end
+
+ # Returns the links collection
+ def links
+ if @links.nil?
+ @links = []
+ link_nodes =
+ FeedTools::XmlHelper.combine_xpaths_all(self.root_node, [
+ "atom10:link",
+ "atom03:link",
+ "atom:link",
+ "link",
+ "a",
+ "url",
+ "href"
+ ])
+ for link_node in link_nodes
+ link_object = FeedTools::Link.new
+ link_object.href = FeedTools::XmlHelper.try_xpaths(link_node, [
+ "@atom10:href",
+ "@atom03:href",
+ "@atom:href",
+ "@href",
+ "text()"
+ ], :select_result_value => true)
+ if link_object.href.nil? && link_node.base_uri != nil
+ link_object.href = ""
+ end
+ begin
+ if !(link_object.href =~ /^file:/) &&
+ !FeedTools::UriHelper.is_uri?(link_object.href)
+ stored_base_uri =
+ FeedTools::GenericHelper.recursion_trap(:feed_link) do
+ self.feed.base_uri if self.feed != nil
+ end
+ link_object.href = FeedTools::UriHelper.resolve_relative_uri(
+ link_object.href,
+ [link_node.base_uri, stored_base_uri])
+ end
+ rescue
+ end
+ if FeedTools.configurations[:url_normalization_enabled]
+ link_object.href =
+ FeedTools::UriHelper.normalize_url(link_object.href)
+ end
+ link_object.href.strip! unless link_object.href.nil?
+ next if link_object.href.blank?
+ link_object.hreflang = FeedTools::XmlHelper.try_xpaths(link_node, [
+ "@atom10:hreflang",
+ "@atom03:hreflang",
+ "@atom:hreflang",
+ "@hreflang"
+ ], :select_result_value => true)
+ unless link_object.hreflang.nil?
+ link_object.hreflang = link_object.hreflang.downcase
+ end
+ link_object.rel = FeedTools::XmlHelper.try_xpaths(link_node, [
+ "@atom10:rel",
+ "@atom03:rel",
+ "@atom:rel",
+ "@rel"
+ ], :select_result_value => true)
+ unless link_object.rel.nil?
+ link_object.rel = link_object.rel.downcase
+ end
+ link_object.type = FeedTools::XmlHelper.try_xpaths(link_node, [
+ "@atom10:type",
+ "@atom03:type",
+ "@atom:type",
+ "@type"
+ ], :select_result_value => true)
+ unless link_object.type.nil?
+ link_object.type = link_object.type.downcase
+ end
+ link_object.title = FeedTools::XmlHelper.try_xpaths(link_node, [
+ "@atom10:title",
+ "@atom03:title",
+ "@atom:title",
+ "@title",
+ "text()"
+ ], :select_result_value => true)
+ # This catches the ambiguities between atom, rss, and cdf
+ if link_object.title == link_object.href
+ link_object.title = nil
+ end
+ link_object.length = FeedTools::XmlHelper.try_xpaths(link_node, [
+ "@atom10:length",
+ "@atom03:length",
+ "@atom:length",
+ "@length"
+ ], :select_result_value => true)
+ if !link_object.length.nil?
+ link_object.length = link_object.length.to_i
+ else
+ if !link_object.type.nil? && link_object.type[0..4] != "text" &&
+ link_object.type[-3..-1] != "xml" &&
+ link_object.href =~ /^http:\/\//
+ # Retrieve the length with an http HEAD request
+ else
+ link_object.length = nil
+ end
+ end
+ @links << link_object
+ end
+ end
+ return @links
+ end
+
+ # Sets the links collection
+ def links=(new_links)
+ @links = new_links
+ end
# Returns a list of the feed item's categories
def categories
if @categories.nil?
@categories = []
- category_nodes = try_xpaths_all(self.root_node, [
+ category_nodes = FeedTools::XmlHelper.try_xpaths_all(self.root_node, [
"category",
"dc:subject"
])
for category_node in category_nodes
- category = FeedTools::Feed::Category.new
- category.term = try_xpaths(category_node, ["@term", "text()"],
+ category = FeedTools::Category.new
+ category.term = FeedTools::XmlHelper.try_xpaths(category_node, ["@term", "text()"],
:select_result_value => true)
category.term.strip! unless category.term.nil?
- category.label = try_xpaths(category_node, ["@label"],
+ category.label = FeedTools::XmlHelper.try_xpaths(category_node, ["@label"],
:select_result_value => true)
category.label.strip! unless category.label.nil?
- category.scheme = try_xpaths(category_node, [
+ category.scheme = FeedTools::XmlHelper.try_xpaths(category_node, [
"@scheme",
"@domain"
], :select_result_value => true)
category.scheme.strip! unless category.scheme.nil?
@categories << category
@@ -683,82 +608,90 @@
# Returns a list of the feed item's images
def images
if @images.nil?
@images = []
- image_nodes = try_xpaths_all(self.root_node, [
+ image_nodes = FeedTools::XmlHelper.try_xpaths_all(self.root_node, [
"image",
"logo",
"apple-wallpapers:image",
- "atom10:link",
- "atom03:link",
- "atom:link",
- "link"
+ "imageUrl"
])
unless image_nodes.blank?
for image_node in image_nodes
- image = FeedTools::Feed::Image.new
- image.url = try_xpaths(image_node, [
+ image = FeedTools::Image.new
+ image.href = FeedTools::XmlHelper.try_xpaths(image_node, [
"url/text()",
"@rdf:resource",
+ "@href",
"text()"
], :select_result_value => true)
- if image.url.blank? && (image_node.name == "logo" ||
- (image_node.attributes['type'].to_s =~ /^image/) == 0)
- image.url = try_xpaths(image_node, [
- "@atom10:href",
- "@atom03:href",
- "@atom:href",
- "@href"
- ], :select_result_value => true)
- if image.url == self.link && image.url != nil
- image.url = nil
+ if image.href.nil? && image_node.base_uri != nil
+ image.href = ""
+ end
+ begin
+ if !(image.href =~ /^file:/) &&
+ !FeedTools::UriHelper.is_uri?(image.href)
+ stored_base_uri =
+ FeedTools::GenericHelper.recursion_trap(:feed_link) do
+ self.feed.base_uri if self.feed != nil
+ end
+ image.href = FeedTools::UriHelper.resolve_relative_uri(
+ image.href, [image_node.base_uri, stored_base_uri])
end
+ rescue
end
- if image.url.blank? && image_node.name == "LOGO"
- image.url = try_xpaths(image_node, [
- "@href"
- ], :select_result_value => true)
- end
- image.url.strip! unless image.url.nil?
- image.title = try_xpaths(image_node,
+ if FeedTools.configurations[:url_normalization_enabled]
+ image.href = FeedTools::UriHelper.normalize_url(image.href)
+ end
+ image.href.strip! unless image.href.nil?
+ next if image.href.blank?
+ image.title = FeedTools::XmlHelper.try_xpaths(image_node,
["title/text()"], :select_result_value => true)
image.title.strip! unless image.title.nil?
- image.description = try_xpaths(image_node,
+ image.description = FeedTools::XmlHelper.try_xpaths(image_node,
["description/text()"], :select_result_value => true)
image.description.strip! unless image.description.nil?
- image.link = try_xpaths(image_node,
+ image.link = FeedTools::XmlHelper.try_xpaths(image_node,
["link/text()"], :select_result_value => true)
image.link.strip! unless image.link.nil?
- image.height = try_xpaths(image_node,
+ image.height = FeedTools::XmlHelper.try_xpaths(image_node,
["height/text()"], :select_result_value => true).to_i
image.height = nil if image.height <= 0
- image.width = try_xpaths(image_node,
+ image.width = FeedTools::XmlHelper.try_xpaths(image_node,
["width/text()"], :select_result_value => true).to_i
image.width = nil if image.width <= 0
- image.style = try_xpaths(image_node, [
+ image.style = FeedTools::XmlHelper.try_xpaths(image_node, [
"style/text()",
"@style"
], :select_result_value => true)
image.style.strip! unless image.style.nil?
image.style.downcase! unless image.style.nil?
@images << image unless image.url.nil?
end
end
+ for link_object in self.links
+ if link_object.type != nil && link_object.type =~ /^image/
+ image = FeedTools::Image.new
+ image.href = link_object.href
+ image.title = link_object.title
+ @images << image unless image.href.nil?
+ end
+ end
end
return @images
end
# Returns the feed item itunes image link
def itunes_image_link
if @itunes_image_link.nil?
- @itunes_image_link = try_xpaths(self.root_node, [
+ @itunes_image_link = FeedTools::XmlHelper.try_xpaths(self.root_node, [
"itunes:image/@href",
"itunes:link[@rel='image']/@href"
], :select_result_value => true)
if FeedTools.configurations[:url_normalization_enabled]
- @itunes_image_link = FeedTools.normalize_url(@itunes_image_link)
+ @itunes_image_link = FeedTools::UriHelper.normalize_url(@itunes_image_link)
end
end
return @itunes_image_link
end
@@ -768,112 +701,94 @@
end
# Returns the feed item media thumbnail link
def media_thumbnail_link
if @media_thumbnail_link.nil?
- @media_thumbnail_link = try_xpaths(self.root_node, [
+ @media_thumbnail_link = FeedTools::XmlHelper.try_xpaths(self.root_node, [
"media:thumbnail/@url"
], :select_result_value => true)
if FeedTools.configurations[:url_normalization_enabled]
- @media_thumbnail_link = FeedTools.normalize_url(@media_thumbnail_link)
+ @media_thumbnail_link = FeedTools::UriHelper.normalize_url(@media_thumbnail_link)
end
end
return @media_thumbnail_link
end
# Sets the feed item media thumbnail url
def media_thumbnail_link=(new_media_thumbnail_link)
@media_thumbnail_link = new_media_thumbnail_link
end
- # Returns the feed item's copyright information
- def copyright
- if @copyright.nil?
+ # Returns the feed item's rights information
+ def rights
+ if @rights.nil?
repair_entities = false
- copyright_node = try_xpaths(self.root_node, [
+ rights_node = FeedTools::XmlHelper.try_xpaths(self.root_node, [
"atom10:copyright",
"atom03:copyright",
"atom:copyright",
"copyright",
"copyrights",
"dc:rights",
"rights"
])
- if copyright_node.nil?
- return nil
+ @rights = FeedTools::HtmlHelper.process_text_construct(rights_node,
+ self.feed_type, self.feed_version)
+ if self.feed_type == "atom" ||
+ FeedTools.configurations[:always_strip_wrapper_elements]
+ @rights = FeedTools::HtmlHelper.strip_wrapper_element(@rights)
end
- copyright_type = try_xpaths(copyright_node, "@type",
- :select_result_value => true)
- copyright_mode = try_xpaths(copyright_node, "@mode",
- :select_result_value => true)
- copyright_encoding = try_xpaths(copyright_node, "@encoding",
- :select_result_value => true)
-
- # Note that we're checking for misuse of type, mode and encoding here
- if !copyright_encoding.blank?
- @copyright =
- "[Embedded data objects are not currently supported.]"
- elsif copyright_node.cdatas.size > 0
- @copyright = copyright_node.cdatas.first.value
- elsif copyright_type == "base64" || copyright_mode == "base64" ||
- copyright_encoding == "base64"
- @copyright = Base64.decode64(copyright_node.inner_xml.strip)
- elsif copyright_type == "xhtml" || copyright_mode == "xhtml" ||
- copyright_type == "xml" || copyright_mode == "xml" ||
- copyright_type == "application/xhtml+xml"
- @copyright = copyright_node.inner_xml
- elsif copyright_type == "escaped" || copyright_mode == "escaped"
- @copyright = FeedTools.unescape_entities(
- copyright_node.inner_xml)
- else
- @copyright = copyright_node.inner_xml
- repair_entities = true
- end
-
- unless @copyright.nil?
- @copyright = FeedTools.sanitize_html(@copyright, :strip)
- @copyright = FeedTools.unescape_entities(@copyright) if repair_entities
- @copyright = FeedTools.tidy_html(@copyright)
- end
-
- @copyright = @copyright.strip unless @copyright.nil?
- @copyright = nil if @copyright.blank?
end
- return @copyright
+ return @rights
end
- # Sets the feed item's copyright information
- def copyright=(new_copyright)
- @copyright = new_copyright
+ # Sets the feed item's rights information
+ def rights=(new_rights)
+ @rights = new_rights
end
+ def license #:nodoc:
+ raise "Not implemented yet."
+ end
+
+ def license=(new_license) #:nodoc:
+ raise "Not implemented yet."
+ end
+
# Returns all feed item enclosures
def enclosures
if @enclosures.nil?
@enclosures = []
# First, load up all the different possible sources of enclosures
rss_enclosures =
- try_xpaths_all(self.root_node, ["enclosure"])
+ FeedTools::XmlHelper.try_xpaths_all(self.root_node, ["enclosure"])
atom_enclosures =
- try_xpaths_all(self.root_node, [
+ FeedTools::XmlHelper.try_xpaths_all(self.root_node, [
"atom10:link[@rel='enclosure']",
"atom03:link[@rel='enclosure']",
"atom:link[@rel='enclosure']",
"link[@rel='enclosure']"
])
media_content_enclosures =
- try_xpaths_all(self.root_node, ["media:content"])
+ FeedTools::XmlHelper.try_xpaths_all(self.root_node,
+ ["media:content"])
media_group_enclosures =
- try_xpaths_all(self.root_node, ["media:group"])
+ FeedTools::XmlHelper.try_xpaths_all(self.root_node, ["media:group"])
+
+ # TODO: Implement this
+ bittorrent_enclosures =
+ FeedTools::XmlHelper.try_xpaths_all(self.root_node,
+ ["bitTorrent:torrent"])
+
# Parse RSS-type enclosures. Thanks to a few buggy enclosure
# implementations, sometimes these also manage to show up in atom
# files.
for enclosure_node in rss_enclosures
- enclosure = Enclosure.new
- enclosure.url = FeedTools.unescape_entities(
+ enclosure = FeedTools::Enclosure.new
+ enclosure.url = FeedTools::HtmlHelper.unescape_entities(
enclosure_node.attributes["url"].to_s)
enclosure.type = enclosure_node.attributes["type"].to_s
enclosure.file_size = enclosure_node.attributes["length"].to_i
enclosure.credits = []
enclosure.explicit = false
@@ -881,11 +796,11 @@
end
# Parse atom-type enclosures. If there are repeats of the same
# enclosure object, we merge the two together.
for enclosure_node in atom_enclosures
- enclosure_url = FeedTools.unescape_entities(
+ enclosure_url = FeedTools::HtmlHelper.unescape_entities(
enclosure_node.attributes["href"].to_s)
enclosure = nil
new_enclosure = false
for existing_enclosure in @enclosures
if existing_enclosure.url == enclosure_url
@@ -893,11 +808,11 @@
break
end
end
if enclosure.nil?
new_enclosure = true
- enclosure = Enclosure.new
+ enclosure = FeedTools::Enclosure.new
end
enclosure.url = enclosure_url
enclosure.type = enclosure_node.attributes["type"].to_s
enclosure.file_size = enclosure_node.attributes["length"].to_i
enclosure.credits = []
@@ -912,11 +827,11 @@
# have to do identical processing for content objects within group
# objects.
parse_media_content = lambda do |media_content_nodes|
affected_enclosures = []
for enclosure_node in media_content_nodes
- enclosure_url = FeedTools.unescape_entities(
+ enclosure_url = FeedTools::HtmlHelper.unescape_entities(
enclosure_node.attributes["url"].to_s)
enclosure = nil
new_enclosure = false
for existing_enclosure in @enclosures
if existing_enclosure.url == enclosure_url
@@ -924,11 +839,11 @@
break
end
end
if enclosure.nil?
new_enclosure = true
- enclosure = Enclosure.new
+ enclosure = FeedTools::Enclosure.new
end
enclosure.url = enclosure_url
enclosure.type = enclosure_node.attributes["type"].to_s
enclosure.file_size = enclosure_node.attributes["fileSize"].to_i
enclosure.duration = enclosure_node.attributes["duration"].to_s
@@ -938,83 +853,83 @@
enclosure.framerate = enclosure_node.attributes["framerate"].to_i
enclosure.expression =
enclosure_node.attributes["expression"].to_s
enclosure.is_default =
(enclosure_node.attributes["isDefault"].to_s.downcase == "true")
- enclosure_thumbnail_url = try_xpaths(enclosure_node,
+ enclosure_thumbnail_url = FeedTools::XmlHelper.try_xpaths(enclosure_node,
["media:thumbnail/@url"], :select_result_value => true)
if !enclosure_thumbnail_url.blank?
- enclosure.thumbnail = EnclosureThumbnail.new(
- FeedTools.unescape_entities(enclosure_thumbnail_url),
- FeedTools.unescape_entities(
- try_xpaths(enclosure_node, ["media:thumbnail/@height"],
+ enclosure.thumbnail = FeedTools::EnclosureThumbnail.new(
+ FeedTools::HtmlHelper.unescape_entities(enclosure_thumbnail_url),
+ FeedTools::HtmlHelper.unescape_entities(
+ FeedTools::XmlHelper.try_xpaths(enclosure_node, ["media:thumbnail/@height"],
:select_result_value => true)),
- FeedTools.unescape_entities(
- try_xpaths(enclosure_node, ["media:thumbnail/@width"],
+ FeedTools::HtmlHelper.unescape_entities(
+ FeedTools::XmlHelper.try_xpaths(enclosure_node, ["media:thumbnail/@width"],
:select_result_value => true))
)
end
enclosure.categories = []
- for category in try_xpaths_all(enclosure_node, ["media:category"])
- enclosure.categories << FeedTools::Feed::Category.new
+ for category in FeedTools::XmlHelper.try_xpaths_all(enclosure_node, ["media:category"])
+ enclosure.categories << FeedTools::Category.new
enclosure.categories.last.term =
- FeedTools.unescape_entities(category.inner_xml)
+ FeedTools::HtmlHelper.unescape_entities(category.inner_xml)
enclosure.categories.last.scheme =
- FeedTools.unescape_entities(
+ FeedTools::HtmlHelper.unescape_entities(
category.attributes["scheme"].to_s)
enclosure.categories.last.label =
- FeedTools.unescape_entities(
+ FeedTools::HtmlHelper.unescape_entities(
category.attributes["label"].to_s)
if enclosure.categories.last.scheme.blank?
enclosure.categories.last.scheme = nil
end
if enclosure.categories.last.label.blank?
enclosure.categories.last.label = nil
end
end
- enclosure_media_hash = try_xpaths(enclosure_node,
+ enclosure_media_hash = FeedTools::XmlHelper.try_xpaths(enclosure_node,
["media:hash/text()"], :select_result_value => true)
if !enclosure_media_hash.nil?
- enclosure.hash = EnclosureHash.new(
- FeedTools.sanitize_html(FeedTools.unescape_entities(
+ enclosure.hash = FeedTools::EnclosureHash.new(
+ FeedTools::HtmlHelper.sanitize_html(FeedTools::HtmlHelper.unescape_entities(
enclosure_media_hash), :strip),
"md5"
)
end
- enclosure_media_player_url = try_xpaths(enclosure_node,
+ enclosure_media_player_url = FeedTools::XmlHelper.try_xpaths(enclosure_node,
["media:player/@url"], :select_result_value => true)
if !enclosure_media_player_url.blank?
- enclosure.player = EnclosurePlayer.new(
- FeedTools.unescape_entities(enclosure_media_player_url),
- FeedTools.unescape_entities(
- try_xpaths(enclosure_node,
+ enclosure.player = FeedTools::EnclosurePlayer.new(
+ FeedTools::HtmlHelper.unescape_entities(enclosure_media_player_url),
+ FeedTools::HtmlHelper.unescape_entities(
+ FeedTools::XmlHelper.try_xpaths(enclosure_node,
["media:player/@height"], :select_result_value => true)),
- FeedTools.unescape_entities(
- try_xpaths(enclosure_node,
+ FeedTools::HtmlHelper.unescape_entities(
+ FeedTools::XmlHelper.try_xpaths(enclosure_node,
["media:player/@width"], :select_result_value => true))
)
end
enclosure.credits = []
- for credit in try_xpaths_all(enclosure_node, ["media:credit"])
- enclosure.credits << EnclosureCredit.new(
- FeedTools.unescape_entities(credit.inner_xml.to_s.strip),
- FeedTools.unescape_entities(
+ for credit in FeedTools::XmlHelper.try_xpaths_all(enclosure_node, ["media:credit"])
+ enclosure.credits << FeedTools::EnclosureCredit.new(
+ FeedTools::HtmlHelper.unescape_entities(credit.inner_xml.to_s.strip),
+ FeedTools::HtmlHelper.unescape_entities(
credit.attributes["role"].to_s.downcase)
)
if enclosure.credits.last.name.blank?
enclosure.credits.last.name = nil
end
if enclosure.credits.last.role.blank?
enclosure.credits.last.role = nil
end
end
- enclosure.explicit = (try_xpaths(enclosure_node,
+ enclosure.explicit = (FeedTools::XmlHelper.try_xpaths(enclosure_node,
["media:adult/text()"]).to_s.downcase == "true")
enclosure_media_text =
- try_xpaths(enclosure_node, ["media:text/text()"])
+ FeedTools::XmlHelper.try_xpaths(enclosure_node, ["media:text/text()"])
if !enclosure_media_text.blank?
- enclosure.text = FeedTools.unescape_entities(
+ enclosure.text = FeedTools::HtmlHelper.unescape_entities(
enclosure_media_text)
end
affected_enclosures << enclosure
if new_enclosure
@enclosures << enclosure
@@ -1029,99 +944,99 @@
media_groups = []
# Parse the group objects.
for media_group in media_group_enclosures
group_media_content_enclosures =
- try_xpaths_all(media_group, ["media:content"])
+ FeedTools::XmlHelper.try_xpaths_all(media_group, ["media:content"])
# Parse the content objects within the group objects.
affected_enclosures =
parse_media_content.call(group_media_content_enclosures)
# Now make sure that content objects inherit certain properties from
# the group objects.
for enclosure in affected_enclosures
- media_group_thumbnail = try_xpaths(media_group,
+ media_group_thumbnail = FeedTools::XmlHelper.try_xpaths(media_group,
["media:thumbnail/@url"], :select_result_value => true)
if enclosure.thumbnail.nil? && !media_group_thumbnail.blank?
- enclosure.thumbnail = EnclosureThumbnail.new(
- FeedTools.unescape_entities(
+ enclosure.thumbnail = FeedTools::EnclosureThumbnail.new(
+ FeedTools::HtmlHelper.unescape_entities(
media_group_thumbnail),
- FeedTools.unescape_entities(
- try_xpaths(media_group, ["media:thumbnail/@height"],
+ FeedTools::HtmlHelper.unescape_entities(
+ FeedTools::XmlHelper.try_xpaths(media_group, ["media:thumbnail/@height"],
:select_result_value => true)),
- FeedTools.unescape_entities(
- try_xpaths(media_group, ["media:thumbnail/@width"],
+ FeedTools::HtmlHelper.unescape_entities(
+ FeedTools::XmlHelper.try_xpaths(media_group, ["media:thumbnail/@width"],
:select_result_value => true))
)
end
if (enclosure.categories.blank?)
enclosure.categories = []
- for category in try_xpaths_all(media_group, ["media:category"])
- enclosure.categories << FeedTools::Feed::Category.new
+ for category in FeedTools::XmlHelper.try_xpaths_all(media_group, ["media:category"])
+ enclosure.categories << FeedTools::Category.new
enclosure.categories.last.term =
- FeedTools.unescape_entities(category.inner_xml)
+ FeedTools::HtmlHelper.unescape_entities(category.inner_xml)
enclosure.categories.last.scheme =
- FeedTools.unescape_entities(
+ FeedTools::HtmlHelper.unescape_entities(
category.attributes["scheme"].to_s)
enclosure.categories.last.label =
- FeedTools.unescape_entities(
+ FeedTools::HtmlHelper.unescape_entities(
category.attributes["label"].to_s)
if enclosure.categories.last.scheme.blank?
enclosure.categories.last.scheme = nil
end
if enclosure.categories.last.label.blank?
enclosure.categories.last.label = nil
end
end
end
- enclosure_media_group_hash = try_xpaths(enclosure_node,
+ enclosure_media_group_hash = FeedTools::XmlHelper.try_xpaths(enclosure_node,
["media:hash/text()"], :select_result_value => true)
if enclosure.hash.nil? && !enclosure_media_group_hash.blank?
- enclosure.hash = EnclosureHash.new(
- FeedTools.sanitize_html(FeedTools.unescape_entities(
+ enclosure.hash = FeedTools::EnclosureHash.new(
+ FeedTools::HtmlHelper.sanitize_html(FeedTools::HtmlHelper.unescape_entities(
enclosure_media_group_hash), :strip),
"md5"
)
end
- enclosure_media_group_url = try_xpaths(media_group,
+ enclosure_media_group_url = FeedTools::XmlHelper.try_xpaths(media_group,
"media:player/@url", :select_result_value => true)
if enclosure.player.nil? && !enclosure_media_group_url.blank?
- enclosure.player = EnclosurePlayer.new(
- FeedTools.unescape_entities(enclosure_media_group_url),
- FeedTools.unescape_entities(
- try_xpaths(media_group, ["media:player/@height"],
+ enclosure.player = FeedTools::EnclosurePlayer.new(
+ FeedTools::HtmlHelper.unescape_entities(enclosure_media_group_url),
+ FeedTools::HtmlHelper.unescape_entities(
+ FeedTools::XmlHelper.try_xpaths(media_group, ["media:player/@height"],
:select_result_value => true)),
- FeedTools.unescape_entities(
- try_xpaths(media_group, ["media:player/@width"],
+ FeedTools::HtmlHelper.unescape_entities(
+ FeedTools::XmlHelper.try_xpaths(media_group, ["media:player/@width"],
:select_result_value => true))
)
end
if enclosure.credits.nil? || enclosure.credits.size == 0
enclosure.credits = []
- for credit in try_xpaths_all(media_group, ["media:credit"])
- enclosure.credits << EnclosureCredit.new(
- FeedTools.unescape_entities(credit.inner_xml),
- FeedTools.unescape_entities(
+ for credit in FeedTools::XmlHelper.try_xpaths_all(media_group, ["media:credit"])
+ enclosure.credits << FeedTools::EnclosureCredit.new(
+ FeedTools::HtmlHelper.unescape_entities(credit.inner_xml),
+ FeedTools::HtmlHelper.unescape_entities(
credit.attributes["role"].to_s.downcase)
)
if enclosure.credits.last.role.blank?
enclosure.credits.last.role = nil
end
end
end
if enclosure.explicit?.nil?
- enclosure.explicit = ((try_xpaths(media_group, [
+ enclosure.explicit = ((FeedTools::XmlHelper.try_xpaths(media_group, [
"media:adult/text()"
], :select_result_value => true).downcase == "true") ?
true : false)
end
- enclosure_media_group_text = try_xpaths(media_group,
+ enclosure_media_group_text = FeedTools::XmlHelper.try_xpaths(media_group,
["media:text/text()"], :select_result_value => true)
if enclosure.text.nil? && !enclosure_media_group_text.blank?
- enclosure.text = FeedTools.sanitize_html(
- FeedTools.unescape_entities(
+ enclosure.text = FeedTools::HtmlHelper.sanitize_html(
+ FeedTools::HtmlHelper.unescape_entities(
enclosure_media_group_text), :strip)
end
end
# Keep track of the media groups
@@ -1135,31 +1050,31 @@
end
end
# Add all the itunes categories
itunes_categories =
- try_xpaths_all(self.root_node, ["itunes:category"])
+ FeedTools::XmlHelper.try_xpaths_all(self.root_node, ["itunes:category"])
for itunes_category in itunes_categories
genre = "Podcasts"
category = itunes_category.attributes["text"].to_s
subcategory =
- try_xpaths(itunes_category, ["itunes:category/@text"],
+ FeedTools::XmlHelper.try_xpaths(itunes_category, ["itunes:category/@text"],
:select_result_value => true)
category_path = genre
- if category != ""
+ if !category.blank?
category_path << "/" + category
end
- if subcategory != ""
+ if !subcategory.blank?
category_path << "/" + subcategory
end
for enclosure in @enclosures
if enclosure.categories.nil?
enclosure.categories = []
end
- enclosure.categories << FeedTools::Feed::Category.new
+ enclosure.categories << FeedTools::Category.new
enclosure.categories.last.term =
- FeedTools.unescape_entities(category_path)
+ FeedTools::HtmlHelper.unescape_entities(category_path)
enclosure.categories.last.scheme =
"http://www.apple.com/itunes/store/"
enclosure.categories.last.label =
"iTunes Music Store Categories"
end
@@ -1252,25 +1167,25 @@
end
# Returns the feed item author
def author
if @author.nil?
- @author = FeedTools::Feed::Author.new
- author_node = try_xpaths(self.root_node, [
+ @author = FeedTools::Author.new
+ author_node = FeedTools::XmlHelper.try_xpaths(self.root_node, [
"atom10:author",
"atom03:author",
"atom:author",
"author",
"managingEditor",
"dc:author",
"dc:creator",
"creator"
])
unless author_node.nil?
- @author.raw = FeedTools.unescape_entities(
- XPath.first(author_node, "text()").to_s).strip
- @author.raw = nil if @author.raw.blank?
+ @author.raw = FeedTools::XmlHelper.try_xpaths(
+ author_node, ["text()"], :select_result_value => true)
+ @author.raw = FeedTools::HtmlHelper.unescape_entities(@author.raw)
unless @author.raw.nil?
raw_scan = @author.raw.scan(
/(.*)\((\b[A-Z0-9._%-\+]+@[A-Z0-9._%-]+\.[A-Z]{2,4}\b)\)/i)
if raw_scan.nil? || raw_scan.size == 0
raw_scan = @author.raw.scan(
@@ -1300,34 +1215,34 @@
@author.name = @author.raw
end
end
end
if @author.name.blank?
- @author.name = FeedTools.unescape_entities(
- try_xpaths(author_node, [
+ @author.name = FeedTools::HtmlHelper.unescape_entities(
+ FeedTools::XmlHelper.try_xpaths(author_node, [
"atom10:name/text()",
"atom03:name/text()",
"atom:name/text()",
"name/text()",
"@name"
], :select_result_value => true)
)
end
if @author.email.blank?
- @author.email = FeedTools.unescape_entities(
- try_xpaths(author_node, [
+ @author.email = FeedTools::HtmlHelper.unescape_entities(
+ FeedTools::XmlHelper.try_xpaths(author_node, [
"atom10:email/text()",
"atom03:email/text()",
"atom:email/text()",
"email/text()",
"@email"
], :select_result_value => true)
)
end
if @author.url.blank?
- @author.url = FeedTools.unescape_entities(
- try_xpaths(author_node, [
+ @author.url = FeedTools::HtmlHelper.unescape_entities(
+ FeedTools::XmlHelper.try_xpaths(author_node, [
"atom10:url/text()",
"atom03:url/text()",
"atom:url/text()",
"url/text()",
"atom10:uri/text()",
@@ -1342,10 +1257,20 @@
end
@author.name = nil if @author.name.blank?
@author.raw = nil if @author.raw.blank?
@author.email = nil if @author.email.blank?
@author.url = nil if @author.url.blank?
+ if @author.url != nil
+ begin
+ if !(@author.url =~ /^file:/) &&
+ !FeedTools::UriHelper.is_uri?(@author.url)
+ @author.url = FeedTools::UriHelper.resolve_relative_uri(
+ @author.url, [author_node.base_uri, self.base_uri])
+ end
+ rescue
+ end
+ end
end
# Fallback on the itunes module if we didn't find an author name
begin
@author.name = self.itunes_author if @author.name.nil?
rescue
@@ -1364,24 +1289,24 @@
@author = new_author
else
# We're not looking at an author object, this is probably a string,
# default to setting the author's name.
if @author.nil?
- @author = FeedTools::Feed::Author.new
+ @author = FeedTools::Author.new
end
@author.name = new_author
end
end
# Returns the feed item's publisher
def publisher
if @publisher.nil?
- @publisher = FeedTools::Feed::Author.new
+ @publisher = FeedTools::Author.new
# Set the publisher's raw text
- @publisher.raw = FeedTools.unescape_entities(
- try_xpaths(self.root_node, [
+ @publisher.raw = FeedTools::HtmlHelper.unescape_entities(
+ FeedTools::XmlHelper.try_xpaths(self.root_node, [
"dc:publisher/text()",
"webMaster/text()"
], :select_result_value => true))
unless @publisher.raw.blank?
raw_scan = @publisher.raw.scan(
@@ -1418,10 +1343,24 @@
@publisher.name = nil if @publisher.name.blank?
@publisher.raw = nil if @publisher.raw.blank?
@publisher.email = nil if @publisher.email.blank?
@publisher.url = nil if @publisher.url.blank?
+ if @publisher.url != nil
+ begin
+ if !(@publisher.url =~ /^file:/) &&
+ !FeedTools::UriHelper.is_uri?(@publisher.url)
+ root_base_uri = nil
+ unless self.root_node.nil?
+ root_base_uri = self.root_node.base_uri
+ end
+ @publisher.url = FeedTools::UriHelper.resolve_relative_uri(
+ @publisher.url, [root_base_uri, self.base_uri])
+ end
+ rescue
+ end
+ end
end
return @publisher
end
# Sets the feed item's publisher
@@ -1433,11 +1372,11 @@
@publisher = new_publisher
else
# We're not looking at an Author object, this is probably a string,
# default to setting the publisher's name.
if @publisher.nil?
- @publisher = FeedTools::Feed::Author.new
+ @publisher = FeedTools::Author.new
end
@publisher.name = new_publisher
end
end
@@ -1445,12 +1384,12 @@
#
# This inherits from any incorrectly placed channel-level itunes:author
# elements. They're actually amazingly common. People don't read specs.
def itunes_author
if @itunes_author.nil?
- @itunes_author = FeedTools.unescape_entities(
- try_xpaths(self.root_node,
+ @itunes_author = FeedTools::HtmlHelper.unescape_entities(
+ FeedTools::XmlHelper.try_xpaths(self.root_node,
["itunes:author/text()"], :select_result_value => true))
@itunes_author = feed.itunes_author if @itunes_author.blank?
end
return @itunes_author
end
@@ -1461,12 +1400,12 @@
end
# Returns the number of seconds that the associated media runs for
def itunes_duration
if @itunes_duration.nil?
- raw_duration = FeedTools.unescape_entities(
- try_xpaths(self.root_node,
+ raw_duration = FeedTools::HtmlHelper.unescape_entities(
+ FeedTools::XmlHelper.try_xpaths(self.root_node,
["itunes:duration/text()"], :select_result_value => true))
if !raw_duration.blank?
hms = raw_duration.split(":").map { |x| x.to_i }
if hms.size == 3
@itunes_duration = hms[0].hours + hms[1].minutes + hms[2]
@@ -1485,34 +1424,35 @@
@itunes_duration = new_itunes_duration
end
# Returns the feed item time
def time(options = {})
- validate_options([ :estimate_timestamp ],
+ FeedTools::GenericHelper.validate_options([ :estimate_timestamp ],
options.keys)
options = { :estimate_timestamp => true }.merge(options)
if @time.nil?
- time_string = try_xpaths(self.root_node, [
+ time_string = FeedTools::XmlHelper.try_xpaths(self.root_node, [
"atom10:updated/text()",
"atom03:updated/text()",
"atom:updated/text()",
"updated/text()",
"atom10:modified/text()",
"atom03:modified/text()",
"atom:modified/text()",
"modified/text()",
"time/text()",
+ "lastBuildDate/text()",
"atom10:issued/text()",
"atom03:issued/text()",
"atom:issued/text()",
"issued/text()",
"atom10:published/text()",
"atom03:published/text()",
"atom:published/text()",
"published/text()",
- "pubDate/text()",
"dc:date/text()",
+ "pubDate/text()",
"date/text()"
], :select_result_value => true)
begin
if !time_string.blank?
@time = Time.parse(time_string).gmtime
@@ -1601,19 +1541,20 @@
private :prev_time
# Returns the feed item updated time
def updated
if @updated.nil?
- updated_string = try_xpaths(self.root_node, [
+ updated_string = FeedTools::XmlHelper.try_xpaths(self.root_node, [
"atom10:updated/text()",
"atom03:updated/text()",
"atom:updated/text()",
"updated/text()",
"atom10:modified/text()",
"atom03:modified/text()",
"atom:modified/text()",
- "modified/text()"
+ "modified/text()",
+ "lastBuildDate/text()"
], :select_result_value => true)
if !updated_string.blank?
@updated = Time.parse(updated_string).gmtime rescue nil
else
@updated = nil
@@ -1628,44 +1569,56 @@
end
# Returns the feed item published time
def published
if @published.nil?
- published_string = try_xpaths(self.root_node, [
+ published_string = FeedTools::XmlHelper.try_xpaths(self.root_node, [
"atom10:issued/text()",
"atom03:issued/text()",
"atom:issued/text()",
"issued/text()",
"atom10:published/text()",
"atom03:published/text()",
"atom:published/text()",
"published/text()",
- "pubDate/text()",
"dc:date/text()",
+ "pubDate/text()",
"date/text()"
], :select_result_value => true)
if !published_string.blank?
- @issued = Time.parse(published_string).gmtime rescue nil
+ @published = Time.parse(published_string).gmtime rescue nil
else
- @issued = nil
+ @published = nil
end
end
- return @issued
+ return @published
end
# Sets the feed item published time
def published=(new_published)
@published = new_published
end
# Returns the url for posting comments
def comments
if @comments.nil?
- @comments = try_xpaths(self.root_node, ["comments/text()"],
+ @comments = FeedTools::XmlHelper.try_xpaths(self.root_node, ["comments/text()"],
:select_result_value => true)
+ begin
+ if !(@comments =~ /^file:/) &&
+ !FeedTools::UriHelper.is_uri?(@comments)
+ root_base_uri = nil
+ unless self.root_node.nil?
+ root_base_uri = self.root_node.base_uri
+ end
+ @comments = FeedTools::UriHelper.resolve_relative_uri(
+ @comments, [root_base_uri, self.base_uri])
+ end
+ rescue
+ end
if FeedTools.configurations[:url_normalization_enabled]
- @comments = FeedTools.normalize_url(@comments)
+ @comments = FeedTools::UriHelper.normalize_url(@comments)
end
end
return @comments
end
@@ -1675,14 +1628,16 @@
end
# The source that this post was based on
def source
if @source.nil?
- @source = FeedTools::Feed::Link.new
- @source.url = try_xpaths(self.root_node, ["source/@url"],
+ @source = FeedTools::Link.new
+ @source.href = FeedTools::XmlHelper.try_xpaths(
+ self.root_node, ["source/@url"],
:select_result_value => true)
- @source.value = try_xpaths(self.root_node, ["source/text()"],
+ @source.title = FeedTools::XmlHelper.try_xpaths(
+ self.root_node, ["source/text()"],
:select_result_value => true)
end
return @source
end
@@ -1695,11 +1650,11 @@
if root_node.nil?
return @tags
end
if @tags.nil? || @tags.size == 0
@tags = []
- tag_list = try_xpaths_all(self.root_node,
+ tag_list = FeedTools::XmlHelper.try_xpaths_all(self.root_node,
["dc:subject/rdf:Bag/rdf:li/text()"],
:select_result_value => true)
if tag_list != nil && tag_list.size > 0
for tag in tag_list
@tags << tag.downcase.strip
@@ -1707,16 +1662,16 @@
end
end
if @tags.nil? || @tags.size == 0
# messy effort to find ourselves some tags, mainly for del.icio.us
@tags = []
- rdf_bag = try_xpaths_all(self.root_node,
+ rdf_bag = FeedTools::XmlHelper.try_xpaths_all(self.root_node,
["taxo:topics/rdf:Bag/rdf:li"])
if rdf_bag != nil && rdf_bag.size > 0
for tag_node in rdf_bag
begin
- tag_url = try_xpaths(tag_node, ["@resource"],
+ tag_url = FeedTools::XmlHelper.try_xpaths(tag_node, ["@resource"],
:select_result_value => true)
tag_match = tag_url.scan(/\/(tag|tags)\/(\w+)$/)
if tag_match.size > 0
@tags << tag_match.first.last.downcase.strip
end
@@ -1725,27 +1680,27 @@
end
end
end
if @tags.nil? || @tags.size == 0
@tags = []
- tag_list = try_xpaths_all(self.root_node, ["category/text()"],
+ tag_list = FeedTools::XmlHelper.try_xpaths_all(self.root_node, ["category/text()"],
:select_result_value => true)
for tag in tag_list
@tags << tag.to_s.downcase.strip
end
end
if @tags.nil? || @tags.size == 0
@tags = []
- tag_list = try_xpaths_all(self.root_node, ["dc:subject/text()"],
+ tag_list = FeedTools::XmlHelper.try_xpaths_all(self.root_node, ["dc:subject/text()"],
:select_result_value => true)
for tag in tag_list
@tags << tag.to_s.downcase.strip
end
end
if @tags.blank?
begin
- itunes_keywords_string = try_xpaths(self.root_node, [
+ itunes_keywords_string = FeedTools::XmlHelper.try_xpaths(self.root_node, [
"itunes:keywords/text()"
], :select_result_value => true)
unless itunes_keywords_string.blank?
@tags = itunes_keywords_string.downcase.split(",")
if @tags.size == 1
@@ -1777,11 +1732,11 @@
# Returns true if this feed item contains explicit material. If the whole
# feed has been marked as explicit, this will return true even if the item
# isn't explicitly marked as explicit.
def explicit?
if @explicit.nil?
- explicit_string = try_xpaths(self.root_node, [
+ explicit_string = FeedTools::XmlHelper.try_xpaths(self.root_node, [
"media:adult/text()",
"itunes:explicit/text()"
], :select_result_value => true)
if explicit_string == "true" || explicit_string == "yes" ||
feed.explicit?
@@ -1817,36 +1772,43 @@
# RDF-based rss format
if link.nil?
raise "Cannot generate an rdf-based feed item with a nil link field."
end
return xml_builder.item("rdf:about" =>
- FeedTools.escape_entities(link)) do
- unless title.nil? || title == ""
- xml_builder.title(title)
+ FeedTools::HtmlHelper.escape_entities(link)) do
+ unless self.title.blank?
+ xml_builder.title(FeedTools::HtmlHelper.strip_html_tags(self.title))
else
xml_builder.title
end
- unless link.nil? || link == ""
- xml_builder.link(link)
+ unless self.link.blank?
+ xml_builder.link(self.link)
else
xml_builder.link
end
- unless description.nil? || description == ""
- xml_builder.description(description)
+ unless self.author.nil? || self.author.name.nil?
+ xml_builder.tag!("dc:creator", self.author.name)
+ end
+ unless self.summary.blank?
+ xml_builder.description(self.summary)
else
xml_builder.description
end
+ unless self.content.blank?
+ xml_builder.tag!("content:encoded") do
+ xml_builder.cdata!(self.content)
+ end
+ end
unless time.nil?
xml_builder.tag!("dc:date", time.iso8601)
end
+ unless self.rights.blank?
+ xml_builder.tag!("dc:rights", self.rights)
+ end
unless tags.nil? || tags.size == 0
- xml_builder.tag!("taxo:topics") do
- xml_builder.tag!("rdf:Bag") do
- for tag in tags
- xml_builder.tag!("rdf:li", tag)
- end
- end
+ for tag in tags
+ xml_builder.tag!("dc:subject", tag)
end
if self.feed.podcast?
xml_builder.tag!("itunes:keywords", tags.join(", "))
end
end
@@ -1854,39 +1816,48 @@
end
elsif feed_type == "rss"
# normal rss format
return xml_builder.item do
unless self.title.blank?
- xml_builder.title(self.title)
+ xml_builder.title(FeedTools::HtmlHelper.strip_html_tags(self.title))
end
unless self.link.blank?
xml_builder.link(link)
end
- unless self.description.blank?
- xml_builder.description(self.description)
+ unless self.author.nil? || self.author.name.nil?
+ xml_builder.tag!("dc:creator", self.author.name)
end
- unless self.time.nil?
+ unless self.summary.blank?
+ xml_builder.description(self.summary)
+ end
+ unless self.content.blank?
+ xml_builder.tag!("content:encoded") do
+ xml_builder.cdata!(self.content)
+ end
+ end
+ if !self.published.nil?
+ xml_builder.pubDate(self.published.rfc822)
+ elsif !self.time.nil?
xml_builder.pubDate(self.time.rfc822)
end
+ unless self.copyright.blank?
+ xml_builder.tag!("dc:rights", self.copyright)
+ end
unless self.guid.blank?
- if FeedTools.is_uri?(self.guid)
+ if FeedTools::UriHelper.is_uri?(self.guid) && (self.guid =~ /^http/)
xml_builder.guid(self.guid, "isPermaLink" => "true")
else
xml_builder.guid(self.guid, "isPermaLink" => "false")
end
else
unless self.link.blank?
xml_builder.guid(self.link, "isPermaLink" => "true")
end
end
unless tags.nil? || tags.size == 0
- xml_builder.tag!("taxo:topics") do
- xml_builder.tag!("rdf:Bag") do
- for tag in tags
- xml_builder.tag!("rdf:li", tag)
- end
- end
+ for tag in tags
+ xml_builder.tag!("category", tag)
end
if self.feed.podcast?
xml_builder.tag!("itunes:keywords", tags.join(", "))
end
end
@@ -1909,11 +1880,11 @@
end
end
end
rescue
end
- attribute_hash["url"] = FeedTools.normalize_url(enclosure.url)
+ attribute_hash["url"] = FeedTools::UriHelper.normalize_url(enclosure.url)
if enclosure.type != nil
attribute_hash["type"] = enclosure.type
end
if enclosure.file_size != nil && enclosure.file_size.to_i > 0
attribute_hash["length"] = enclosure.file_size.to_s
@@ -1934,11 +1905,12 @@
elsif feed_type == "atom" && version == 1.0
# normal atom format
return xml_builder.entry("xmlns" =>
FEED_TOOLS_NAMESPACES['atom10']) do
unless title.nil? || title == ""
- xml_builder.title(title,
+ xml_builder.title(
+ FeedTools::HtmlHelper.strip_html_tags(self.title),
"type" => "html")
end
xml_builder.author do
unless self.author.nil? || self.author.name.nil?
xml_builder.name(self.author.name)
@@ -1951,14 +1923,14 @@
unless self.author.nil? || self.author.url.nil?
xml_builder.uri(self.author.url)
end
end
unless link.nil? || link == ""
- xml_builder.link("href" => FeedTools.escape_entities(self.link),
- "rel" => "alternate",
- "type" => "text/html",
- "title" => FeedTools.escape_entities(title))
+ xml_builder.link(
+ "href" =>
+ FeedTools::HtmlHelper.escape_entities(self.link),
+ "rel" => "alternate")
end
if !self.content.blank?
xml_builder.content(self.content,
"type" => "html")
end
@@ -1976,25 +1948,28 @@
xml_builder.updated(Time.now.gmtime.iso8601)
end
unless self.published.nil?
xml_builder.published(self.published.iso8601)
end
+ unless self.rights.blank?
+ xml_builder.rights(self.rights)
+ end
if self.id != nil
- unless FeedTools.is_uri? self.id
+ unless FeedTools::UriHelper.is_uri? self.id
if self.time != nil && self.link != nil
- xml_builder.id(FeedTools.build_tag_uri(self.link, self.time))
+ xml_builder.id(FeedTools::UriHelper.build_tag_uri(self.link, self.time))
elsif self.link != nil
xml_builder.id(FeedTools.build_urn_uuid_uri(self.link))
else
raise "The unique id must be a URI. " +
"(Attempted to generate id, but failed.)"
end
else
xml_builder.id(self.id)
end
elsif self.time != nil && self.link != nil
- xml_builder.id(FeedTools.build_tag_uri(self.link, self.time))
+ xml_builder.id(FeedTools::UriHelper.build_tag_uri(self.link, self.time))
else
raise "Cannot build feed, missing feed unique id."
end
unless self.tags.nil? || self.tags.size == 0
for tag in self.tags
@@ -2004,11 +1979,11 @@
unless self.enclosures.blank? || self.enclosures.size == 0
for enclosure in self.enclosures
attribute_hash = {}
next if enclosure.url.blank?
attribute_hash["rel"] = "enclosure"
- attribute_hash["href"] = FeedTools.normalize_url(enclosure.url)
+ attribute_hash["href"] = FeedTools::UriHelper.normalize_url(enclosure.url)
if enclosure.type != nil
attribute_hash["type"] = enclosure.type
end
if enclosure.file_size != nil && enclosure.file_size.to_i > 0
attribute_hash["length"] = enclosure.file_size.to_s
@@ -2021,13 +1996,15 @@
else
raise "Unsupported feed format/version."
end
end
- alias_method :abstract, :content
- alias_method :abstract=, :content=
- alias_method :description, :content
- alias_method :description=, :content=
+ alias_method :abstract, :summary
+ alias_method :abstract=, :summary=
+ alias_method :description, :summary
+ alias_method :description=, :summary=
+ alias_method :copyright, :rights
+ alias_method :copyright=, :rights=
alias_method :guid, :id
alias_method :guid=, :id=
# Returns a simple representation of the feed item object's state.
def inspect