#-- # Copyright (c) 2005 Robert Aman # # Permission is hereby granted, free of charge, to any person obtaining # a copy of this software and associated documentation files (the # "Software"), to deal in the Software without restriction, including # without limitation the rights to use, copy, modify, merge, publish, # distribute, sublicense, and/or sell copies of the Software, and to # permit persons to whom the Software is furnished to do so, subject to # the following conditions: # # The above copyright notice and this permission notice shall be # included in all copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE # LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION # OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION # WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #++ require 'rexml/document' require 'feed_tools/feed_item' require 'feed_tools/feed_structures' require 'feed_tools/helpers/retrieval_helper' require 'feed_tools/helpers/generic_helper' require 'feed_tools/helpers/xml_helper' require 'feed_tools/helpers/html_helper' module FeedTools # The FeedTools::Feed class represents a web feed's structure. class Feed # Initialize the feed object def initialize super @cache_object = nil @http_headers = nil @xml_document = nil @feed_data = nil @feed_data_type = :xml @root_node = nil @channel_node = nil @href = nil @id = nil @title = nil @description = nil @link = nil @last_retrieved = nil @time_to_live = nil @entries = nil @live = false @encoding = nil @options = nil end # Loads the feed specified by the url, pulling the data from the # cache if it hasn't expired. Options supplied will override the # default options. def Feed.open(url, options={}) FeedTools::GenericHelper.validate_options( FeedTools.configurations.keys, options.keys) # create the new feed feed = FeedTools::Feed.new feed.configurations = FeedTools.configurations.merge(options) if feed.configurations[:feed_cache] != nil && FeedTools.feed_cache.nil? raise(ArgumentError, "There is currently no caching mechanism set. " + "Cannot retrieve cached feeds.") end # clean up the url url = FeedTools::UriHelper.normalize_url(url) # load the new feed feed.href = url feed.update! unless feed.configurations[:disable_update_from_remote] return feed end # Returns the load options for this feed. def configurations if @configurations.blank? @configurations = FeedTools.configurations.dup end return @configurations end # Sets the load options for this feed. def configurations=(new_configurations) @configurations = new_configurations end # Loads the feed from the remote url if the feed has expired from the # cache or cannot be retrieved from the cache for some reason. def update! if self.configurations[:disable_update_from_remote] # Don't do anything if this option is set return end if !FeedTools.feed_cache.nil? && !FeedTools.feed_cache.set_up_correctly? raise "Your feed cache system is incorrectly set up. " + "Please see the documentation for more information." end if self.http_headers.blank? && !(self.cache_object.nil?) && !(self.cache_object.http_headers.nil?) @http_headers = YAML.load(self.cache_object.http_headers) @http_headers = {} unless @http_headers.kind_of? Hash elsif self.http_headers.blank? @http_headers = {} end if self.expired? == false @live = false else load_remote_feed! # Handle autodiscovery if self.http_headers['content-type'] =~ /text\/html/ || self.http_headers['content-type'] =~ /application\/xhtml\+xml/ autodiscovered_url = nil autodiscovered_url = FeedTools::HtmlHelper.extract_link_by_mime_type(self.feed_data, "application/atom+xml") if autodiscovered_url.nil? autodiscovered_url = FeedTools::HtmlHelper.extract_link_by_mime_type(self.feed_data, "application/rss+xml") end if autodiscovered_url.nil? autodiscovered_url = FeedTools::HtmlHelper.extract_link_by_mime_type(self.feed_data, "application/rdf+xml") end unless autodiscovered_url.nil? begin autodiscovered_url = FeedTools::UriHelper.resolve_relative_uri( autodiscovered_url, [self.href]) rescue Exception end self.feed_data = nil self.href = autodiscovered_url if FeedTools.feed_cache.nil? self.cache_object = nil else self.cache_object = FeedTools.feed_cache.find_by_href(autodiscovered_url) end self.update! end end end end # Attempts to load the feed from the remote location. Requires the url # field to be set. If an etag or the last_modified date has been set, # attempts to use them to prevent unnecessary reloading of identical # content. def load_remote_feed! @live = true if self.http_headers.nil? && !(self.cache_object.nil?) && !(self.cache_object.http_headers.nil?) @http_headers = YAML.load(self.cache_object.http_headers) end if (self.href =~ /^feed:/) == 0 # Woah, Nelly, how'd that happen? You should've already been # corrected. So let's fix that url. And please, # just use less crappy browsers instead of badly defined # pseudo-protocol hacks. self.href = FeedTools::UriHelper.normalize_url(self.href) end # Find out what method we're going to be using to obtain this feed. begin uri = URI.parse(self.href) rescue URI::InvalidURIError raise FeedAccessError, "Cannot retrieve feed using invalid URL: " + self.href.to_s end retrieval_method = "http" case uri.scheme when "http" retrieval_method = "http" when "ftp" retrieval_method = "ftp" when "file" retrieval_method = "file" when nil raise FeedAccessError, "No protocol was specified in the url." else raise FeedAccessError, "Cannot retrieve feed using unrecognized protocol: " + uri.scheme end # No need for http headers unless we're actually doing http if retrieval_method == "http" begin @http_response = (FeedTools::RetrievalHelper.http_get( self.href, :feed_object => self) do |url, response| # Find out if we've already seen the url we've been # redirected to. follow_redirect = true begin cached_feed = FeedTools::Feed.open(url, :disable_update_from_remote => true) if cached_feed.cache_object != nil && cached_feed.cache_object.new_record? != true if !cached_feed.expired? && !cached_feed.http_headers.blank? # Copy the cached state self.href = cached_feed.href @feed_data = cached_feed.feed_data @feed_data_type = cached_feed.feed_data_type if @feed_data.blank? raise "Invalid cache data." end @title = nil; self.title @link = nil; self.link self.last_retrieved = cached_feed.last_retrieved self.http_headers = cached_feed.http_headers self.cache_object = cached_feed.cache_object @live = false follow_redirect = false end end rescue # If anything goes wrong, ignore it. end follow_redirect end) case @http_response when Net::HTTPSuccess @feed_data = self.http_response.body @http_headers = {} self.http_response.each_header do |key, value| self.http_headers[key.downcase] = value end self.last_retrieved = Time.now.gmtime @live = true when Net::HTTPNotModified @http_headers = {} self.http_response.each_header do |key, value| self.http_headers[key.downcase] = value end self.last_retrieved = Time.now.gmtime @live = true else @live = false end rescue Exception => error @live = false if self.feed_data.nil? raise error end end elsif retrieval_method == "https" # Not supported... yet elsif retrieval_method == "ftp" # Not supported... yet # Technically, CDF feeds are supposed to be able to be accessed # directly from an ftp server. This is silly, but we'll humor # Microsoft. # # Eventually. If they're lucky. And someone demands it. elsif retrieval_method == "file" # Now that we've gone to all that trouble to ensure the url begins # with 'file://', strip the 'file://' off the front of the url. file_name = self.href.gsub(/^file:\/\//, "") if RUBY_PLATFORM =~ /mswin/ file_name = file_name[1..-1] if file_name[0..0] == "/" end begin open(file_name) do |file| @http_response = nil @http_headers = {} @feed_data = file.read @feed_data_type = :xml self.last_retrieved = Time.now.gmtime end rescue @live = false # In this case, pulling from the cache is probably not going # to help at all, and the use should probably be immediately # appraised of the problem. Raise the exception. raise end end unless self.cache_object.nil? begin self.save rescue end end end # Returns the relevant information from an http request. def http_response return @http_response end # Returns a hash of the http headers from the response. def http_headers if @http_headers.blank? if !self.cache_object.nil? && !self.cache_object.http_headers.nil? @http_headers = YAML.load(self.cache_object.http_headers) @http_headers = {} unless @http_headers.kind_of? Hash else @http_headers = {} end end return @http_headers end # Returns the encoding that the feed was parsed with def encoding if @encoding.nil? unless self.http_headers.blank? @encoding = "utf-8" else @encoding = self.encoding_from_feed_data end end return @encoding end # Returns the encoding of feed calculated only from the xml data. # I.e., the encoding we would come up with if we ignore RFC 3023. def encoding_from_feed_data if @encoding_from_feed_data.nil? raw_data = self.feed_data encoding_from_xml_instruct = raw_data.scan( /^<\?xml [^>]*encoding="([\w]*)"[^>]*\?>/ ).flatten.first unless encoding_from_xml_instruct.blank? encoding_from_xml_instruct.downcase! end if encoding_from_xml_instruct.blank? doc = REXML::Document.new(raw_data) encoding_from_xml_instruct = doc.encoding.downcase if encoding_from_xml_instruct == "utf-8" # REXML has a tendency to report utf-8 overzealously, take with # grain of salt encoding_from_xml_instruct = nil end else @encoding_from_feed_data = encoding_from_xml_instruct end if encoding_from_xml_instruct.blank? sniff_table = { "Lo\247\224" => "ebcdic-cp-us", " "utf-8" } sniff = self.feed_data[0..3] if sniff_table[sniff] != nil @encoding_from_feed_data = sniff_table[sniff].downcase end else @encoding_from_feed_data = encoding_from_xml_instruct end if @encoding_from_feed_data.blank? # Safest assumption @encoding_from_feed_data = "utf-8" end end return @encoding_from_feed_data end # Returns the feed's raw data. def feed_data if @feed_data.nil? unless self.cache_object.nil? @feed_data = self.cache_object.feed_data end end return @feed_data end # Sets the feed's data. def feed_data=(new_feed_data) for var in self.instance_variables self.instance_variable_set(var, nil) end @http_headers = {} @feed_data = new_feed_data unless self.cache_object.nil? self.cache_object.feed_data = new_feed_data end end # Returns the feed's raw data as utf-8. def feed_data_utf_8(force_encoding=nil) if @feed_data_utf_8.nil? raw_data = self.feed_data if force_encoding.nil? use_encoding = self.encoding else use_encoding = force_encoding end if use_encoding != "utf-8" begin @feed_data_utf_8 = Iconv.new('utf-8', use_encoding).iconv(raw_data) rescue return raw_data end else return self.feed_data end end return @feed_data_utf_8 end # Returns the data type of the feed # Possible values: # * :xml # * :yaml # * :text def feed_data_type if @feed_data_type.nil? # Right now, nothing else is supported @feed_data_type = :xml end return @feed_data_type end # Sets the feed's data type. def feed_data_type=(new_feed_data_type) @feed_data_type = new_feed_data_type unless self.cache_object.nil? self.cache_object.feed_data_type = new_feed_data_type end end # Returns a REXML Document of the feed_data def xml_document if self.feed_data_type != :xml @xml_document = nil else if @xml_document.nil? begin begin @xml_document = REXML::Document.new(self.feed_data_utf_8) rescue Object # Something failed, attempt to repair the xml with htree. @xml_document = HTree.parse(self.feed_data_utf_8).to_rexml end rescue Object @xml_document = nil raise end end end return @xml_document end # Returns the first node within the channel_node that matches the xpath # query. def find_node(xpath, select_result_value=false) if self.feed_data_type != :xml raise "The feed data type is not xml." end return FeedTools::XmlHelper.try_xpaths(self.channel_node, [xpath], :select_result_value => select_result_value) end # Returns all nodes within the channel_node that match the xpath query. def find_all_nodes(xpath, select_result_value=false) if self.feed_data_type != :xml raise "The feed data type is not xml." end return FeedTools::XmlHelper.try_xpaths_all(self.channel_node, [xpath], :select_result_value => select_result_value) end # Returns the root node of the feed. def root_node if @root_node.nil? # TODO: Fix this so that added content at the end of the file doesn't # break this stuff. # E.g.: http://smogzer.tripod.com/smog.rdf # =================================================================== begin if self.xml_document.nil? return nil else @root_node = self.xml_document.root end rescue return nil end end return @root_node end # Returns the channel node of the feed. def channel_node if @channel_node.nil? && self.root_node != nil @channel_node = FeedTools::XmlHelper.try_xpaths(self.root_node, [ "channel", "CHANNEL", "feedinfo" ]) if @channel_node == nil @channel_node = self.root_node end end return @channel_node end # The cache object that handles the feed persistence. def cache_object if !@href.nil? && @href =~ /^file:\/\// return nil end unless FeedTools.feed_cache.nil? if @cache_object.nil? begin if @href != nil begin @cache_object = FeedTools.feed_cache.find_by_href(@href) rescue warn("The feed cache seems to be having trouble with the " + "find_by_href method. This may cause unexpected results.") end end if @cache_object.nil? @cache_object = FeedTools.feed_cache.new end rescue end end end return @cache_object end # Sets the cache object for this feed. # # This can be any object, but it must accept the following messages: # href # href= # title # title= # link # link= # feed_data # feed_data= # feed_data_type # feed_data_type= # etag # etag= # last_modified # last_modified= # save def cache_object=(new_cache_object) @cache_object = new_cache_object end # Returns the type of feed # Possible values: # "rss", "atom", "cdf", "!okay/news" def feed_type if @feed_type.nil? if self.root_node.nil? return nil end case self.root_node.name.downcase when "feed" @feed_type = "atom" when "rdf:rdf" @feed_type = "rss" when "rdf" @feed_type = "rss" when "rss" @feed_type = "rss" when "channel" if self.root_node.namespace == FEED_TOOLS_NAMESPACES['rss11'] @feed_type = "rss" else @feed_type = "cdf" end end end return @feed_type end # Sets the default feed type def feed_type=(new_feed_type) @feed_type = new_feed_type end # Returns the version number of the feed type. # Intentionally does not differentiate between the Netscape and Userland # versions of RSS 0.91. def feed_version if @feed_version.nil? if self.root_node.nil? return nil end version = nil begin version_string = FeedTools::XmlHelper.try_xpaths(self.root_node, [ "@version" ], :select_result_value => true) unless version_string.nil? version = version_string.to_f end rescue end version = nil if version == 0.0 default_namespace = FeedTools::XmlHelper.try_xpaths(self.root_node, [ "@xmlns" ], :select_result_value => true) case self.feed_type when "atom" if default_namespace == FEED_TOOLS_NAMESPACES['atom10'] @feed_version = 1.0 elsif version != nil @feed_version = version elsif default_namespace == FEED_TOOLS_NAMESPACES['atom03'] @feed_version = 0.3 end when "rss" if default_namespace == FEED_TOOLS_NAMESPACES['rss09'] @feed_version = 0.9 elsif default_namespace == FEED_TOOLS_NAMESPACES['rss10'] @feed_version = 1.0 elsif default_namespace == FEED_TOOLS_NAMESPACES['rss11'] @feed_version = 1.1 elsif version != nil case version when 2.1 @feed_version = 2.0 when 2.01 @feed_version = 2.0 else @feed_version = version end end when "cdf" @feed_version = 0.4 when "!okay/news" @feed_version = 1.0 end end return @feed_version end # Sets the default feed version def feed_version=(new_feed_version) @feed_version = new_feed_version end # Returns the feed's unique id def id if @id.nil? @id = FeedTools::XmlHelper.select_not_blank([ FeedTools::XmlHelper.try_xpaths(self.channel_node, [ "atom10:id/text()", "atom03:id/text()", "atom:id/text()", "id/text()", "guid/text()" ], :select_result_value => true), FeedTools::XmlHelper.try_xpaths(self.root_node, [ "atom10:id/text()", "atom03:id/text()", "atom:id/text()", "id/text()", "guid/text()" ], :select_result_value => true) ]) end return @id end # Sets the feed's unique id def id=(new_id) @id = new_id end # Returns the feed url. def href if @href_overridden != true || @href.nil? original_href = @href override_href = lambda do |current_href| begin if current_href.nil? && self.feed_data != nil # The current url is nil and we have feed data to go on true elsif current_href != nil && !(["http", "https"].include?( URI.parse(current_href.to_s).scheme)) if self.feed_data != nil # The current url is set, but isn't a http/https url and # we have feed data to use to replace the current url with true else # The current url is set, but isn't a http/https url but # we don't have feed data to use to replace the current url # with so we'll have to wait until we do false end else # The current url is set to an http/https url and there's # no compelling reason to override it false end rescue # Something went wrong, so we should err on the side of caution # and attempt to override the url true end end if override_href.call(@href) && self.feed_data != nil # rdf:about is ordered last because a lot of people put the url to # the feed inside it instead of a link to their blog. # Ordering it last gives them as many chances as humanly possible # for them to redeem themselves. If the link turns out to be the # same as the blog link, it will be reset to the original value. for link_object in self.links if link_object.rel == 'self' if link_object.href != self.link @href = link_object.href @href_overridden = true return @href end end end @href = FeedTools::XmlHelper.try_xpaths(self.channel_node, [ "admin:feed/@rdf:resource", "admin:feed/@resource", "feed/@rdf:resource", "feed/@resource", "@rdf:about", "@about" ], :select_result_value => true) do |result| override_href.call(FeedTools::UriHelper.normalize_url(result)) end begin if !(@href =~ /^file:/) && !FeedTools::UriHelper.is_uri?(@href) @href = FeedTools::UriHelper.resolve_relative_uri( @href, [self.base_uri]) end rescue end if self.configurations[:url_normalization_enabled] @href = FeedTools::UriHelper.normalize_url(@href) end @href.strip! unless @href.nil? @href = nil if @href.blank? @href_overridden = true if @href == nil @href = original_href @href_overridden = false end if @href == self.link @href = original_href @href_overridden = false end end end return @href end # Sets the feed url and prepares the cache_object if necessary. def href=(new_href) @href = FeedTools::UriHelper.normalize_url(new_href) self.cache_object.href = new_href unless self.cache_object.nil? end # Returns the feed title def title if @title.nil? repair_entities = false title_node = FeedTools::XmlHelper.try_xpaths(self.channel_node, [ "atom10:title", "atom03:title", "atom:title", "title", "dc:title", "channelTitle" ]) @title = FeedTools::HtmlHelper.process_text_construct(title_node, self.feed_type, self.feed_version) if self.feed_type == "atom" || self.configurations[:always_strip_wrapper_elements] @title = FeedTools::HtmlHelper.strip_wrapper_element(@title) end @title = nil if @title.blank? self.cache_object.title = @title unless self.cache_object.nil? end return @title end # Sets the feed title def title=(new_title) @title = new_title self.cache_object.title = new_title unless self.cache_object.nil? end # Returns the feed subtitle def subtitle if @subtitle.nil? subtitle_node = FeedTools::XmlHelper.try_xpaths(self.channel_node, [ "atom10:subtitle", "subtitle", "atom03:tagline", "tagline", "description", "summary", "abstract", "content:encoded", "encoded", "content", "xhtml:body", "body", "xhtml:div", "div", "p:payload", "payload", "channelDescription", "blurb", "info" ]) @subtitle = FeedTools::HtmlHelper.process_text_construct( subtitle_node, self.feed_type, self.feed_version) if self.feed_type == "atom" || self.configurations[:always_strip_wrapper_elements] @subtitle = FeedTools::HtmlHelper.strip_wrapper_element(@subtitle) end if @subtitle.blank? @subtitle = self.itunes_summary end if @subtitle.blank? @subtitle = self.itunes_subtitle end end return @subtitle end # Sets the feed subtitle def subtitle=(new_subtitle) @subtitle = new_subtitle end # Returns the contents of the itunes:summary element def itunes_summary if @itunes_summary.nil? @itunes_summary = FeedTools::XmlHelper.select_not_blank([ FeedTools::XmlHelper.try_xpaths(self.channel_node, [ "itunes:summary/text()" ], :select_result_value => true), FeedTools::XmlHelper.try_xpaths(self.root_node, [ "itunes:summary/text()" ], :select_result_value => true) ]) unless @itunes_summary.blank? @itunes_summary = FeedTools::HtmlHelper.unescape_entities(@itunes_summary) @itunes_summary = FeedTools::HtmlHelper.sanitize_html(@itunes_summary) @itunes_summary.strip! else @itunes_summary = nil end end return @itunes_summary end # Sets the contents of the itunes:summary element def itunes_summary=(new_itunes_summary) @itunes_summary = new_itunes_summary end # Returns the contents of the itunes:subtitle element def itunes_subtitle if @itunes_subtitle.nil? @itunes_subtitle = FeedTools::XmlHelper.select_not_blank([ FeedTools::XmlHelper.try_xpaths(self.channel_node, [ "itunes:subtitle/text()" ], :select_result_value => true), FeedTools::XmlHelper.try_xpaths(self.root_node, [ "itunes:subtitle/text()" ], :select_result_value => true) ]) unless @itunes_subtitle.blank? @itunes_subtitle = FeedTools::HtmlHelper.unescape_entities(@itunes_subtitle) @itunes_subtitle = FeedTools::HtmlHelper.sanitize_html(@itunes_subtitle) @itunes_subtitle.strip! else @itunes_subtitle = nil end end return @itunes_subtitle end # Sets the contents of the itunes:subtitle element def itunes_subtitle=(new_itunes_subtitle) @itunes_subtitle = new_itunes_subtitle end # Returns the contents of the media:text element def media_text if @media_text.nil? @media_text = FeedTools::XmlHelper.select_not_blank([ FeedTools::XmlHelper.try_xpaths(self.channel_node, [ "media:text/text()" ], :select_result_value => true), FeedTools::XmlHelper.try_xpaths(self.root_node, [ "media:text/text()" ], :select_result_value => true) ]) unless @media_text.blank? @media_text = FeedTools::HtmlHelper.unescape_entities(@media_text) @media_text = FeedTools::HtmlHelper.sanitize_html(@media_text) @media_text.strip! else @media_text = nil end end return @media_text end # Sets the contents of the media:text element def media_text=(new_media_text) @media_text = new_media_text end # Returns the feed link def link if @link.nil? max_score = 0 for link_object in self.links.reverse score = 0 if FeedTools::HtmlHelper.html_type?(link_object.type) score = score + 2 elsif link_object.type != nil score = score - 1 end if FeedTools::HtmlHelper.xml_type?(link_object.type) score = score + 1 end if link_object.rel == "alternate" score = score + 1 end if link_object.rel == "self" score = score - 1 end if score >= max_score max_score = score @link = link_object.href end end if @link.blank? @link = FeedTools::XmlHelper.try_xpaths(self.channel_node, [ "@href", "@rdf:about", "@about" ], :select_result_value => true) end if @link.blank? if FeedTools::UriHelper.is_uri?(self.id) && (self.id =~ /^http/) @link = self.id end end if !@link.blank? @link = FeedTools::HtmlHelper.unescape_entities(@link) end @link = nil if @link.blank? begin if !(@link =~ /^file:/) && !FeedTools::UriHelper.is_uri?(@link) channel_base_uri = nil unless self.channel_node.nil? channel_base_uri = self.channel_node.base_uri end @link = FeedTools::UriHelper.resolve_relative_uri( @link, [channel_base_uri, self.base_uri]) end rescue end if self.configurations[:url_normalization_enabled] @link = FeedTools::UriHelper.normalize_url(@link) end unless self.cache_object.nil? self.cache_object.link = @link end end return @link end # Sets the feed link def link=(new_link) @link = new_link unless self.cache_object.nil? self.cache_object.link = new_link end end # Returns the links collection def links if @links.blank? @links = [] link_nodes = FeedTools::XmlHelper.combine_xpaths_all(self.channel_node, [ "atom10:link", "atom03:link", "atom:link", "link", "channelLink", "a", "url", "href" ]) for link_node in link_nodes link_object = FeedTools::Link.new link_object.href = FeedTools::XmlHelper.try_xpaths(link_node, [ "@atom10:href", "@atom03:href", "@atom:href", "@href", "text()" ], :select_result_value => true) if link_object.href.nil? && link_node.base_uri != nil link_object.href = "" end begin if !(link_object.href =~ /^file:/) && !FeedTools::UriHelper.is_uri?(link_object.href) link_object.href = FeedTools::UriHelper.resolve_relative_uri( link_object.href, [link_node.base_uri, self.base_uri]) end rescue end if self.configurations[:url_normalization_enabled] link_object.href = FeedTools::UriHelper.normalize_url(link_object.href) end link_object.href.strip! unless link_object.href.nil? next if link_object.href.blank? link_object.hreflang = FeedTools::XmlHelper.try_xpaths(link_node, [ "@atom10:hreflang", "@atom03:hreflang", "@atom:hreflang", "@hreflang" ], :select_result_value => true) unless link_object.hreflang.nil? link_object.hreflang = link_object.hreflang.downcase end link_object.rel = FeedTools::XmlHelper.try_xpaths(link_node, [ "@atom10:rel", "@atom03:rel", "@atom:rel", "@rel" ], :select_result_value => true) unless link_object.rel.nil? link_object.rel = link_object.rel.downcase end link_object.type = FeedTools::XmlHelper.try_xpaths(link_node, [ "@atom10:type", "@atom03:type", "@atom:type", "@type" ], :select_result_value => true) unless link_object.type.nil? link_object.type = link_object.type.downcase end link_object.title = FeedTools::XmlHelper.try_xpaths(link_node, [ "@atom10:title", "@atom03:title", "@atom:title", "@title", "text()" ], :select_result_value => true) # This catches the ambiguities between atom, rss, and cdf if link_object.title == link_object.href link_object.title = nil end link_object.length = FeedTools::XmlHelper.try_xpaths(link_node, [ "@atom10:length", "@atom03:length", "@atom:length", "@length" ], :select_result_value => true) if !link_object.length.nil? link_object.length = link_object.length.to_i else if !link_object.type.nil? && link_object.type[0..4] != "text" && link_object.type[-3..-1] != "xml" && link_object.href =~ /^http:\/\// # Retrieve the length with an http HEAD request else link_object.length = nil end end @links << link_object end end return @links end # Sets the links collection def links=(new_links) @links = new_links end # Returns the base uri for the feed, used for resolving relative paths def base_uri if @base_uri.nil? @base_uri = FeedTools::XmlHelper.try_xpaths(self.channel_node, [ "@base" ], :select_result_value => true) if @base_uri.blank? @base_uri = FeedTools::GenericHelper.recursion_trap(:feed_base_uri) do self.href end end if !@base_uri.blank? @base_uri = FeedTools::UriHelper.normalize_url(@base_uri) end end return @base_uri end # Sets the base uri for the feed def base_uri=(new_base_uri) @base_uri = new_base_uri end # Returns the url to the icon file for this feed. def icon if @icon.nil? icon_node = FeedTools::XmlHelper.try_xpaths(self.channel_node, [ "link[@rel='icon']", "link[@rel='shortcut icon']", "link[@type='image/x-icon']", "icon", "logo[@style='icon']", "LOGO[@STYLE='ICON']" ]) unless icon_node.nil? @icon = FeedTools::XmlHelper.try_xpaths(icon_node, [ "@atom10:href", "@atom03:href", "@atom:href", "@href", "text()" ], :select_result_value => true) begin if !(@icon =~ /^file:/) && !FeedTools::UriHelper.is_uri?(@icon) channel_base_uri = nil unless self.channel_node.nil? channel_base_uri = self.channel_node.base_uri end @icon = FeedTools::UriHelper.resolve_relative_uri( @icon, [channel_base_uri, self.base_uri]) end rescue end @icon = nil unless FeedTools::UriHelper.is_uri?(@icon) @icon = nil if @icon.blank? end end return @icon end # Returns the favicon url for this feed. # This method first tries to use the url from the link field instead of # the feed url, in order to avoid grabbing the favicon for services like # feedburner. def favicon if @favicon.nil? if !self.link.blank? begin link_uri = URI.parse( FeedTools::UriHelper.normalize_url(self.link)) if link_uri.scheme == "http" @favicon = "http://" + link_uri.host + "/favicon.ico" end rescue @favicon = nil end if @favicon.nil? && !self.href.blank? begin feed_uri = URI.parse( FeedTools::UriHelper.normalize_url(self.href)) if feed_uri.scheme == "http" @favicon = "http://" + feed_uri.host + "/favicon.ico" end rescue @favicon = nil end end else @favicon = nil end end return @favicon end # Returns the feed author def author if @author.nil? @author = FeedTools::Author.new author_node = FeedTools::XmlHelper.try_xpaths(self.channel_node, [ "atom10:author", "atom03:author", "atom:author", "author", "managingEditor", "dc:author", "dc:creator" ]) unless author_node.nil? @author.raw = FeedTools::XmlHelper.try_xpaths( author_node, ["text()"], :select_result_value => true) @author.raw = FeedTools::HtmlHelper.unescape_entities(@author.raw) unless @author.raw.nil? raw_scan = @author.raw.scan( /(.*)\((\b[A-Z0-9._%-\+]+@[A-Z0-9._%-]+\.[A-Z]{2,4}\b)\)/i) if raw_scan.nil? || raw_scan.size == 0 raw_scan = @author.raw.scan( /(\b[A-Z0-9._%-\+]+@[A-Z0-9._%-]+\.[A-Z]{2,4}\b)\s*\((.*)\)/i) unless raw_scan.size == 0 author_raw_pair = raw_scan.first.reverse end else author_raw_pair = raw_scan.first end if raw_scan.nil? || raw_scan.size == 0 email_scan = @author.raw.scan( /\b[A-Z0-9._%-\+]+@[A-Z0-9._%-]+\.[A-Z]{2,4}\b/i) if email_scan != nil && email_scan.size > 0 @author.email = email_scan.first.strip end end unless author_raw_pair.nil? || author_raw_pair.size == 0 @author.name = author_raw_pair.first.strip @author.email = author_raw_pair.last.strip else unless @author.raw.include?("@") # We can be reasonably sure we are looking at something # that the creator didn't intend to contain an email address # if it got through the preceeding regexes and it doesn't # contain the tell-tale '@' symbol. @author.name = @author.raw end end end if @author.name.blank? @author.name = FeedTools::HtmlHelper.unescape_entities( FeedTools::XmlHelper.try_xpaths(author_node, [ "atom10:name/text()", "atom03:name/text()", "atom:name/text()", "name/text()", "@name" ], :select_result_value => true) ) end if @author.email.blank? @author.email = FeedTools::HtmlHelper.unescape_entities( FeedTools::XmlHelper.try_xpaths(author_node, [ "atom10:email/text()", "atom03:email/text()", "atom:email/text()", "email/text()", "@email" ], :select_result_value => true) ) end if @author.url.blank? @author.url = FeedTools::HtmlHelper.unescape_entities( FeedTools::XmlHelper.try_xpaths(author_node, [ "atom10:url/text()", "atom03:url/text()", "atom:url/text()", "url/text()", "atom10:uri/text()", "atom03:uri/text()", "atom:uri/text()", "uri/text()", "@href", "@uri", "@href" ], :select_result_value => true) ) end @author.name = nil if @author.name.blank? @author.raw = nil if @author.raw.blank? @author.email = nil if @author.email.blank? @author.url = nil if @author.url.blank? if @author.url != nil begin if !(@author.url =~ /^file:/) && !FeedTools::UriHelper.is_uri?(@author.url) @author.url = FeedTools::UriHelper.resolve_relative_uri( @author.url, [author_node.base_uri, self.base_uri]) end rescue end end end # Fallback on the itunes module if we didn't find an author name begin @author.name = self.itunes_author if @author.name.nil? rescue @author.name = nil end end return @author end # Sets the feed author def author=(new_author) if new_author.respond_to?(:name) && new_author.respond_to?(:email) && new_author.respond_to?(:url) # It's a complete author object, just set it. @author = new_author else # We're not looking at an author object, this is probably a string, # default to setting the author's name. if @author.nil? @author = FeedTools::Author.new end @author.name = new_author end end # Returns the feed publisher def publisher if @publisher.nil? @publisher = FeedTools::Author.new @publisher.raw = FeedTools::HtmlHelper.unescape_entities( FeedTools::XmlHelper.try_xpaths(self.channel_node, [ "webMaster/text()", "dc:publisher/text()" ], :select_result_value => true)) unless @publisher.raw.blank? raw_scan = @publisher.raw.scan( /(.*)\((\b[A-Z0-9._%-\+]+@[A-Z0-9._%-]+\.[A-Z]{2,4}\b)\)/i) if raw_scan.nil? || raw_scan.size == 0 raw_scan = @publisher.raw.scan( /(\b[A-Z0-9._%-\+]+@[A-Z0-9._%-]+\.[A-Z]{2,4}\b)\s*\((.*)\)/i) unless raw_scan.size == 0 publisher_raw_pair = raw_scan.first.reverse end else publisher_raw_pair = raw_scan.first end if raw_scan.nil? || raw_scan.size == 0 email_scan = @publisher.raw.scan( /\b[A-Z0-9._%-\+]+@[A-Z0-9._%-]+\.[A-Z]{2,4}\b/i) if email_scan != nil && email_scan.size > 0 @publisher.email = email_scan.first.strip end end unless publisher_raw_pair.nil? || publisher_raw_pair.size == 0 @publisher.name = publisher_raw_pair.first.strip @publisher.email = publisher_raw_pair.last.strip else unless @publisher.raw.include?("@") # We can be reasonably sure we are looking at something # that the creator didn't intend to contain an email address if # it got through the preceeding regexes and it doesn't # contain the tell-tale '@' symbol. @publisher.name = @publisher.raw end end end @publisher.name = nil if @publisher.name.blank? @publisher.raw = nil if @publisher.raw.blank? @publisher.email = nil if @publisher.email.blank? @publisher.url = nil if @publisher.url.blank? if @publisher.url != nil begin if !(@publisher.url =~ /^file:/) && !FeedTools::UriHelper.is_uri?(@publisher.url) channel_base_uri = nil unless self.channel_node.nil? channel_base_uri = self.channel_node.base_uri end @publisher.url = FeedTools::UriHelper.resolve_relative_uri( @publisher.url, [channel_base_uri, self.base_uri]) end rescue end end end return @publisher end # Sets the feed publisher def publisher=(new_publisher) if new_publisher.respond_to?(:name) && new_publisher.respond_to?(:email) && new_publisher.respond_to?(:url) # It's a complete Author object, just set it. @publisher = new_publisher else # We're not looking at an Author object, this is probably a string, # default to setting the publisher's name. if @publisher.nil? @publisher = FeedTools::Author.new end @publisher.name = new_publisher end end # Returns the contents of the itunes:author element # # Returns any incorrectly placed channel-level itunes:author # elements. They're actually amazingly common. People don't read specs. # There is no setter for this, since this is an incorrectly placed # attribute. def itunes_author if @itunes_author.nil? @itunes_author = FeedTools::HtmlHelper.unescape_entities( FeedTools::XmlHelper.try_xpaths(self.channel_node, [ "itunes:author/text()" ], :select_result_value => true) ) @itunes_author = nil if @itunes_author.blank? end return @itunes_author end # Returns the feed time def time if @time.nil? time_string = FeedTools::XmlHelper.try_xpaths(self.channel_node, [ "atom10:updated/text()", "atom03:updated/text()", "atom:updated/text()", "updated/text()", "atom10:modified/text()", "atom03:modified/text()", "atom:modified/text()", "modified/text()", "time/text()", "lastBuildDate/text()", "atom10:issued/text()", "atom03:issued/text()", "atom:issued/text()", "issued/text()", "atom10:published/text()", "atom03:published/text()", "atom:published/text()", "published/text()", "dc:date/text()", "pubDate/text()", "date/text()" ], :select_result_value => true) begin unless time_string.blank? @time = Time.parse(time_string).gmtime else if self.configurations[:timestamp_estimation_enabled] @time = Time.now.gmtime end end rescue if self.configurations[:timestamp_estimation_enabled] @time = Time.now.gmtime end end end return @time end # Sets the feed time def time=(new_time) @time = new_time end # Returns the feed updated time def updated if @updated.nil? updated_string = FeedTools::XmlHelper.try_xpaths(self.channel_node, [ "atom10:updated/text()", "atom03:updated/text()", "atom:updated/text()", "updated/text()", "atom10:modified/text()", "atom03:modified/text()", "atom:modified/text()", "modified/text()", "lastBuildDate/text()" ], :select_result_value => true) unless updated_string.blank? @updated = Time.parse(updated_string).gmtime rescue nil else @updated = nil end end return @updated end # Sets the feed updated time def updated=(new_updated) @updated = new_updated end # Returns the feed published time def published if @published.nil? published_string = FeedTools::XmlHelper.try_xpaths(self.channel_node, [ "atom10:published/text()", "atom03:published/text()", "atom:published/text()", "published/text()", "dc:date/text()", "pubDate/text()", "atom10:issued/text()", "atom03:issued/text()", "atom:issued/text()", "issued/text()" ], :select_result_value => true) unless published_string.blank? @published = Time.parse(published_string).gmtime rescue nil else @published = nil end end return @published end # Sets the feed published time def published=(new_published) @published = new_published end # Returns a list of the feed's categories def categories if @categories.nil? @categories = [] category_nodes = FeedTools::XmlHelper.try_xpaths_all(self.channel_node, [ "category", "dc:subject" ]) unless category_nodes.nil? for category_node in category_nodes category = FeedTools::Category.new category.term = FeedTools::XmlHelper.try_xpaths(category_node, [ "@term", "text()" ], :select_result_value => true) category.term.strip! unless category.term.blank? category.label = FeedTools::XmlHelper.try_xpaths( category_node, ["@label"], :select_result_value => true) category.label.strip! unless category.label.blank? category.scheme = FeedTools::XmlHelper.try_xpaths(category_node, [ "@scheme", "@domain" ], :select_result_value => true) category.scheme.strip! unless category.scheme.blank? @categories << category end end end return @categories end # Returns a list of the feed's images def images if @images.nil? @images = [] image_nodes = FeedTools::XmlHelper.combine_xpaths_all( self.channel_node, [ "image", "logo", "apple-wallpapers:image", "imageUrl" ] ) unless image_nodes.blank? for image_node in image_nodes image = FeedTools::Image.new image.href = FeedTools::XmlHelper.try_xpaths(image_node, [ "url/text()", "@rdf:resource", "@href", "text()" ], :select_result_value => true) if image.href.nil? && image_node.base_uri != nil image.href = "" end begin if !(image.href =~ /^file:/) && !FeedTools::UriHelper.is_uri?(image.href) image.href = FeedTools::UriHelper.resolve_relative_uri( image.href, [image_node.base_uri, self.base_uri]) end rescue end if self.configurations[:url_normalization_enabled] image.href = FeedTools::UriHelper.normalize_url(image.href) end image.href.strip! unless image.href.nil? next if image.href.blank? image.title = FeedTools::XmlHelper.try_xpaths(image_node, ["title/text()"], :select_result_value => true) image.title.strip! unless image.title.nil? image.description = FeedTools::XmlHelper.try_xpaths(image_node, ["description/text()"], :select_result_value => true) image.description.strip! unless image.description.nil? image.link = FeedTools::XmlHelper.try_xpaths(image_node, ["link/text()"], :select_result_value => true) image.link.strip! unless image.link.nil? image.height = FeedTools::XmlHelper.try_xpaths(image_node, ["height/text()"], :select_result_value => true).to_i image.height = nil if image.height <= 0 image.width = FeedTools::XmlHelper.try_xpaths(image_node, ["width/text()"], :select_result_value => true).to_i image.width = nil if image.width <= 0 image.style = FeedTools::XmlHelper.try_xpaths(image_node, [ "style/text()", "@style" ], :select_result_value => true) image.style.strip! unless image.style.nil? image.style.downcase! unless image.style.nil? @images << image unless image.href.nil? end end for link_object in self.links if link_object.type != nil && link_object.type =~ /^image/ image = FeedTools::Image.new image.href = link_object.href image.title = link_object.title @images << image unless image.href.nil? end end end return @images end # Returns the feed's text input field def text_input if @text_input.nil? @text_input = FeedTools::TextInput.new text_input_node = FeedTools::XmlHelper.try_xpaths(self.channel_node, ["textInput"]) unless text_input_node.nil? @text_input.title = FeedTools::XmlHelper.try_xpaths(text_input_node, ["title/text()"], :select_result_value => true) @text_input.description = FeedTools::XmlHelper.try_xpaths(text_input_node, ["description/text()"], :select_result_value => true) @text_input.link = FeedTools::XmlHelper.try_xpaths(text_input_node, ["link/text()"], :select_result_value => true) @text_input.name = FeedTools::XmlHelper.try_xpaths(text_input_node, ["name/text()"], :select_result_value => true) end end return @text_input end # Returns the feed's copyright information def rights if @rights.nil? repair_entities = false rights_node = FeedTools::XmlHelper.try_xpaths(self.channel_node, [ "atom10:copyright", "atom03:copyright", "atom:copyright", "copyright", "copyrights", "dc:rights", "rights" ]) @rights = FeedTools::HtmlHelper.process_text_construct(rights_node, self.feed_type, self.feed_version) if self.feed_type == "atom" || self.configurations[:always_strip_wrapper_elements] @rights = FeedTools::HtmlHelper.strip_wrapper_element(@rights) end end return @rights end # Sets the feed's rights information def rights=(new_rights) @rights = new_rights end def license #:nodoc: raise "Not implemented yet." end def license=(new_license) #:nodoc: raise "Not implemented yet." end # Returns the number of seconds before the feed should expire def time_to_live if @time_to_live.nil? unless channel_node.nil? # get the feed time to live from the xml document update_frequency = FeedTools::XmlHelper.try_xpaths( self.channel_node, ["syn:updateFrequency/text()"], :select_result_value => true) if !update_frequency.blank? update_period = FeedTools::XmlHelper.try_xpaths( self.channel_node, ["syn:updatePeriod/text()"], :select_result_value => true) if update_period == "daily" @time_to_live = update_frequency.to_i.day elsif update_period == "weekly" @time_to_live = update_frequency.to_i.week elsif update_period == "monthly" @time_to_live = update_frequency.to_i.month elsif update_period == "yearly" @time_to_live = update_frequency.to_i.year else # hourly @time_to_live = update_frequency.to_i.hour end end if @time_to_live.nil? # usually expressed in minutes update_frequency = FeedTools::XmlHelper.try_xpaths( self.channel_node, ["ttl/text()"], :select_result_value => true) if !update_frequency.blank? update_span = FeedTools::XmlHelper.try_xpaths( self.channel_node, ["ttl/@span"], :select_result_value => true) if update_span == "seconds" @time_to_live = update_frequency.to_i elsif update_span == "minutes" @time_to_live = update_frequency.to_i.minute elsif update_span == "hours" @time_to_live = update_frequency.to_i.hour elsif update_span == "days" @time_to_live = update_frequency.to_i.day elsif update_span == "weeks" @time_to_live = update_frequency.to_i.week elsif update_span == "months" @time_to_live = update_frequency.to_i.month elsif update_span == "years" @time_to_live = update_frequency.to_i.year else @time_to_live = update_frequency.to_i.minute end end end if @time_to_live.nil? @time_to_live = 0 update_frequency_days = FeedTools::XmlHelper.try_xpaths(self.channel_node, ["schedule/intervaltime/@day"], :select_result_value => true) update_frequency_hours = FeedTools::XmlHelper.try_xpaths(self.channel_node, ["schedule/intervaltime/@hour"], :select_result_value => true) update_frequency_minutes = FeedTools::XmlHelper.try_xpaths(self.channel_node, ["schedule/intervaltime/@min"], :select_result_value => true) update_frequency_seconds = FeedTools::XmlHelper.try_xpaths(self.channel_node, ["schedule/intervaltime/@sec"], :select_result_value => true) if !update_frequency_days.blank? @time_to_live = @time_to_live + update_frequency_days.to_i.day end if !update_frequency_hours.blank? @time_to_live = @time_to_live + update_frequency_hours.to_i.hour end if !update_frequency_minutes.blank? @time_to_live = @time_to_live + update_frequency_minutes.to_i.minute end if !update_frequency_seconds.blank? @time_to_live = @time_to_live + update_frequency_seconds.to_i end if @time_to_live == 0 @time_to_live = 1.hour end end end end if @time_to_live.nil? || @time_to_live == 0 # Default to one hour @time_to_live = 1.hour elsif self.configurations[:max_ttl] != nil && self.configurations[:max_ttl] != 0 && @time_to_live >= self.configurations[:max_ttl].to_i @time_to_live = self.configurations[:max_ttl].to_i end @time_to_live = @time_to_live.round return @time_to_live end # Sets the feed time to live def time_to_live=(new_time_to_live) @time_to_live = new_time_to_live.round @time_to_live = 1.hour if @time_to_live < 1.hour end # Returns the feed's cloud def cloud if @cloud.nil? @cloud = FeedTools::Cloud.new @cloud.domain = FeedTools::XmlHelper.try_xpaths( self.channel_node, ["cloud/@domain"], :select_result_value => true) @cloud.port = FeedTools::XmlHelper.try_xpaths( self.channel_node, ["cloud/@port"], :select_result_value => true) @cloud.path = FeedTools::XmlHelper.try_xpaths( self.channel_node, ["cloud/@path"], :select_result_value => true) @cloud.register_procedure = FeedTools::XmlHelper.try_xpaths( self.channel_node, ["cloud/@registerProcedure"], :select_result_value => true) @cloud.protocol = FeedTools::XmlHelper.try_xpaths( self.channel_node, ["cloud/@protocol"], :select_result_value => true) @cloud.protocol.downcase unless @cloud.protocol.nil? @cloud.port = @cloud.port.to_s.to_i @cloud.port = nil if @cloud.port == 0 end return @cloud end # Sets the feed's cloud def cloud=(new_cloud) @cloud = new_cloud end # Returns the feed generator def generator if @generator.nil? @generator = FeedTools::XmlHelper.try_xpaths( self.channel_node, ["generator/text()"], :select_result_value => true) unless @generator.nil? @generator = FeedTools::HtmlHelper.convert_html_to_plain_text(@generator) end end return @generator end # Sets the feed generator # # Note: Setting this variable will NOT cause this to appear in any # generated output. The generator string is created from the # :generator_name and :generator_href configuration # variables. def generator=(new_generator) @generator = new_generator end # Returns the feed docs def docs if @docs.nil? @docs = FeedTools::XmlHelper.try_xpaths( self.channel_node, ["docs/text()"], :select_result_value => true) begin if !(@docs =~ /^file:/) && !FeedTools::UriHelper.is_uri?(@docs) channel_base_uri = nil unless self.channel_node.nil? channel_base_uri = self.channel_node.base_uri end @docs = FeedTools::UriHelper.resolve_relative_uri( @docs, [channel_base_uri, self.base_uri]) end rescue end if self.configurations[:url_normalization_enabled] @docs = FeedTools::UriHelper.normalize_url(@docs) end end return @docs end # Sets the feed docs def docs=(new_docs) @docs = new_docs end # Returns the feed language def language if @language.nil? @language = FeedTools::XmlHelper.select_not_blank([ FeedTools::XmlHelper.try_xpaths(self.channel_node, [ "language/text()", "dc:language/text()", "@dc:language", "@xml:lang", "xml:lang/text()" ], :select_result_value => true), FeedTools::XmlHelper.try_xpaths(self.root_node, [ "@xml:lang", "xml:lang/text()" ], :select_result_value => true) ]) if @language.blank? @language = "en-us" end @language = @language.downcase end return @language end # Sets the feed language def language=(new_language) @language = new_language end # Returns true if this feed contains explicit material. def explicit? if @explicit.nil? explicit_string = FeedTools::XmlHelper.try_xpaths(self.channel_node, [ "media:adult/text()", "itunes:explicit/text()" ], :select_result_value => true) if explicit_string == "true" || explicit_string == "yes" @explicit = true else @explicit = false end end return @explicit end # Sets whether or not the feed contains explicit material def explicit=(new_explicit) @explicit = (new_explicit ? true : false) end # Returns the feed entries def entries if @entries.nil? raw_entries = FeedTools::XmlHelper.select_not_blank([ FeedTools::XmlHelper.try_xpaths_all(self.channel_node, [ "atom10:entry", "atom03:entry", "atom:entry", "entry" ]), FeedTools::XmlHelper.try_xpaths_all(self.root_node, [ "rss10:item", "rss11:items/rss11:item", "rss11:items/item", "items/rss11:item", "items/item", "item", "atom10:entry", "atom03:entry", "atom:entry", "entry" ]), FeedTools::XmlHelper.try_xpaths_all(self.channel_node, [ "rss10:item", "rss11:items/rss11:item", "rss11:items/item", "items/rss11:item", "items/item", "item" ]) ]) # create the individual feed items @entries = [] unless raw_entries.blank? for entry_node in raw_entries.reverse new_entry = FeedItem.new new_entry.feed_data = entry_node.to_s new_entry.feed_data_type = self.feed_data_type new_entry.root_node = entry_node if new_entry.root_node.namespace.blank? new_entry.root_node.add_namespace(self.root_node.namespace) end @entries << new_entry end end end # Sort the items if self.configurations[:entry_sorting_property] == "time" @entries = @entries.sort do |a, b| (b.time or Time.utc(1970)) <=> (a.time or Time.utc(1970)) end elsif self.configurations[:entry_sorting_property] != nil sorting_property = self.configurations[:entry_sorting_property] @entries = @entries.sort do |a, b| eval("a.#{sorting_property}") <=> eval("b.#{sorting_property}") end else return @entries.reverse end return @entries end # Sets the entries array to a new array. def entries=(new_entries) for entry in new_entries unless entry.kind_of? FeedTools::FeedItem raise ArgumentError, "You should only add FeedItem objects to the entries array." end end @entries = new_entries end # Syntactic sugar for appending feed items to a feed. def <<(new_entry) @entries ||= [] unless new_entry.kind_of? FeedTools::FeedItem raise ArgumentError, "You should only add FeedItem objects to the entries array." end @entries << new_entry end # The time that the feed was last requested from the remote server. Nil # if it has never been pulled, or if it was created from scratch. def last_retrieved unless self.cache_object.nil? @last_retrieved = self.cache_object.last_retrieved end return @last_retrieved end # Sets the time that the feed was last updated. def last_retrieved=(new_last_retrieved) @last_retrieved = new_last_retrieved unless self.cache_object.nil? self.cache_object.last_retrieved = new_last_retrieved end end # True if this feed contains audio content enclosures def podcast? podcast = false self.items.each do |item| item.enclosures.each do |enclosure| podcast = true if enclosure.audio? end end return podcast end # True if this feed contains video content enclosures def vidlog? vidlog = false self.items.each do |item| item.enclosures.each do |enclosure| vidlog = true if enclosure.video? end end return vidlog end # True if the feed was not last retrieved from the cache. def live? return @live end # True if the feed has expired and must be reacquired from the remote # server. def expired? if (self.last_retrieved == nil) return true elsif (self.time_to_live < 30.minutes) return (self.last_retrieved + 30.minutes) < Time.now.gmtime else return (self.last_retrieved + self.time_to_live) < Time.now.gmtime end end # Forces this feed to expire. def expire! self.last_retrieved = Time.mktime(1970).gmtime self.save end # A hook method that is called during the feed generation process. # Overriding this method will enable additional content to be # inserted into the feed. def build_xml_hook(feed_type, version, xml_builder) return nil end # Generates xml based on the content of the feed def build_xml(feed_type=(self.feed_type or "atom"), feed_version=nil, xml_builder=Builder::XmlMarkup.new( :indent => 2, :escape_attrs => false)) xml_builder.instruct! :xml, :version => "1.0", :encoding => (self.configurations[:output_encoding] or "utf-8") if feed_type.nil? feed_type = self.feed_type end if feed_version.nil? feed_version = self.feed_version end if feed_type == "rss" && (feed_version == nil || feed_version <= 0.0) feed_version = 1.0 elsif feed_type == "atom" && (feed_version == nil || feed_version <= 0.0) feed_version = 1.0 end if feed_type == "rss" && (feed_version == 0.9 || feed_version == 1.0 || feed_version == 1.1) # RDF-based rss format return xml_builder.tag!("rdf:RDF", "xmlns" => FEED_TOOLS_NAMESPACES['rss10'], "xmlns:content" => FEED_TOOLS_NAMESPACES['content'], "xmlns:rdf" => FEED_TOOLS_NAMESPACES['rdf'], "xmlns:dc" => FEED_TOOLS_NAMESPACES['dc'], "xmlns:syn" => FEED_TOOLS_NAMESPACES['syn'], "xmlns:admin" => FEED_TOOLS_NAMESPACES['admin'], "xmlns:taxo" => FEED_TOOLS_NAMESPACES['taxo'], "xmlns:itunes" => FEED_TOOLS_NAMESPACES['itunes'], "xmlns:media" => FEED_TOOLS_NAMESPACES['media']) do channel_attributes = {} unless self.link.nil? channel_attributes["rdf:about"] = FeedTools::HtmlHelper.escape_entities(self.link) end xml_builder.channel(channel_attributes) do unless self.title.blank? xml_builder.title( FeedTools::HtmlHelper.strip_html_tags(self.title)) else xml_builder.title end unless self.link.blank? xml_builder.link(self.link) else xml_builder.link end unless images.blank? xml_builder.image("rdf:resource" => FeedTools::HtmlHelper.escape_entities( images.first.url)) end unless description.nil? || description == "" xml_builder.description(description) else xml_builder.description end unless self.language.blank? xml_builder.tag!("dc:language", self.language) end unless self.rights.blank? xml_builder.tag!("dc:rights", self.rights) end xml_builder.tag!("syn:updatePeriod", "hourly") xml_builder.tag!("syn:updateFrequency", (self.time_to_live / 1.hour).to_s) xml_builder.tag!("syn:updateBase", Time.mktime(1970).iso8601) xml_builder.items do xml_builder.tag!("rdf:Seq") do unless items.nil? for item in items if item.link.nil? raise "Cannot generate an rdf-based feed with a nil " + "item link field." end xml_builder.tag!("rdf:li", "rdf:resource" => FeedTools::HtmlHelper.escape_entities(item.link)) end end end end xml_builder.tag!( "admin:generatorAgent", "rdf:resource" => self.configurations[:generator_href]) build_xml_hook(feed_type, feed_version, xml_builder) end unless self.images.blank? best_image = nil for image in self.images if image.link != nil best_image = image break end end best_image = self.images.first if best_image.nil? xml_builder.image("rdf:about" => FeedTools::HtmlHelper.escape_entities(best_image.url)) do if !best_image.title.blank? xml_builder.title(best_image.title) elsif !self.title.blank? xml_builder.title(self.title) else xml_builder.title end unless best_image.url.blank? xml_builder.url(best_image.url) end if !best_image.link.blank? xml_builder.link(best_image.link) elsif !self.link.blank? xml_builder.link(self.link) else xml_builder.link end end end unless items.nil? for item in items item.build_xml(feed_type, feed_version, xml_builder) end end end elsif feed_type == "rss" # normal rss format return xml_builder.rss("version" => "2.0", "xmlns:content" => FEED_TOOLS_NAMESPACES['content'], "xmlns:rdf" => FEED_TOOLS_NAMESPACES['rdf'], "xmlns:dc" => FEED_TOOLS_NAMESPACES['dc'], "xmlns:taxo" => FEED_TOOLS_NAMESPACES['taxo'], "xmlns:trackback" => FEED_TOOLS_NAMESPACES['trackback'], "xmlns:itunes" => FEED_TOOLS_NAMESPACES['itunes'], "xmlns:media" => FEED_TOOLS_NAMESPACES['media']) do xml_builder.channel do unless self.title.blank? xml_builder.title( FeedTools::HtmlHelper.strip_html_tags(self.title)) end unless self.link.blank? xml_builder.link(link) end unless self.description.blank? xml_builder.description(description) else xml_builder.description end unless self.published.blank? xml_builder.pubDate(self.published.rfc822) end unless self.updated.blank? xml_builder.lastBuildDate(self.updated.rfc822) end unless self.copyright.blank? xml_builder.copyright(self.copyright) end xml_builder.ttl((time_to_live / 1.minute).to_s) xml_builder.generator( self.configurations[:generator_href]) build_xml_hook(feed_type, feed_version, xml_builder) unless items.nil? for item in items item.build_xml(feed_type, feed_version, xml_builder) end end end end elsif feed_type == "atom" && feed_version == 0.3 raise "Atom 0.3 is obsolete." elsif feed_type == "atom" && feed_version == 1.0 # normal atom format return xml_builder.feed("xmlns" => FEED_TOOLS_NAMESPACES['atom10'], "xml:lang" => language) do unless title.blank? xml_builder.title(title, "type" => "html") end xml_builder.author do unless self.author.nil? || self.author.name.nil? xml_builder.name(self.author.name) else xml_builder.name("n/a") end unless self.author.nil? || self.author.email.nil? xml_builder.email(self.author.email) end unless self.author.nil? || self.author.url.nil? xml_builder.uri(self.author.url) end end unless self.href.blank? xml_builder.link("href" => self.href, "rel" => "self", "type" => "application/atom+xml") end unless self.link.blank? xml_builder.link( "href" => FeedTools::HtmlHelper.escape_entities(self.link), "rel" => "alternate") end unless self.subtitle.blank? xml_builder.subtitle(self.subtitle, "type" => "html") end if self.updated != nil xml_builder.updated(self.updated.iso8601) elsif self.time != nil # Not technically correct, but a heck of a lot better # than the Time.now fall-back. xml_builder.updated(self.time.iso8601) else xml_builder.updated(Time.now.gmtime.iso8601) end unless self.rights.blank? xml_builder.rights(self.rights) end xml_builder.generator(self.configurations[:generator_name] + " - " + self.configurations[:generator_href]) if self.id != nil unless FeedTools::UriHelper.is_uri? self.id if self.link != nil xml_builder.id(FeedTools::UriHelper.build_urn_uri(self.link)) else raise "The unique id must be a valid URI." end else xml_builder.id(self.id) end elsif self.link != nil xml_builder.id(FeedTools::UriHelper.build_urn_uri(self.link)) else raise "Cannot build feed, missing feed unique id." end build_xml_hook(feed_type, feed_version, xml_builder) unless items.nil? for item in items item.build_xml(feed_type, feed_version, xml_builder) end end end else raise "Unsupported feed format/version." end end # Persists the current feed state to the cache. def save if self.configurations[:feed_cache].nil? # The cache is disabled for this feed, do nothing. return end if self.http_headers['content-type'] =~ /text\/html/ || self.http_headers['content-type'] =~ /application\/xhtml\+xml/ if self.title.nil? && self.link.nil? && self.entries.blank? # Don't save html pages to the cache, it messes with # autodiscovery. return end end unless self.href =~ /^file:\/\// if FeedTools.feed_cache.nil? raise "Caching is currently disabled. Cannot save to cache." elsif self.href.nil? raise "The url field must be set to save to the cache." elsif self.cache_object.nil? raise "The cache_object is currently nil. Cannot save to cache." else self.cache_object.href = self.href unless self.feed_data.nil? self.cache_object.title = self.title self.cache_object.link = self.link self.cache_object.feed_data = self.feed_data self.cache_object.feed_data_type = self.feed_data_type.to_s end self.cache_object.http_headers = self.http_headers.to_yaml self.cache_object.last_retrieved = self.last_retrieved self.cache_object.save end end end alias_method :url, :href alias_method :url=, :href= alias_method :tagline, :subtitle alias_method :tagline=, :subtitle= alias_method :description, :subtitle alias_method :description=, :subtitle= alias_method :abstract, :subtitle alias_method :abstract=, :subtitle= alias_method :copyright, :rights alias_method :copyright=, :rights= alias_method :ttl, :time_to_live alias_method :ttl=, :time_to_live= alias_method :guid, :id alias_method :guid=, :id= alias_method :items, :entries alias_method :items=, :entries= # passes missing methods to the cache_object def method_missing(msg, *params) if self.cache_object.nil? raise NoMethodError, "Invalid method #{msg.to_s}" end return self.cache_object.send(msg, params) end # passes missing methods to the FeedTools.feed_cache def Feed.method_missing(msg, *params) if FeedTools.feed_cache.nil? raise NoMethodError, "Invalid method Feed.#{msg.to_s}" end result = FeedTools.feed_cache.send(msg, params) if result.kind_of? FeedTools.feed_cache result = Feed.open(result.url) end return result end # Returns a simple representation of the feed object's state. def inspect return "#" end # Allows sorting feeds by title def <=>(other_feed) return self.title.to_s <=> other_feed.title.to_s end end end