#--
# Copyright (c) 2005 Robert Aman
#
# Permission is hereby granted, free of charge, to any person obtaining
# a copy of this software and associated documentation files (the
# "Software"), to deal in the Software without restriction, including
# without limitation the rights to use, copy, modify, merge, publish,
# distribute, sublicense, and/or sell copies of the Software, and to
# permit persons to whom the Software is furnished to do so, subject to
# the following conditions:
#
# The above copyright notice and this permission notice shall be
# included in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#++
# Execution environment, following the Rails convention when present.
FEED_TOOLS_ENV = ENV['FEED_TOOLS_ENV'] ||
ENV['RAILS_ENV'] ||
'production' # :nodoc:
# Library version string.
FEED_TOOLS_VERSION = "0.2.1"
# Make this library, a sibling activerecord checkout, and the bundled
# vendor libraries loadable.
$:.unshift(File.dirname(__FILE__))
$:.unshift(File.dirname(__FILE__) + "/../../activerecord/lib")
$:.unshift(File.dirname(__FILE__) + "/feed_tools/vendor")
require 'rubygems'
require 'active_record'
begin
require 'builder'
rescue LoadError
# RubyGems version is not available, use included Builder
require 'feed_tools/vendor/builder'
end
begin
require 'tidy'
rescue LoadError
# Ignore the error for now.
# (tidy is optional; FeedTools.tidy_enabled? retries the require later.)
end
require 'feed_tools/vendor/htree'
require 'net/http'
require 'net/https'
require 'net/ftp'
require 'rexml/document'
require 'iconv'
require 'uri'
require 'time'
require 'cgi'
require 'pp'
require 'yaml'
#= feed_tools.rb
#
# FeedTools was designed to be a simple XML feed parser, generator, and translator with a built-in
# caching system.
#
#== Example
# slashdot_feed = FeedTools::Feed.open('http://www.slashdot.org/index.rss')
# slashdot_feed.title
# => "Slashdot"
# slashdot_feed.description
# => "News for nerds, stuff that matters"
# slashdot_feed.link
# => "http://slashdot.org/"
# slashdot_feed.items.first.find_node("slash:hitparade/text()").to_s
# => "43,37,28,23,11,3,1"
module FeedTools
# The default caching mechanism for the FeedTools module.  Stores
# retrieved feeds in a "feeds" database table via ActiveRecord.
class DatabaseFeedCache < ActiveRecord::Base
  # Overrides the default table name to use the "feeds" table.
  def self.table_name() "feeds" end

  # If ActiveRecord is not already connected, attempts to find a
  # configuration file and use it to open a connection for ActiveRecord.
  # This method is probably unnecessary for anything but testing and
  # debugging purposes.  In a Rails environment, the connection will
  # already have been established and this method will simply do nothing.
  #
  # This method should not raise any exceptions because it's designed to
  # be run only when the module is first loaded.  If it fails, the user
  # should get an exception when they try to perform some action that
  # makes use of the caching functionality, and not until.
  def DatabaseFeedCache.initialize_cache
    # Establish a connection if we don't already have one
    begin
      ActiveRecord::Base.connection
    rescue
      begin
        possible_config_files = [
          "./config/database.yml",
          "../database.yml",
          "./database.yml"
        ]
        database_config_file = nil
        for file in possible_config_files
          if File.exists? file
            database_config_file = file
            break
          end
        end
        # NOTE: if no config file was found, File.open(nil) raises and is
        # swallowed by the rescue below, honoring the no-exceptions
        # contract of this method.
        database_config_hash = File.open(database_config_file) do |file|
          config_hash = YAML::load(file)
          unless config_hash[FEED_TOOLS_ENV].nil?
            config_hash = config_hash[FEED_TOOLS_ENV]
          end
          config_hash
        end
        ActiveRecord::Base.configurations = database_config_hash
        ActiveRecord::Base.establish_connection(database_config_hash)
        ActiveRecord::Base.connection
      rescue
      end
    end
    # Verify that the necessary database tables are in place
    # and if they're missing, create them
    unless DatabaseFeedCache.table_exists?
      DatabaseFeedCache.create_table
    end
    return nil
  end

  # True if the appropriate database table already exists.
  def DatabaseFeedCache.table_exists?
    begin
      ActiveRecord::Base.connection.execute "select id, url, title, " +
        "link, xml_data, http_headers, last_retrieved " +
        "from feeds limit 1"
    rescue ActiveRecord::StatementInvalid
      return false
    rescue
      return false
    end
    return true
  end

  # Creates the appropriate database table for the detected adapter.
  # Raises a RuntimeError when the adapter is not one of mysql, sqlite,
  # or postgresql.
  def DatabaseFeedCache.create_table
    unless DatabaseFeedCache.table_exists?
      feeds_mysql = <<-SQL_END
        CREATE TABLE `feeds` (
          `id` int(10) unsigned NOT NULL auto_increment,
          `url` varchar(255) default NULL,
          `title` varchar(255) default NULL,
          `link` varchar(255) default NULL,
          `xml_data` longtext default NULL,
          `http_headers` text default NULL,
          `last_retrieved` datetime default NULL,
          PRIMARY KEY (`id`)
        ) ENGINE=MyISAM DEFAULT CHARSET=latin1;
      SQL_END
      # FIX: removed the trailing comma after the last column definition;
      # SQLite rejects a comma immediately before the closing paren.
      feeds_sqlite = <<-SQL_END
        CREATE TABLE 'feeds' (
          'id' INTEGER PRIMARY KEY NOT NULL,
          'url' VARCHAR(255) DEFAULT NULL,
          'title' VARCHAR(255) DEFAULT NULL,
          'link' VARCHAR(255) DEFAULT NULL,
          'image_link' VARCHAR(255) DEFAULT NULL,
          'xml_data' TEXT DEFAULT NULL,
          'http_headers' TEXT DEFAULT NULL,
          'last_retrieved' DATETIME DEFAULT NULL
        );
      SQL_END
      # FIX: removed the trailing comma (invalid in Postgres) and changed
      # the non-existent "datetime" type to "timestamp".
      feeds_psql = <<-SQL_END
        CREATE TABLE feeds (
          id SERIAL PRIMARY KEY NOT NULL,
          url varchar(255) default NULL,
          title varchar(255) default NULL,
          link varchar(255) default NULL,
          xml_data text default NULL,
          http_headers text default NULL,
          last_retrieved timestamp default NULL
        );
      SQL_END
      table_creation_sql = nil
      if configurations["adapter"] == "mysql"
        table_creation_sql = feeds_mysql
      elsif configurations["adapter"] == "sqlite"
        table_creation_sql = feeds_sqlite
      elsif configurations["adapter"] == "postgresql"
        table_creation_sql = feeds_psql
      end
      if table_creation_sql.nil?
        raise "Could not build feed_items table."
      else
        connection.execute table_creation_sql
      end
    end
  end
end
# Raised when a feed cannot be retrieved from its source.
class FeedAccessError < StandardError
end
# Quick method of enabling small classes to have their attributes
# accessible as a dictionary.
module AttributeDictionary # :nodoc:
  # Reads the attribute named +key+ as if the object were a dictionary,
  # e.g. +obj["title"]+ calls +obj.title+.  Returns nil for assignment
  # or destructive method names, for methods taking arguments, and
  # (FIX) for methods that do not exist — previously an unknown key
  # raised NameError from Object#method.
  def [](key)
    # Assignment, and destructive methods should not be
    # accessed like this.
    return nil if key[-1..-1] == "=" || key[-1..-1] == "!"
    return nil unless self.respond_to?(key)
    return nil unless self.method(key).arity == 0
    return self.send(key)
  end

  # Writes the attribute named +key+, e.g. +obj["title"] = x+ calls
  # +obj.title = x+.  Same guards as #[]: bang/assignment names, unknown
  # writers, and writers with the wrong arity are silently ignored.
  def []=(key, value)
    # Assignment, and destructive methods should not be
    # accessed like this.
    return nil if key[-1..-1] == "=" || key[-1..-1] == "!"
    return nil unless self.respond_to?(key + "=")
    return nil unless self.method(key + "=").arity == 1
    return self.send(key + "=", value)
  end
end
# Module-level state: the active cache implementation and the
# User-Agent string sent with HTTP requests (see FeedTools.feed_cache=
# and FeedTools.user_agent= for the public accessors).
@feed_cache = DatabaseFeedCache
@user_agent = "FeedTools/#{FEED_TOOLS_VERSION} " +
"+http://www.sporkmonger.com/projects/feedtools/"
# The class currently used for feed caching, or nil when caching has
# been disabled.
def FeedTools.feed_cache
  @feed_cache
end
# Replaces the caching mechanism; nil disables caching entirely.
# The default is the DatabaseFeedCache class.
#
# Instances of the supplied class must respond to:
#  url, url=, title, title=, link, link=, xml_data, xml_data=,
#  etag, etag=, last_modified, last_modified=, save
#
# The class itself must additionally respond to:
#  find_by_id, find_by_url, initialize_cache
def FeedTools.feed_cache=(new_feed_cache)
  # TODO: ensure that the feed cache class actually does those things.
  # ==================================================================
  @feed_cache = new_feed_cache
end
# The User-Agent string currently sent with http requests.
def FeedTools.user_agent
  @user_agent
end
# Overrides the User-Agent string sent in the http headers.
def FeedTools.user_agent=(new_user_agent)
  @user_agent = new_user_agent
end
# Returns true if the html tidy module can be used.
#
# Obviously, you need the tidy gem installed in order to run with html
# tidy features turned on.
#
# This method does a fairly complicated, and probably unnecessarily
# desperate search for the libtidy library. If you want this thing to
# execute fast, the best thing to do is to set Tidy.path ahead of time.
# If Tidy.path is set, this method doesn't do much. If it's not set,
# it will do it's darnedest to find the libtidy library. If you set
# the LIBTIDYPATH environment variable to the libtidy library, it should
# be able to find it.
#
# Once the library is located, this method will run much faster.
def FeedTools.tidy_enabled?
# This is an override variable to keep tidy from being used even if it
# is available.
if @force_tidy_enabled == false
return false
end
# NOTE(review): because the "couldn't find it" outcome is also stored as
# false, this whole search re-runs on every call when tidy is missing.
if @tidy_enabled.nil? || @tidy_enabled == false
@tidy_enabled = false
begin
require 'tidy'
if Tidy.path.nil?
# *Shrug*, just brute force it, I guess. There's a lot of places
# this thing might be hiding in, depending on platform and general
# sanity of the person who installed the thing. Most of these are
# probably unlikely, but it's not like checking unlikely locations
# hurts. Much. Especially if you actually find it.
libtidy_locations = [
'/usr/local/lib/libtidy.dylib',
'/opt/local/lib/libtidy.dylib',
'/usr/lib/libtidy.dylib',
'/usr/local/lib/tidylib.dylib',
'/opt/local/lib/tidylib.dylib',
'/usr/lib/tidylib.dylib',
'/usr/local/lib/tidy.dylib',
'/opt/local/lib/tidy.dylib',
'/usr/lib/tidy.dylib',
'/usr/local/lib/libtidy.so',
'/opt/local/lib/libtidy.so',
'/usr/lib/libtidy.so',
'/usr/local/lib/tidylib.so',
'/opt/local/lib/tidylib.so',
'/usr/lib/tidylib.so',
'/usr/local/lib/tidy.so',
'/opt/local/lib/tidy.so',
'/usr/lib/tidy.so',
'C:\Program Files\Tidy\tidy.dll',
'C:\Tidy\tidy.dll',
'/usr/local/lib',
'/opt/local/lib',
'/usr/lib'
]
# We just made this thing up, but if someone sets it, we'll
# go ahead and check it
# (reversing puts LIBTIDYPATH at the end of the array, but the loop
# below checks it... last — NOTE(review): possibly intended first.)
unless ENV['LIBTIDYPATH'].nil?
libtidy_locations =
libtidy_locations.reverse.push(ENV['LIBTIDYPATH'])
end
for path in libtidy_locations
if File.exists? path
if File.ftype(path) == "file"
Tidy.path = path
@tidy_enabled = true
break
elsif File.ftype(path) == "directory"
# Ok, now perhaps we're getting a bit more desperate
# (shells out to `find`; only works on unix-like systems)
lib_paths =
`find #{path} -name '*tidy*' | grep '\\.\\(so\\|dylib\\)$'`
# If there's more than one, grab the first one and
# hope for the best, and if it doesn't work, then blame the
# user for not specifying more accurately.
tidy_path = lib_paths.split("\n").first
unless tidy_path.nil?
Tidy.path = tidy_path
@tidy_enabled = true
break
end
end
end
end
# Still couldn't find it.
unless @tidy_enabled
@tidy_enabled = false
end
else
@tidy_enabled = true
end
rescue LoadError
# Tidy not installed, disable features that rely on tidy.
@tidy_enabled = false
end
end
return @tidy_enabled
end
# Turns html tidy support on or off.  Note that setting this to true
# does not guarantee tidy will be used — it only allows tidy to be used
# if the library can actually be located and loaded.
def FeedTools.tidy_enabled=(new_tidy_enabled)
  @force_tidy_enabled = new_tidy_enabled
end
# Attempts to ensure that the passed url is valid and sane.  Accepts
# very, very ugly urls and makes every effort to figure out what it was
# supposed to be.  Also translates from the feed: and rss:
# pseudo-protocols to the http: protocol.
#
# Returns nil for nil/empty input, "#" for javascript: urls, and the
# cleaned-up url string otherwise.
def FeedTools.normalize_url(url)
  if url.nil? || url == ""
    return nil
  end
  # FIX: operate on a copy.  The gsub! calls below mutate the string in
  # place, and previously this method modified the caller's argument as
  # a side effect.
  normalized_url = url.dup
  # if a url begins with the '/' character, it only makes sense that they
  # meant to be using a file:// url. Fix it for them.
  if normalized_url.length > 0 && normalized_url[0..0] == "/"
    normalized_url = "file://" + normalized_url
  end
  # if a url begins with javascript:, it's quite possibly an attempt at
  # doing something malicious. Let's keep that from getting anywhere,
  # shall we?
  if (normalized_url.downcase =~ /javascript:/) != nil
    return "#"
  end
  # deal with all of the many ugly possibilities involved in the rss:
  # and feed: pseudo-protocols (incidentally, whose crazy idea was this
  # mess?)
  normalized_url.gsub!(/^http:\/*(feed:\/*)?/, "http://")
  normalized_url.gsub!(/^http:\/*(rss:\/*)?/, "http://")
  normalized_url.gsub!(/^feed:\/*(http:\/*)?/, "http://")
  normalized_url.gsub!(/^rss:\/*(http:\/*)?/, "http://")
  normalized_url.gsub!(/^file:\/*/, "file:///")
  normalized_url.gsub!(/^https:\/*/, "https://")
  # fix (very) bad urls (usually of the user-entered sort)
  normalized_url.gsub!(/^http:\/*(http:\/*)*/, "http://")
  if (normalized_url =~ /^file:/) == 0
    # fix bad Windows-based entries
    normalized_url.gsub!(/file:\/\/\/([a-zA-Z]):/, 'file:///\1|')
    # maybe this is too aggressive?
    normalized_url.gsub!(/\\/, '/')
    return normalized_url
  else
    if (normalized_url =~ /https?:\/\//) == nil
      normalized_url = "http://" + normalized_url
    end
    if normalized_url == "http://"
      return nil
    end
    begin
      feed_uri = URI.parse(normalized_url)
      if feed_uri.scheme == nil
        feed_uri.scheme = "http"
      end
      if feed_uri.path == nil || feed_uri.path == ""
        feed_uri.path = "/"
      end
      # collapse any run of leading slashes in the path down to one
      if (feed_uri.path =~ /^[\/]+/) == 0
        feed_uri.path.gsub!(/^[\/]+/, "/")
      end
      return feed_uri.to_s
    rescue URI::InvalidURIError
      return normalized_url
    end
  end
end
# True if the parameter parses as a URI, false for nil or garbage.
def FeedTools.is_url?(url)
  return false if url.nil?
  begin
    URI.parse(url)
    true
  rescue URI::InvalidURIError
    false
  end
end
# Removes anything that looks like an html/xml tag from the html
# formatted text and returns the result.
def FeedTools.strip_html(html)
  # TODO: do this properly
  # ======================
  html.gsub(/<\/?[^>]+>/, "")
end
# Tidys up the html, returning xml-clean markup.  Falls back to
# returning the input unchanged when tidy is not available.
#
# NOTE(review): the <html>/<body> literals in the regexes below were
# garbled in this copy of the file (HTML tags were stripped by whatever
# extracted it); they have been reconstructed from the surrounding
# fragment-detection/stripping logic — verify against upstream
# FeedTools 0.2.x.
def FeedTools.tidy_html(html)
  if FeedTools.tidy_enabled?
    # A "fragment" is markup without a full <html>/<body> envelope; we
    # need to know because tidy always adds the envelope back.
    is_fragment = true
    if (html.strip =~ /<html>(.|\n)*<body>/) != nil ||
        (html.strip =~ /<\/body>(.|\n)*<\/html>$/) != nil
      is_fragment = false
    end
    if (html.strip =~ /<\?xml(.|\n)*\?>/) != nil
      is_fragment = false
    end
    tidy_html = Tidy.open(:show_warnings=>false) do |tidy|
      tidy.options.output_xml = true
      tidy.options.indent = false
      tidy.options.wrap_attributes = true
      tidy.options.logical_emphasis = true
      tidy.options.doctype = "omit"
      xml = tidy.clean(html)
      xml
    end
    if is_fragment
      # Tidy puts <html><body>[our html]</body></html> in.
      # We don't want this.
      tidy_html.strip!
      tidy_html.gsub!(/^<html>(.|\n)*<body>/, "")
      tidy_html.gsub!(/<\/body>(.|\n)*<\/html>$/, "")
      tidy_html.strip!
    end
  else
    tidy_html = html
  end
  return tidy_html
end
# Removes all dangerous html tags from the html formatted text.
# If mode is set to :escape, dangerous and unknown elements will
# be escaped. If mode is set to :strip, dangerous and unknown
# elements and all children will be removed entirely.
# Dangerous or unknown attributes are always removed.
def FeedTools.sanitize_html(html, mode=:escape)
  # Lists borrowed from Mark Pilgrim's feedparser
  acceptable_elements = ['a', 'abbr', 'acronym', 'address', 'area', 'b',
    'big', 'blockquote', 'br', 'button', 'caption', 'center', 'cite',
    'code', 'col', 'colgroup', 'dd', 'del', 'dfn', 'dir', 'div', 'dl',
    'dt', 'em', 'fieldset', 'font', 'form', 'h1', 'h2', 'h3', 'h4',
    'h5', 'h6', 'hr', 'i', 'img', 'input', 'ins', 'kbd', 'label', 'legend',
    'li', 'map', 'menu', 'ol', 'optgroup', 'option', 'p', 'pre', 'q', 's',
    'samp', 'select', 'small', 'span', 'strike', 'strong', 'sub', 'sup',
    'table', 'tbody', 'td', 'textarea', 'tfoot', 'th', 'thead', 'tr', 'tt',
    'u', 'ul', 'var']
  acceptable_attributes = ['abbr', 'accept', 'accept-charset', 'accesskey',
    'action', 'align', 'alt', 'axis', 'border', 'cellpadding',
    'cellspacing', 'char', 'charoff', 'charset', 'checked', 'cite', 'class',
    'clear', 'cols', 'colspan', 'color', 'compact', 'coords', 'datetime',
    'dir', 'disabled', 'enctype', 'for', 'frame', 'headers', 'height',
    'href', 'hreflang', 'hspace', 'id', 'ismap', 'label', 'lang',
    'longdesc', 'maxlength', 'media', 'method', 'multiple', 'name',
    'nohref', 'noshade', 'nowrap', 'prompt', 'readonly', 'rel', 'rev',
    'rows', 'rowspan', 'rules', 'scope', 'selected', 'shape', 'size',
    'span', 'src', 'start', 'summary', 'tabindex', 'target', 'title',
    'type', 'usemap', 'valign', 'value', 'vspace', 'width']
  # Stupid hack to pass this unit test:
  # http://feedparser.org/tests/wellformed/rss/
  # item_description_not_a_doctype.xml
  html.gsub!(/<!DOCTYPE([^>]*)>/, "")
  # Parse leniently with HTree, wrapping the fragment in a scratch <div>
  # so there is always a single root element, then convert to REXML.
  # FIX/NOTE(review): this line was garbled in this copy of the file
  # (the original read `html.gsub!(/" + html + "").to_rexml`, with the
  # html_doc assignment that line 522 depends on missing entirely); it
  # has been reconstructed — verify the doctype regex and the wrapper
  # element name against upstream FeedTools 0.2.x.
  html_doc = HTree.parse("<div>" + html + "</div>").to_rexml
  # Recursive sanitizer: strips or escapes unknown elements and deletes
  # unknown attributes from every REXML::Element in the tree.
  sanitize_node = lambda do |html_node|
    if html_node.respond_to? :children
      for child in html_node.children
        if child.kind_of? REXML::Element
          unless acceptable_elements.include? child.name
            if mode == :strip
              html_node.delete_element(child)
            else
              # Escape the offending element (including its serialized
              # children) as literal text in place of the element.
              new_child = REXML::Text.new(CGI.escapeHTML(child.to_s))
              html_node.insert_after(child, new_child)
              html_node.delete_element(child)
            end
          end
          for attribute in child.attributes.keys
            unless acceptable_attributes.include? attribute
              child.delete_attribute(attribute)
            end
          end
        end
        sanitize_node.call(child)
      end
    end
    html_node
  end
  sanitize_node.call(html_doc.root)
  return html_doc.root.inner_xml
end
class Feed
include REXML
include AttributeDictionary
# Loads the feed at the given url, serving it from the cache when the
# cached copy hasn't expired yet.
def Feed.open(url)
  feed = Feed.new
  # normalize before assignment so obviously broken urls get repaired
  feed.url = FeedTools.normalize_url(url)
  feed.update
  return feed
end
# Refreshes the feed, hitting the remote source only when the cached
# copy has expired (or can't be used for some reason).
def update
  # Restore cached HTTP headers when none are loaded in memory yet.
  cached = self.cache_object
  if self.http_headers.nil? && !cached.nil? && !cached.http_headers.nil?
    @http_headers = YAML.load(cached.http_headers)
  end
  unless expired?
    @live = false
    return
  end
  load_remote_feed
end
# Attempts to load the feed from the remote location. Requires the url
# field to be set. If an etag or the last_modified date has been set,
# attempts to use them to prevent unnecessary reloading of identical
# content.
#
# Sets @live to true on a fresh retrieval; on any retrieval failure,
# falls back to cached xml_data (re-raising only when no cached copy
# exists) and sets @live to false.
def load_remote_feed
@live = true
# Restore cached HTTP headers if none are loaded in memory yet.
if self.http_headers.nil? && !(self.cache_object.nil?) &&
!(self.cache_object.http_headers.nil?)
@http_headers = YAML.load(self.cache_object.http_headers)
end
if (self.url =~ /^feed:/) == 0
# Woah, Nelly, how'd that happen? You should've already been
# corrected. So let's fix that url. And please,
# just use less crappy browsers instead of badly defined
# pseudo-protocol hacks.
self.url = FeedTools.normalize_url(self.url)
end
# Find out what method we're going to be using to obtain this feed.
uri = URI.parse(self.url)
retrieval_method = "http"
# NOTE(review): "https" is never assigned here, so the https branch
# below is unreachable; https urls raise the unrecognized-protocol
# error.  TODO confirm intended behavior.
case uri.scheme
when "http"
retrieval_method = "http"
when "ftp"
retrieval_method = "ftp"
when "file"
retrieval_method = "file"
when nil
raise FeedAccessError,
"No protocol was specified in the url."
else
raise FeedAccessError,
"Cannot retrieve feed using unrecognized protocol: " + uri.scheme
end
# No need for http headers unless we're actually doing http
if retrieval_method == "http"
# Set up the appropriate http headers
# (conditional GET: etag -> If-None-Match, last-modified ->
# If-Modified-Since)
headers = {}
unless self.http_headers.nil?
headers["If-None-Match"] =
self.http_headers['etag'] unless self.http_headers['etag'].nil?
headers["If-Modified-Since"] =
self.http_headers['last-modified'] unless
self.http_headers['last-modified'].nil?
end
headers["User-Agent"] =
FeedTools.user_agent unless FeedTools.user_agent.nil?
# The http feed access method
# NOTE(review): a nested def like this re-defines http_fetch on the
# class every time load_remote_feed runs; left as-is.
def http_fetch(feed_url, http_headers, redirect_limit = 10,
response_chain = []) # :nodoc:
raise FeedAccessError, 'Redirect too deep' if redirect_limit == 0
feed_uri = nil
begin
feed_uri = URI.parse(feed_url)
rescue URI::InvalidURIError
# Uh, maybe try to fix it?
feed_uri = URI.parse(FeedTools.normalize_url(feed_url))
end
# Borrowed from open-uri:
# According to RFC2616 14.23, Host: request-header field should be
# set to an origin server.
# But net/http wrongly set a proxy server if an absolute URI is
# specified as a request URI.
# So override it here explicitly.
http_headers['Host'] = feed_uri.host
http_headers['Host'] += ":#{feed_uri.port}" if feed_uri.port
Net::HTTP.start(feed_uri.host, (feed_uri.port or 80)) do |http|
response = http.request_get(feed_uri.path, http_headers)
case response
when Net::HTTPSuccess
# We've reached the final destination, process all previous
# redirections, and see if we need to update the url.
for redirected_response in response_chain
if redirected_response.last.code.to_i == 301
self.url = redirected_response.first
else
# Jump out as soon as we hit anything that isn't a
# permanently moved redirection.
break
end
end
return response
when Net::HTTPRedirection
if response.code.to_i == 304
# 304 Not Modified: raise so the rescue below can record the
# cache hit.
response.error!
else
if response['Location'].nil?
raise FeedAccessError,
"No location to redirect to supplied: " + response.code
end
response_chain << [feed_url, response]
new_location = response['location']
if response_chain.assoc(new_location) != nil
raise FeedAccessError, "Redirection loop detected."
end
# TODO: deal with stupid people using relative urls
# in Location header
# =================================================
http_fetch(new_location, http_headers,
redirect_limit - 1, response_chain)
end
else
response.error!
end
end
end
begin
@http_response = http_fetch(self.url, headers)
@http_headers = {}
self.http_response.each_header do |header|
self.http_headers[header.first.downcase] = header.last
end
self.last_retrieved = Time.now
self.xml_data = self.http_response.body
rescue FeedAccessError
@live = false
if self.xml_data.nil?
raise
end
rescue Timeout::Error
# if we time out, do nothing, it should fall back to the xml_data
# stored in the cache.
@live = false
if self.xml_data.nil?
raise
end
rescue Errno::ECONNRESET
# if the connection gets reset by peer, oh well, fall back to the
# xml_data stored in the cache
@live = false
if self.xml_data.nil?
raise
end
rescue => error
# heck, if anything at all bad happens, fall back to the xml_data
# stored in the cache.
# If we can, get the HTTPResponse...
@http_response = nil
if error.respond_to?(:each_header)
@http_response = error
end
if error.respond_to?(:response) &&
error.response.respond_to?(:each_header)
@http_response = error.response
end
if @http_response != nil
@http_headers = {}
# NOTE(review): unlike the success path above, header keys are NOT
# downcased here — consumers checking lowercase keys may miss
# headers recorded on this path.  TODO confirm.
self.http_response.each_header do |header|
self.http_headers[header.first] = header.last
end
if self.http_response.code.to_i == 304
self.last_retrieved = Time.now
end
end
@live = false
if self.xml_data.nil?
raise
end
end
elsif retrieval_method == "https"
# Not supported... yet
elsif retrieval_method == "ftp"
# Not supported... yet
# Technically, CDF feeds are supposed to be able to be accessed directly
# from an ftp server. This is silly, but we'll humor Microsoft.
#
# Eventually.
elsif retrieval_method == "file"
# Now that we've gone to all that trouble to ensure the url begins
# with 'file://', strip the 'file://' off the front of the url.
file_name = self.url.gsub(/^file:\/\//, "")
begin
open(file_name) do |file|
@http_response = nil
@http_headers = {}
self.last_retrieved = Time.now
self.xml_data = file.read
end
rescue
@live = false
# In this case, pulling from the cache is probably not going
# to help at all, and the use should probably be immediately
# appraised of the problem. Raise the exception.
raise
end
end
# Persist whatever we ended up with; saving is best-effort.
unless self.cache_object.nil?
begin
self.save
rescue
end
end
end
# The raw response object (or nil) from the most recent http fetch.
def http_response
  @http_response
end
# Hash of http response headers from the most recent fetch (or nil).
def http_headers
  @http_headers
end
# The feed's raw xml, lazily pulled from the cache object when it is
# not already held in memory.
def xml_data
  if @xml_data.nil? && !self.cache_object.nil?
    @xml_data = self.cache_object.xml_data
  end
  @xml_data
end
# Stores the feed's raw xml, mirroring it into the cache record if any.
def xml_data=(new_xml_data)
  @xml_data = new_xml_data
  cache = self.cache_object
  cache.xml_data = new_xml_data unless cache.nil?
end
# A parsed (REXML) document for xml_data, memoized.  Falls back to
# HTree's lenient parser when strict parsing fails.
def xml
  return @xml_doc unless @xml_doc.nil?
  @xml_doc =
    begin
      Document.new(xml_data)
    rescue
      # Something failed, attempt to repair the xml with htree.
      HTree.parse(xml_data).to_rexml
    end
end
# First node under the channel node matching the xpath query.
def find_node(xpath)
  XPath.first(channel_node, xpath)
end
# Every node under the channel node matching the xpath query.
def find_all_nodes(xpath)
  XPath.match(channel_node, xpath)
end
# The document root element of the parsed feed, memoized.
def root_node
  @root_node ||= xml.root
end
# The channel element of the feed: "channel" (RSS), then "feedinfo"
# (CDF), then the root element itself (Atom).  Memoized.
def channel_node
  if @channel_node.nil?
    @channel_node =
      XPath.first(root_node, "channel") ||
      XPath.first(root_node, "feedinfo") ||
      root_node
  end
  return @channel_node
end
# The persistence record for this feed, lazily located (by id, then by
# url) or freshly built via the configured cache class.  Returns nil
# when caching is disabled; lookup errors are swallowed.
def cache_object
  cache_class = FeedTools.feed_cache
  unless cache_class.nil?
    if @cache_object.nil?
      begin
        if @id != nil
          @cache_object = cache_class.find_by_id(@id)
        elsif @url != nil
          @cache_object = cache_class.find_by_url(@url)
        end
        @cache_object = cache_class.new if @cache_object.nil?
      rescue
      end
    end
  end
  return @cache_object
end
# Replaces the cache object backing this feed.
#
# Any object will do, provided it responds to:
#  url, url=, title, title=, link, link=, xml_data, xml_data=,
#  etag, etag=, last_modified, last_modified=, save
def cache_object=(new_cache_object)
  @cache_object = new_cache_object
end
# The feed's unique id, read from <id> (Atom) or <guid> (RSS); nil
# when neither is present.  Memoized.
def id
  if @id.nil?
    @id = XPath.first(root_node, "id/text()").to_s
    @id = XPath.first(root_node, "guid/text()").to_s if @id == ""
    @id = nil if @id == ""
  end
  return @id
end
# Overrides the feed's unique id.
def id=(new_id)
  @id = new_id
end
# The feed url; when unset, recovered from the rel='self' link in the
# parsed xml (requires xml_data to be present).
def url
  if @url.nil? && self.xml_data != nil
    candidate = XPath.first(channel_node, "link[@rel='self']/@href").to_s
    @url = (candidate == "" ? nil : candidate)
  end
  return @url
end
# Assigns the feed url (normalized) and mirrors the raw value into the
# cache record, if any.
def url=(new_url)
  @url = FeedTools.normalize_url(new_url)
  cache = self.cache_object
  cache.url = new_url unless cache.nil?
end
# Returns the feed title
# (memoized; unescapes/sanitizes/strips markup, collapses newlines, and
# mirrors the result into the cache object)
def title
if @title.nil?
if XPath.first(channel_node, "title/@type").to_s == "xhtml" ||
XPath.first(channel_node, "title/@mode").to_s == "xhtml"
@title = XPath.first(channel_node, "title").inner_xml
elsif XPath.first(channel_node, "title/@type").to_s == "escaped" ||
XPath.first(channel_node, "title/@mode").to_s == "escaped"
@title = CGI.unescapeHTML(
XPath.first(channel_node, "title/text()").to_s)
else
# NOTE(review): this default branch is identical to the "escaped"
# branch above, so the type/mode check is currently a no-op.
@title = CGI.unescapeHTML(
XPath.first(channel_node, "title/text()").to_s)
end
unless @title.nil?
@title = CGI.unescapeHTML(FeedTools.sanitize_html(@title, :strip))
end
if @title != "" && !(@title.nil?)
@title = FeedTools.strip_html(@title).strip
end
# collapse a multi-line title onto one line
@title.gsub!(/\n/, " ")
@title = nil if @title == ""
self.cache_object.title = @title unless self.cache_object.nil?
end
return @title
end
# Assigns the feed title and mirrors it into the cache record, if any.
def title=(new_title)
  @title = new_title
  cache = self.cache_object
  cache.title = new_title unless cache.nil?
end
# Returns the feed description, probing (in order): description,
# subtitle, tagline, info, abstract, summary, content:encoded,
# description's inner xml, itunes:summary, itunes:subtitle.  The result
# is sanitized and stripped of any leading/trailing div wrappers.
# Memoized; nil when nothing usable is found.
def description
  if @description.nil?
    # get the feed description from the xml document
    @description = XPath.first(channel_node, "description/text()").to_s
    if @description != ""
      if XPath.first(channel_node, "description/@encoding").to_s != ""
        # Encoded binary payloads (e.g. base64) are not supported.
        @description = "[Embedded data objects are not supported.]"
      else
        # NOTE: +description+ here re-enters this reader, which simply
        # returns the now-populated @description.
        @description = CGI.unescapeHTML(description)
      end
    end
    if @description == ""
      @description = XPath.first(channel_node, "subtitle/text()").to_s
      if @description != "" &&
          XPath.first(channel_node, "subtitle/@mode").to_s == "escaped"
        @description = CGI.unescapeHTML(description)
      end
    end
    if @description == ""
      @description = XPath.first(channel_node, "tagline/text()").to_s
      if @description != "" &&
          XPath.first(channel_node, "tagline/@mode").to_s == "escaped"
        @description = CGI.unescapeHTML(description)
      end
    end
    if @description == "" && XPath.first(channel_node, "tagline") == nil
      @description = XPath.first(channel_node, "info/text()").to_s
      if @description != "" &&
          XPath.first(channel_node, "info/@mode").to_s == "escaped"
        @description = CGI.unescapeHTML(description)
      end
    end
    if @description == ""
      @description = CGI.unescapeHTML(
        XPath.first(channel_node, "abstract/text()").to_s)
    end
    if @description == ""
      @description = CGI.unescapeHTML(
        XPath.first(channel_node, "summary/text()").to_s)
    end
    if @description == ""
      # I don't think this is valid for anyone to do, but this is probably
      # what they meant if they do it.
      @description = CGI.unescapeHTML(
        XPath.first(channel_node, "content:encoded/text()").to_s)
      if @description != ""
        @bozo = true
      end
    end
    if @description == ""
      begin
        @description = XPath.first(channel_node, "description").inner_xml
      rescue
      end
    end
    if @description == ""
      @description = self.itunes_summary
      @description = "" if @description.nil?
    end
    if @description == ""
      @description = self.itunes_subtitle
      @description = "" if @description.nil?
    end
    @description =
      FeedTools.sanitize_html(@description) unless @description.nil?
    # If it started with a bunch of divs, hack them right off. We can put
    # them back later if they're needed.
    # FIX: the leading-div regex was garbled in this copy of the file
    # (read "/^(]*>)*/"); reconstructed as a <div ...> matcher to pair
    # with the </div> stripper on the next line — verify upstream.
    @description.gsub!(/^(<div[^>]*>)*/, "")
    @description.gsub!(/(<\/div>)*$/, "")
    @description.gsub!(/\n/, " ") if @description.size < 80
    @description = @description.strip unless @description.nil?
    @description = nil if @description == ""
  end
  return @description
end
# Overrides the feed description.
def description=(new_description)
  @description = new_description
end
# Contents of the itunes:summary element, unescaped and sanitized;
# nil when absent or empty.  Memoized.
def itunes_summary
  if @itunes_summary.nil?
    raw = CGI.unescapeHTML(XPath.first(root_node,
      "itunes:summary/text()").to_s)
    @itunes_summary = (raw == "" ? nil : FeedTools.sanitize_html(raw))
  end
  return @itunes_summary
end
# Overrides the itunes:summary contents.
def itunes_summary=(new_itunes_summary)
  @itunes_summary = new_itunes_summary
end
# Contents of the itunes:subtitle element, unescaped and sanitized;
# nil when absent or empty.  Memoized.
def itunes_subtitle
  if @itunes_subtitle.nil?
    raw = CGI.unescapeHTML(XPath.first(root_node,
      "itunes:subtitle/text()").to_s)
    @itunes_subtitle = (raw == "" ? nil : FeedTools.sanitize_html(raw))
  end
  return @itunes_subtitle
end
# Overrides the itunes:subtitle contents.
def itunes_subtitle=(new_itunes_subtitle)
  @itunes_subtitle = new_itunes_subtitle
end
# Returns the feed link, probing Atom alternate links, plain RSS links,
# CDF href/base attributes, and finally the guid (if it looks like a
# url).  The result is normalized, mirrored into the cache record, and
# memoized.
def link
  if @link.nil?
    # get the feed link from the xml document
    # FIX: REXML XPath predicates require an explicit "and" between
    # clauses; "[@rel='alternate' @type='text/html']" is not valid
    # XPath and never matched.
    @link = XPath.first(channel_node,
      "link[@rel='alternate' and @type='text/html']/@href").to_s
    if @link == ""
      @link = XPath.first(channel_node, "link[@rel='alternate']/@href").to_s
    end
    if @link == ""
      @link = XPath.first(channel_node, "link/@href").to_s
    end
    if @link == ""
      @link = XPath.first(channel_node, "link/text()").to_s
    end
    if @link == ""
      @link = XPath.first(channel_node, "@href").to_s
    end
    if @link == ""
      if FeedTools.is_url? self.guid
        @link = self.guid
      end
    end
    if @link == ""
      # Technically, we shouldn't use the base attribute for this, but if
      # the href attribute is missing, it's already a given that we're
      # looking at a messed up CDF file. We can always pray it's correct.
      @link = XPath.first(channel_node, "@base").to_s
    end
    @link = FeedTools.normalize_url(@link)
    unless self.cache_object.nil?
      self.cache_object.link = @link
    end
  end
  return @link
end
# Assigns the feed link and mirrors it into the cache record, if any.
def link=(new_link)
  @link = new_link
  cache = self.cache_object
  cache.link = new_link unless cache.nil?
end
# Returns the feed image link, probing RSS <image>, RDF resources,
# Atom image-typed links, and logo elements in priority order.  The
# first non-empty match is normalized and memoized.
def image_link
  if @image_link.nil?
    # xpath candidates, most specific first; stop at the first hit
    candidate_paths = [
      "image/url/text()",
      "image/@rdf:resource",
      "link[@type='image/jpeg']/@href",
      "link[@type='image/gif']/@href",
      "link[@type='image/png']/@href",
      "logo[@style='image']/@href",
      "logo/@href"
    ]
    found = ""
    candidate_paths.each do |path|
      found = XPath.first(channel_node, path).to_s
      break if found != ""
    end
    @image_link = FeedTools.normalize_url(found)
  end
  return @image_link
end
# Overrides the feed image link.
def image_link=(new_image_link)
  @image_link = new_image_link
end
# Returns the url to the icon file for this feed, probing icon-flavored
# links and <icon> elements in priority order.
#
# The final fallback builds /favicon.ico from the feed's +link+ field
# (rather than the feed url) so we don't grab the favicon of hosting
# services like feedburner.  Memoized.
def icon_link
  if @icon_link.nil?
    candidate_paths = [
      "link[@rel='icon']/@href",
      "link[@rel='shortcut icon']/@href",
      "link[@type='image/x-icon']/@href",
      "icon/@href",
      "icon/text()"
    ]
    @icon_link = ""
    candidate_paths.each do |path|
      @icon_link = XPath.first(channel_node, path).to_s
      break unless @icon_link == ""
    end
    if @icon_link == ""
      link_uri = URI.parse(FeedTools.normalize_url(self.link))
      @icon_link =
        link_uri.scheme + "://" + link_uri.host + "/favicon.ico"
    end
  end
  return @icon_link
end
# Returns the number of seconds before the feed should expire
#
# NOTE: internally @time_to_live is kept in *hours*; the final
# `.hour` call (ActiveSupport's Numeric extension) converts it to
# seconds on the way out.
def time_to_live
if @time_to_live.nil?
# get the feed time to live from the xml document
# RSS 1.0 syndication module: updateFrequency is "updates per
# updatePeriod", so each period is converted to an hour count.
update_frequency = XPath.first(channel_node, "syn:updateFrequency/text()").to_s
if update_frequency != ""
update_period = XPath.first(channel_node, "syn:updatePeriod/text()").to_s
if update_period == "daily"
@time_to_live = update_frequency.to_i * 24
elsif update_period == "weekly"
@time_to_live = update_frequency.to_i * 24 * 7
elsif update_period == "monthly"
@time_to_live = update_frequency.to_i * 24 * 30
elsif update_period == "yearly"
@time_to_live = update_frequency.to_i * 24 * 365
else
# hourly
@time_to_live = update_frequency.to_i
end
end
end
if @time_to_live.nil?
# expressed in minutes
# NOTE(review): integer division — a ttl under 60 minutes truncates
# to 0 hours and therefore falls through to the 1-hour default below.
update_frequency = XPath.first(channel_node, "ttl/text()").to_s
if update_frequency != ""
@time_to_live = (update_frequency.to_i / 60)
end
end
if @time_to_live.nil?
# CDF-style schedule/intervaltime attributes; sum the pieces in hours.
@time_to_live = 0
update_frequency_days = XPath.first(channel_node, "schedule/intervaltime/@days").to_s
update_frequency_hours = XPath.first(channel_node, "schedule/intervaltime/@hour").to_s
update_frequency_minutes = XPath.first(channel_node, "schedule/intervaltime/@min").to_s
update_frequency_seconds = XPath.first(channel_node, "schedule/intervaltime/@sec").to_s
if update_frequency_days != ""
@time_to_live = @time_to_live + update_frequency_days.to_i * 24
end
if update_frequency_hours != ""
@time_to_live = @time_to_live + update_frequency_hours.to_i * 1
end
if update_frequency_minutes != ""
# NOTE(review): integer division truncates sub-hour contributions.
@time_to_live = @time_to_live + update_frequency_minutes.to_i / 60
end
if update_frequency_seconds != ""
@time_to_live = @time_to_live + update_frequency_seconds.to_i / 3600
end
if @time_to_live == 0
@time_to_live = nil
end
end
if @time_to_live.nil? || @time_to_live == 0
# Default to one hour
@time_to_live = 1
end
@time_to_live = @time_to_live.round
return @time_to_live.hour
end
# Sets the feed time to live (given in seconds); stored internally as
# a whole number of hours, clamped to a minimum of one.
# Note an Integer argument truncates via integer division before the
# round, exactly as before.
def time_to_live=(new_time_to_live)
  hours = (new_time_to_live / 3600).round
  hours = 1 if hours < 1
  @time_to_live = hours
end
# Returns the feed generator string, stripped of html, or nil when
# the feed does not declare one.
def generator
  return @generator unless @generator.nil?
  raw = XPath.first(channel_node, "generator/text()").to_s
  cleaned = FeedTools.strip_html(raw)
  @generator = (cleaned == "") ? nil : cleaned
  return @generator
end
# Sets the feed generator.
def generator=(value)
  @generator = value
end
# Returns the feed docs url/text, stripped of html, or nil when the
# feed does not declare one.
def docs
  return @docs unless @docs.nil?
  raw = XPath.first(channel_node, "docs/text()").to_s
  cleaned = FeedTools.strip_html(raw)
  @docs = (cleaned == "") ? nil : cleaned
  return @docs
end
# Sets the feed docs.
def docs=(value)
  @docs = value
end
# Returns the feed language
#
# Falls back from the language/dc:language elements to the channel's
# or document's xml:lang attribute, then defaults to "en-us".
# The result is always lowercased.
def language
  if @language.nil?
    @language = XPath.first(channel_node, "language/text()").to_s
    if @language == ""
      @language = XPath.first(channel_node, "dc:language/text()").to_s
    end
    if @language == ""
      # xml:lang is an attribute, not a child element, so it must be
      # addressed with an attribute xpath; the previous
      # "xml:lang/text()" form could never match anything.
      @language = XPath.first(channel_node, "@xml:lang").to_s
    end
    if @language == ""
      @language = XPath.first(root_node, "@xml:lang").to_s
    end
    if @language == ""
      @language = "en-us"
    end
    @language = @language.downcase
  end
  return @language
end
# Sets the feed language.
def language=(value)
  @language = value
end
# Returns true if this feed contains explicit material, as flagged by
# either the media:adult or itunes:explicit element.
def explicit
  if @explicit.nil?
    adult_flag = XPath.first(channel_node, "media:adult/text()").to_s.downcase
    itunes_flag = XPath.first(channel_node, "itunes:explicit/text()").to_s.downcase
    @explicit = (adult_flag == "true" ||
      itunes_flag == "yes" ||
      itunes_flag == "true")
  end
  return @explicit
end
# Sets whether or not the feed contains explicit material; any truthy
# value is normalized to true, everything else to false.
def explicit=(new_explicit)
  @explicit = !!new_explicit
end
# Returns the feed items, sorted newest-first by item time.
#
# Items are parsed and sorted once, at build time; previously the
# sort ran on every call even when the memoized list was returned.
def items
  if @items.nil?
    raw_items = XPath.match(root_node, "item")
    if raw_items == nil || raw_items == []
      raw_items = XPath.match(channel_node, "item")
    end
    if raw_items == nil || raw_items == []
      raw_items = XPath.match(channel_node, "entry")
    end
    # create the individual feed items
    @items = []
    unless raw_items.nil?
      raw_items.each do |item_node|
        new_item = FeedItem.new
        new_item.xml_data = item_node.to_s
        new_item.feed = self
        @items << new_item
      end
    end
    # Sort newest-first; items without a timestamp sort as 1970.
    @items = @items.sort do |a, b|
      (b.time or Time.mktime(1970)) <=> (a.time or Time.mktime(1970))
    end
  end
  return @items
end
# The time that the feed was last requested from the remote server. Nil if it has
# never been pulled, or if it was created from scratch.
def last_retrieved
  cache = self.cache_object
  @last_retrieved = cache.last_retrieved unless cache.nil?
  return @last_retrieved
end
# Sets the time that the feed was last updated, mirroring the value
# into the cache object when one is present.
def last_retrieved=(new_last_retrieved)
  @last_retrieved = new_last_retrieved
  cache = self.cache_object
  cache.last_retrieved = new_last_retrieved unless cache.nil?
end
# True if this feed contains audio content enclosures
#
# Fixed to inspect this feed's own items; the previous version read
# the $test_feed global, which was debugging residue and broke the
# method for every other feed. Returns as soon as a match is found.
def podcast?
  self.items.each do |item|
    item.enclosures.each do |enclosure|
      return true if enclosure.audio?
    end
  end
  return false
end
# True if this feed contains video content enclosures
#
# Fixed to inspect this feed's own items; the previous version read
# the $test_feed global, which was debugging residue and broke the
# method for every other feed. Returns as soon as a match is found.
def vidlog?
  self.items.each do |item|
    item.enclosures.each do |enclosure|
      return true if enclosure.video?
    end
  end
  return false
end
# True if this feed is malformed somehow; defaults to false the first
# time it is read.
def bozo?
  @bozo = false if @bozo.nil?
  return @bozo
end
# True if the feed was not last retrieved from the cache.
def live?
  @live
end
# True if the feed has expired and must be reacquired from the remote server.
#
# time_to_live already returns a value in seconds, so it is added to
# the retrieval time directly; the previous extra `.hour` conversion
# inflated the expiry window by a factor of 3600.
def expired?
  return self.last_retrieved.nil? ||
    (self.last_retrieved + self.time_to_live) < Time.now
end
# Forces this feed to expire by rewinding the retrieval time to the
# epoch and persisting that state to the cache.
def expire
  self.last_retrieved = Time.mktime(1970)
  save
end
# A hook method that is called during the feed generation process. Overriding this method
# will enable additional content to be inserted into the feed.
def build_xml_hook(feed_type, version, xml_builder)
  nil
end
# Generates xml based on the content of the feed
#
# feed_type is "rss" or "atom"; version 0.0 selects a default per
# type (rss => 1.0, atom => 0.3). Output is written through the
# supplied Builder::XmlMarkup instance and also returned.
# Subclasses can inject extra elements via build_xml_hook.
def build_xml(feed_type="rss", version=0.0, xml_builder=Builder::XmlMarkup.new(:indent => 2))
if feed_type == "rss" && version == 0.0
version = 1.0
elsif feed_type == "atom" && version == 0.0
version = 0.3
end
if feed_type == "rss" && (version == 0.9 || version == 1.0 || version == 1.1)
# RDF-based rss format
return xml_builder.tag!("rdf:RDF") do
xml_builder.channel("rdf:about" => CGI.escapeHTML(link)) do
# Required channel elements are emitted empty when missing.
unless title.nil? || title == ""
xml_builder.title(title)
else
xml_builder.title
end
unless link.nil? || link == ""
xml_builder.link(link)
else
xml_builder.link
end
unless image_link.nil? || image_link == ""
xml_builder.image("rdf:resource" => CGI.escapeHTML(image_link))
end
unless description.nil? || description == ""
xml_builder.description(description)
else
xml_builder.description
end
unless language.nil? || language == ""
xml_builder.tag!("dc:language", language)
end
# Syndication module: ttl is seconds, so seconds / 1.hour gives
# the per-hour update frequency.
xml_builder.tag!("syn:updatePeriod", "hourly")
xml_builder.tag!("syn:updateFrequency", (time_to_live / 1.hour).to_s)
xml_builder.tag!("syn:updateBase", Time.mktime(1970).iso8601)
# RDF requires an rdf:Seq table of contents for the items.
xml_builder.items do
xml_builder.tag!("rdf:Seq") do
unless items.nil?
for item in items
if item.link.nil?
raise "Cannot generate an rdf-based feed with a nil item link field."
end
xml_builder.tag!("rdf:li", "rdf:resource" => CGI.escapeHTML(item.link))
end
end
end
end
build_xml_hook(feed_type, version, xml_builder)
end
# In RDF-based rss, the image and the items are siblings of the
# channel element, not children.
unless image_link.nil? || image_link == ""
xml_builder.image("rdf:about" => CGI.escapeHTML(image_link)) do
unless title.nil? || title == ""
xml_builder.title(title)
else
xml_builder.title
end
unless image_link.nil? || image_link == ""
xml_builder.url(image_link)
end
unless link.nil? || link == ""
xml_builder.link(link)
else
xml_builder.link
end
end
end
unless items.nil?
for item in items
item.build_xml(feed_type, version, xml_builder)
end
end
end
elsif feed_type == "rss"
# normal rss format
return xml_builder.rss("version" => version.to_s) do
unless title.nil? || title == ""
xml_builder.title(title)
end
unless link.nil? || link == ""
xml_builder.link(link)
end
unless description.nil? || description == ""
xml_builder.description(description)
end
# RSS 2.0 ttl is expressed in minutes.
xml_builder.ttl((time_to_live / 1.minute).to_s)
xml_builder.generator("http://www.sporkmonger.com/projects/feedtools")
build_xml_hook(feed_type, version, xml_builder)
unless items.nil?
for item in items
item.build_xml(feed_type, version, xml_builder)
end
end
end
elsif feed_type == "atom"
# normal atom format
return xml_builder.feed("xmlns" => "http://purl.org/atom/ns#",
"version" => version.to_s,
"xml:lang" => language) do
unless title.nil? || title == ""
xml_builder.title(title,
"mode" => "escaped",
"type" => "text/html")
end
unless link.nil? || link == ""
xml_builder.link("href" => link,
"rel" => "alternate",
"type" => "text/html",
"title" => title)
end
unless description.nil? || description == ""
xml_builder.tagline(description,
"mode" => "escaped",
"type" => "text/html")
end
xml_builder.generator("FeedTools",
"url" => "http://www.sporkmonger.com/projects/feedtools")
build_xml_hook(feed_type, version, xml_builder)
unless items.nil?
for item in items
item.build_xml(feed_type, version, xml_builder)
end
end
end
end
end
# Persists the current feed state to the cache.
#
# Raises (in this order) if caching is disabled, or if the url,
# xml_data, or cache_object fields required for caching are missing.
def save
  raise "Caching is currently disabled. Cannot save to cache." if FeedTools.feed_cache.nil?
  raise "The url field must be set to save to the cache." if self.url.nil?
  raise "The xml_data field must be set to save to the cache." if self.xml_data.nil?
  raise "The cache_object is currently nil. Cannot save to cache." if self.cache_object.nil?
  self.cache_object.url = self.url
  self.cache_object.title = self.title
  self.cache_object.link = self.link
  self.cache_object.xml_data = self.xml_data
  # Headers are only recorded when the feed was actually fetched.
  self.cache_object.http_headers = self.http_headers.to_yaml unless self.http_response.nil?
  self.cache_object.last_retrieved = self.last_retrieved
  self.cache_object.save
end
# Alternate names for the description field used by the various feed
# formats (Atom "tagline"/"subtitle", plain "content"/"abstract").
alias_method :tagline, :description
alias_method :tagline=, :description=
alias_method :subtitle, :description
alias_method :subtitle=, :description=
alias_method :abstract, :description
alias_method :abstract=, :description=
alias_method :content, :description
alias_method :content=, :description=
# RSS 2.0 naming for the expiry interval.
alias_method :ttl, :time_to_live
alias_method :ttl=, :time_to_live=
# RSS naming for the unique id.
alias_method :guid, :id
alias_method :guid=, :id=
# Atom naming for the item list.
alias_method :entries, :items
# Passes missing methods through to the cache_object, preserving the
# original argument list.
#
# Raises NoMethodError when there is no cache object to delegate to.
def method_missing(msg, *params)
  if self.cache_object.nil?
    raise NoMethodError, "Invalid method #{msg.to_s}"
  end
  # Splat the arguments; previously they were forwarded as a single
  # Array, which broke every delegated call that took arguments.
  return self.cache_object.send(msg, *params)
end
# Passes missing class methods through to the FeedTools.feed_cache
# class, preserving the original argument list. Cache records that
# come back are re-opened as Feed objects.
def Feed.method_missing(msg, *params)
  if FeedTools.feed_cache.nil?
    raise NoMethodError, "Invalid method Feed.#{msg.to_s}"
  end
  # Splat the arguments instead of forwarding them as one Array.
  result = FeedTools.feed_cache.send(msg, *params)
  if result.kind_of? FeedTools.feed_cache
    result = Feed.open(result.url)
  end
  return result
end
end
class FeedItem
include REXML
include AttributeDictionary
# This class stores information about a feed item's file enclosures.
class Enclosure
include AttributeDictionary
# The url for the enclosure
attr_accessor :url
# The MIME type of the file referenced by the enclosure
attr_accessor :type
# The size of the file referenced by the enclosure
attr_accessor :file_size
# The total play time of the file referenced by the enclosure
attr_accessor :duration
# The height in pixels of the enclosed media
attr_accessor :height
# The width in pixels of the enclosed media
attr_accessor :width
# The bitrate of the enclosed media
attr_accessor :bitrate
# The framerate of the enclosed media
attr_accessor :framerate
# The thumbnail for this enclosure
attr_accessor :thumbnail
# The categories for this enclosure
attr_accessor :categories
# A hash of the enclosed file
attr_accessor :hash
# A website containing some kind of media player instead of a direct
# link to the media file.
attr_accessor :player
# A list of credits for the enclosed media
attr_accessor :credits
# A text rendition of the enclosed media
attr_accessor :text
# A list of alternate version of the enclosed media file
attr_accessor :versions
# The default version of the enclosed media file
attr_accessor :default_version
# Returns true if this is the default enclosure
def is_default?
return @is_default
end
# Sets whether this is the default enclosure for the media group
def is_default=(new_is_default)
@is_default = new_is_default
end
# Returns true if the enclosure contains explicit material
def explicit?
return @explicit
end
# Sets the explicit attribute on the enclosure
def explicit=(new_explicit)
@explicit = new_explicit
end
# Determines if the object is a sample, or the full version of the
# object, or if it is a stream.
# Possible values are 'sample', 'full', 'nonstop'.
def expression
return @expression
end
# Sets the expression attribute on the enclosure.
# Allowed values are 'sample', 'full', 'nonstop'.
# Raises ArgumentError for anything else; the value is stored
# lowercased.
def expression=(new_expression)
unless ['sample', 'full', 'nonstop'].include? new_expression.downcase
raise ArgumentError,
"Permitted values are 'sample', 'full', 'nonstop'."
end
@expression = new_expression.downcase
end
# Returns true if this enclosure contains audio content, judged first
# by MIME type, then by the url's file extension.
def audio?
unless self.type.nil?
return true if (self.type =~ /^audio/) != nil
end
# TODO: create a more complete list
# =================================
audio_extensions = ['mp3', 'm4a', 'm4p', 'wav', 'ogg', 'wma']
audio_extensions.each do |extension|
# Require a literal "." before the extension so urls that merely
# end in the same letters don't produce false positives.
if (url =~ /\.#{extension}$/) != nil
return true
end
end
return false
end
# Returns true if this enclosure contains video content, judged first
# by MIME type, then by the url's file extension.
def video?
unless self.type.nil?
return true if (self.type =~ /^video/) != nil
return true if self.type == "image/mov"
end
# TODO: create a more complete list
# =================================
video_extensions = ['mov', 'mp4', 'avi', 'wmv', 'asf']
video_extensions.each do |extension|
# Require a literal "." before the extension (see audio?).
if (url =~ /\.#{extension}$/) != nil
return true
end
end
return false
end
end
# Simple value holders for the sub-structures an enclosure can carry
# (media categories, content hashes, players, credits, thumbnails).
EnclosureCategory = Struct.new( "EnclosureCategory", :category, :scheme, :label )
EnclosureHash = Struct.new( "EnclosureHash", :hash, :type )
EnclosurePlayer = Struct.new( "EnclosurePlayer", :url, :height, :width )
EnclosureCredit = Struct.new( "EnclosureCredit", :name, :role )
EnclosureThumbnail = Struct.new( "EnclosureThumbnail", :url, :height, :width )
# Returns the parent feed of this feed item
def feed
  @feed
end
# Sets the parent feed of this feed item.
def feed=(value)
  @feed = value
end
# Returns the feed item's raw xml data.
def xml_data
  @xml_data
end
# Sets the feed item's xml data.
def xml_data=(value)
  @xml_data = value
end
# Returns a REXML Document of the xml_data, parsed once and memoized.
def xml
  @xml_doc ||= Document.new(xml_data)
end
# Returns the first node within the root_node that matches the xpath query.
def find_node(xpath)
  XPath.first(root_node, xpath)
end
# Returns all nodes within the root_node that match the xpath query.
def find_all_nodes(xpath)
  XPath.match(root_node, xpath)
end
# Returns the root node of the feed item, memoized.
def root_node
  @root_node ||= xml.root
end
# Returns the feed item title, unescaped, sanitized, and with any
# trailing "[n]" comment-count suffix stripped.
def title
  if @title.nil?
    type_attr = XPath.first(root_node, "title/@type").to_s
    mode_attr = XPath.first(root_node, "title/@mode").to_s
    if type_attr == "xhtml" || mode_attr == "xhtml"
      @title = XPath.first(root_node, "title").inner_xml
    else
      # The "escaped" variant and the plain default are handled
      # identically: unescape the raw text node.
      @title = CGI.unescapeHTML(
        XPath.first(root_node, "title/text()").to_s)
    end
    unless @title.nil?
      @title = CGI.unescapeHTML(FeedTools.sanitize_html(@title, :strip))
    end
    if @title != ""
      # Some blogging tools include the number of comments in a post
      # in the title... this is supremely ugly, and breaks any
      # applications which expect the title to be static, so we're
      # gonna strip them out.
      #
      # If for some incredibly wierd reason you need the actual
      # unstripped title, just use find_node("title/text()").to_s
      @title = FeedTools.strip_html(
        @title.strip.gsub(/\[\d*\]$/, "")).strip
      @title.gsub!(/\n/, " ")
    end
    @title = nil if @title == ""
  end
  return @title
end
# Sets the feed item title.
def title=(value)
  @title = value
end
# Returns the feed item description
#
# Tries, in order: an (x)html body element, content:encoded, the
# description element (cdata, text, then inner xml), the Atom content
# element, and finally the itunes/media fallbacks. The result is
# sanitized and stripped of any leading/trailing div wrappers.
def description
if @description.nil?
# get the item content
@description = ""
body_node = XPath.first(root_node, "xhtml:body")
if body_node == nil
body_node = XPath.first(root_node, "body")
end
if body_node != nil
@description = body_node.inner_xml
end
if @description == ""
@description =
CGI.unescapeHTML(XPath.first(root_node, "content:encoded/text()").to_s)
end
if @description == ""
begin
@description = XPath.first(root_node, "description").cdatas.first.to_s
rescue
@description = ""
end
if @description == ""
@description = XPath.first(root_node, "description/text()").to_s
end
if @description != ""
if XPath.first(root_node, "description/@encoding").to_s != ""
# Not supported... yet.
@description = "[Embedded data objects are not supported.]"
else
@description = CGI.unescapeHTML(@description)
end
end
end
if @description == ""
@description = XPath.first(root_node, "content/text()").to_s
if @description != "" &&
(XPath.first(root_node, "content/@mode").to_s == "escaped" ||
XPath.first(root_node, "content/@type").to_s == "escaped")
@description = CGI.unescapeHTML(@description)
end
if XPath.first(root_node, "content/@mode").to_s == "xhtml" ||
XPath.first(root_node, "content/@type").to_s == "xhtml"
@description = XPath.first(root_node, "content").inner_xml
end
end
if @description == ""
begin
@description = XPath.first(root_node, "description").inner_xml
rescue
end
end
if @description == ""
@description = self.itunes_summary
@description = "" if @description.nil?
end
if @description == ""
@description = self.itunes_subtitle
@description = "" if @description.nil?
end
if @description == ""
@description = self.media_text
@description = "" if @description.nil?
end
unless @description.nil?
@description = FeedTools.sanitize_html(@description)
end
# If it started with a bunch of divs, hack them right off. We can put
# them back later if they're needed.
# NOTE(review): this pattern was garbled in the source; restored to
# strip leading <div ...> tags as the comment above describes.
@description.gsub!(/^(<div[^>]*>)*/, "")
@description.gsub!(/(<\/div>)*$/, "")
@description.gsub!(/\n/, " ") if @description.size < 80
@description = @description.strip unless @description.nil?
@description = nil if @description == ""
end
return @description
end
# Returns the feed item link
#
# Resolution order: atom alternate link, generic link href/text,
# rdf:about, permalink guid, then any guid that parses as a url.
# Relative results are joined against the parent feed's base url and
# the final value is normalized.
def link
if @link.nil?
@link = XPath.first(root_node, "link[@rel='alternate']/@href").to_s
if @link == ""
@link = XPath.first(root_node, "link/@href").to_s
end
if @link == ""
@link = XPath.first(root_node, "link/text()").to_s
end
if @link == ""
@link = XPath.first(root_node, "@rdf:about").to_s
end
if @link == ""
@link = XPath.first(root_node, "guid[@isPermaLink='true']/text()").to_s
end
if @link == ""
if FeedTools.is_url? self.guid
@link = self.guid
end
end
if @link != ""
@link = CGI.unescapeHTML(@link)
end
# Anything that doesn't start with http:// or https:// is treated
# as relative and joined to the parent feed's base url.
if @link != "" && (@link =~ /http:\/\//) != 0 && (@link =~ /https:\/\//) != 0
# Avoid a doubled slash at the join point.
if (feed.base[-1..-1] == "/" && @link[0..0] == "/")
@link = @link[1..-1]
end
# prepend the base to the link since they seem to have used a relative path
@link = feed.base + @link
end
@link = FeedTools.normalize_url(@link)
end
return @link
end
# Sets the feed item link.
def link=(value)
  @link = value
end
# Returns the feed comment link; falls back on the item link when the
# feed declares no comments element. The result is normalized.
def comment_link
  if @comment_link.nil?
    # get the feed comment link from the xml document
    @comment_link = XPath.first(root_node, "comments/text()").to_s
    @comment_link = self.link if @comment_link == ""
    @comment_link = FeedTools.normalize_url(@comment_link)
  end
  return @comment_link
end
# Sets the feed comment link.
def comment_link=(value)
  @comment_link = value
end
# Returns the feed item image link
#
# Fixed: every lookup was previously guarded by `@image_link == ""`
# while @image_link was still nil, so none of the lookups ever ran.
# The value is now seeded before the fallback chain. The inverted
# condition recording the borrowed media thumbnail was also fixed.
def image_link
if @image_link.nil?
# get the feed image link from the xml document
@image_link = XPath.first(root_node, "link[@type='image/jpeg']/@href").to_s
if @image_link == ""
@image_link = XPath.first(root_node, "link[@type='image/gif']/@href").to_s
end
if @image_link == ""
@image_link = XPath.first(root_node, "link[@type='image/png']/@href").to_s
end
# The following two should technically never occur, but have been included
# simply because I've seen both occuring in the wild at least once.
if @image_link == ""
@image_link = XPath.first(root_node, "image/url/text()").to_s
end
if @image_link == ""
@image_link = XPath.first(root_node, "image/@rdf:resource").to_s
end
if @image_link == ""
# If there's only a media thumbnail, we can just borrow it. Technically, this isn't
# ideal, but chances are very good that anything that makes use of this image is
# simply not going to care anyhow.
@image_link = XPath.first(root_node, "media:thumbnail/@url").to_s
if @image_link != ""
@media_image_link = @image_link
end
end
if @image_link == ""
# If there's only an itunes image, we can just borrow it. See comment above regarding
# less-than-ideal-ness.
if @itunes_image_link.nil? || @itunes_image_link == ""
@image_link = XPath.first(root_node, "itunes:image/@href").to_s
if @image_link == ""
@image_link = XPath.first(root_node, "itunes:link[@rel='image']/@href").to_s
end
@itunes_image_link = @image_link
else
@image_link = @itunes_image_link
end
end
@image_link = FeedTools.normalize_url(@image_link)
end
return @image_link
end
# Sets the feed item image link.
def image_link=(value)
  @image_link = value
end
# Returns the feed item itunes image link
#
# If it's not present, falls back to the normal image link.
# Technically, the itunes spec says that the image needs to be
# square and larger than 300x300, but hey, if there's an image
# to be had, it's better than none at all.
def itunes_image_link
  if @itunes_image_link.nil?
    # get the feed item itunes image link from the xml document
    @itunes_image_link = XPath.first(root_node, "itunes:image/@href").to_s
    if @itunes_image_link == ""
      @itunes_image_link =
        XPath.first(root_node, "itunes:link[@rel='image']/@href").to_s
    end
    @itunes_image_link = self.image_link if @itunes_image_link == ""
    @itunes_image_link = FeedTools.normalize_url(@itunes_image_link)
  end
  return @itunes_image_link
end
# Sets the feed item itunes image link.
def itunes_image_link=(value)
  @itunes_image_link = value
end
# Returns the feed item media thumbnail link
#
# If it's not present, falls back to the normal image link.
def media_thumbnail_link
  if @media_thumbnail_link.nil?
    # get the feed item thumbnail link from the xml document
    @media_thumbnail_link = XPath.first(root_node, "media:thumbnail/@url").to_s
    @media_thumbnail_link = image_link if @media_thumbnail_link == ""
    @media_thumbnail_link = FeedTools.normalize_url(@media_thumbnail_link)
  end
  return @media_thumbnail_link
end
# Sets the feed item media thumbnail url.
def media_thumbnail_link=(value)
  @media_thumbnail_link = value
end
# Returns the feed item's unique id, preferring the Atom id element
# over the RSS guid. Returns nil when neither is present.
def id
  if @id.nil?
    candidate = XPath.first(root_node, "id/text()").to_s
    candidate = XPath.first(root_node, "guid/text()").to_s if candidate == ""
    @id = (candidate == "") ? nil : candidate
  end
  return @id
end
# Sets the feed item's unique id.
def id=(value)
  @id = value
end
# Returns all feed item enclosures
#
# Merges RSS enclosure elements, atom enclosure links, media:content
# objects, and media:group objects into a single de-duplicated list,
# inheriting group- and item-level metadata where appropriate.
#
# Fixes: the "missing text" fallback used an assignment
# (`enclosure.text = ""`) instead of a comparison, which clobbered
# every enclosure's text with itunes_summary; and a media group with
# no default enclosure no longer pushes nil into the result list.
def enclosures
if @enclosures.nil?
@enclosures = []
# First, load up all the different possible sources of enclosures
rss_enclosures = XPath.match(root_node, "enclosure")
atom_enclosures = XPath.match(root_node, "link[@rel='enclosure']")
media_content_enclosures = XPath.match(root_node, "media:content")
media_group_enclosures = XPath.match(root_node, "media:group")
# Parse RSS-type enclosures. Thanks to a few buggy enclosures implementations,
# sometimes these also manage to show up in atom files.
for enclosure_node in rss_enclosures
enclosure = Enclosure.new
enclosure.url = CGI.unescapeHTML(enclosure_node.attributes["url"].to_s)
enclosure.type = enclosure_node.attributes["type"].to_s
enclosure.file_size = enclosure_node.attributes["length"].to_i
enclosure.credits = []
enclosure.explicit = false
@enclosures << enclosure
end
# Parse atom-type enclosures. If there are repeats of the same enclosure object,
# we merge the two together.
for enclosure_node in atom_enclosures
enclosure_url = CGI.unescapeHTML(enclosure_node.attributes["href"].to_s)
enclosure = nil
new_enclosure = false
for existing_enclosure in @enclosures
if existing_enclosure.url == enclosure_url
enclosure = existing_enclosure
break
end
end
if enclosure.nil?
new_enclosure = true
enclosure = Enclosure.new
end
enclosure.url = enclosure_url
enclosure.type = enclosure_node.attributes["type"].to_s
enclosure.file_size = enclosure_node.attributes["length"].to_i
enclosure.credits = []
enclosure.explicit = false
if new_enclosure
@enclosures << enclosure
end
end
# Creates an anonymous method to parse content objects from the media module. We
# do this to avoid excessive duplication of code since we have to do identical
# processing for content objects within group objects.
parse_media_content = lambda do |media_content_nodes|
affected_enclosures = []
for enclosure_node in media_content_nodes
enclosure_url = CGI.unescapeHTML(enclosure_node.attributes["url"].to_s)
enclosure = nil
new_enclosure = false
for existing_enclosure in @enclosures
if existing_enclosure.url == enclosure_url
enclosure = existing_enclosure
break
end
end
if enclosure.nil?
new_enclosure = true
enclosure = Enclosure.new
end
enclosure.url = enclosure_url
enclosure.type = enclosure_node.attributes["type"].to_s
enclosure.file_size = enclosure_node.attributes["fileSize"].to_i
enclosure.duration = enclosure_node.attributes["duration"].to_s
enclosure.height = enclosure_node.attributes["height"].to_i
enclosure.width = enclosure_node.attributes["width"].to_i
enclosure.bitrate = enclosure_node.attributes["bitrate"].to_i
enclosure.framerate = enclosure_node.attributes["framerate"].to_i
enclosure.expression = enclosure_node.attributes["expression"].to_s
enclosure.is_default =
(enclosure_node.attributes["isDefault"].to_s.downcase == "true")
if XPath.first(enclosure_node, "media:thumbnail/@url").to_s != ""
enclosure.thumbnail = EnclosureThumbnail.new(
CGI.unescapeHTML(XPath.first(enclosure_node, "media:thumbnail/@url").to_s),
CGI.unescapeHTML(XPath.first(enclosure_node, "media:thumbnail/@height").to_s),
CGI.unescapeHTML(XPath.first(enclosure_node, "media:thumbnail/@width").to_s)
)
if enclosure.thumbnail.height == ""
enclosure.thumbnail.height = nil
end
if enclosure.thumbnail.width == ""
enclosure.thumbnail.width = nil
end
end
enclosure.categories = []
for category in XPath.match(enclosure_node, "media:category")
enclosure.categories << EnclosureCategory.new(
CGI.unescapeHTML(category.text),
CGI.unescapeHTML(category.attributes["scheme"].to_s),
CGI.unescapeHTML(category.attributes["label"].to_s)
)
if enclosure.categories.last.scheme == ""
enclosure.categories.last.scheme = nil
end
if enclosure.categories.last.label == ""
enclosure.categories.last.label = nil
end
end
if XPath.first(enclosure_node, "media:hash/text()").to_s != ""
enclosure.hash = EnclosureHash.new(
FeedTools.sanitize_html(CGI.unescapeHTML(XPath.first(
enclosure_node, "media:hash/text()").to_s), :strip),
"md5"
)
end
if XPath.first(enclosure_node, "media:player/@url").to_s != ""
enclosure.player = EnclosurePlayer.new(
CGI.unescapeHTML(XPath.first(enclosure_node, "media:player/@url").to_s),
CGI.unescapeHTML(XPath.first(enclosure_node, "media:player/@height").to_s),
CGI.unescapeHTML(XPath.first(enclosure_node, "media:player/@width").to_s)
)
if enclosure.player.height == ""
enclosure.player.height = nil
end
if enclosure.player.width == ""
enclosure.player.width = nil
end
end
enclosure.credits = []
for credit in XPath.match(enclosure_node, "media:credit")
enclosure.credits << EnclosureCredit.new(
CGI.unescapeHTML(CGI.unescapeHTML(credit.text)),
CGI.unescapeHTML(credit.attributes["role"].to_s.downcase)
)
if enclosure.credits.last.role == ""
enclosure.credits.last.role = nil
end
end
enclosure.explicit = (XPath.first(enclosure_node,
"media:adult/text()").to_s.downcase == "true")
if XPath.first(enclosure_node, "media:text/text()").to_s != ""
enclosure.text = CGI.unescapeHTML(XPath.first(enclosure_node,
"media:text/text()").to_s)
end
affected_enclosures << enclosure
if new_enclosure
@enclosures << enclosure
end
end
affected_enclosures
end
# Parse the independant content objects.
parse_media_content.call(media_content_enclosures)
media_groups = []
# Parse the group objects.
for media_group in media_group_enclosures
group_media_content_enclosures =
XPath.match(media_group, "media:content")
# Parse the content objects within the group objects.
affected_enclosures =
parse_media_content.call(group_media_content_enclosures)
# Now make sure that content objects inherit certain properties from
# the group objects.
for enclosure in affected_enclosures
if enclosure.thumbnail.nil? &&
XPath.first(media_group, "media:thumbnail/@url").to_s != ""
enclosure.thumbnail = EnclosureThumbnail.new(
CGI.unescapeHTML(
XPath.first(media_group, "media:thumbnail/@url").to_s),
CGI.unescapeHTML(
XPath.first(media_group, "media:thumbnail/@height").to_s),
CGI.unescapeHTML(
XPath.first(media_group, "media:thumbnail/@width").to_s)
)
if enclosure.thumbnail.height == ""
enclosure.thumbnail.height = nil
end
if enclosure.thumbnail.width == ""
enclosure.thumbnail.width = nil
end
end
if (enclosure.categories.nil? || enclosure.categories.size == 0)
enclosure.categories = []
for category in XPath.match(media_group, "media:category")
enclosure.categories << EnclosureCategory.new(
CGI.unescapeHTML(category.text),
CGI.unescapeHTML(category.attributes["scheme"].to_s),
CGI.unescapeHTML(category.attributes["label"].to_s)
)
if enclosure.categories.last.scheme == ""
enclosure.categories.last.scheme = nil
end
if enclosure.categories.last.label == ""
enclosure.categories.last.label = nil
end
end
end
if enclosure.hash.nil? &&
XPath.first(media_group, "media:hash/text()").to_s != ""
enclosure.hash = EnclosureHash.new(
CGI.unescapeHTML(XPath.first(media_group, "media:hash/text()").to_s),
"md5"
)
end
if enclosure.player.nil? &&
XPath.first(media_group, "media:player/@url").to_s != ""
enclosure.player = EnclosurePlayer.new(
CGI.unescapeHTML(XPath.first(media_group, "media:player/@url").to_s),
CGI.unescapeHTML(XPath.first(media_group, "media:player/@height").to_s),
CGI.unescapeHTML(XPath.first(media_group, "media:player/@width").to_s)
)
if enclosure.player.height == ""
enclosure.player.height = nil
end
if enclosure.player.width == ""
enclosure.player.width = nil
end
end
if enclosure.credits.nil? || enclosure.credits.size == 0
enclosure.credits = []
for credit in XPath.match(media_group, "media:credit")
enclosure.credits << EnclosureCredit.new(
CGI.unescapeHTML(CGI.unescapeHTML(credit.text)),
CGI.unescapeHTML(credit.attributes["role"].to_s.downcase)
)
if enclosure.credits.last.role == ""
enclosure.credits.last.role = nil
end
end
end
if enclosure.explicit?.nil?
enclosure.explicit = (XPath.first(media_group,
"media:adult/text()").to_s.downcase == "true") ? true : false
end
if enclosure.text.nil? &&
XPath.first(media_group, "media:text/text()").to_s != ""
enclosure.text = FeedTools.sanitize_html(CGI.unescapeHTML(
XPath.first(media_group, "media:text/text()").to_s), :strip)
end
end
# Keep track of the media groups
media_groups << affected_enclosures
end
# Now we need to inherit any relevant item level information.
if self.explicit?
for enclosure in @enclosures
enclosure.explicit = true
end
end
# Add all the itunes categories
for itunes_category in XPath.match(root_node, "itunes:category")
genre = "Podcasts"
category = itunes_category.attributes["text"].to_s
subcategory = XPath.first(itunes_category, "itunes:category/@text").to_s
category_path = genre
if category != ""
category_path << "/" + category
end
if subcategory != ""
category_path << "/" + subcategory
end
for enclosure in @enclosures
if enclosure.categories.nil?
enclosure.categories = []
end
enclosure.categories << EnclosureCategory.new(
CGI.unescapeHTML(category_path),
CGI.unescapeHTML("http://www.apple.com/itunes/store/"),
CGI.unescapeHTML("iTunes Music Store Categories")
)
end
end
for enclosure in @enclosures
# Clean up any of those attributes that incorrectly have ""
# or 0 as their values
if enclosure.type == ""
enclosure.type = nil
end
if enclosure.file_size == 0
enclosure.file_size = nil
end
# NOTE(review): duration is parsed with to_s above, so this 0
# comparison only catches values set elsewhere as integers.
if enclosure.duration == 0
enclosure.duration = nil
end
if enclosure.height == 0
enclosure.height = nil
end
if enclosure.width == 0
enclosure.width = nil
end
if enclosure.bitrate == 0
enclosure.bitrate = nil
end
if enclosure.framerate == 0
enclosure.framerate = nil
end
if enclosure.expression == "" || enclosure.expression.nil?
enclosure.expression = "full"
end
# If an enclosure is missing the text field, fall back on the itunes:summary field
# (fixed: this previously used an assignment instead of a comparison,
# so the fallback fired for every enclosure).
if enclosure.text.nil? || enclosure.text == ""
enclosure.text = self.itunes_summary
end
# Make sure we don't have duplicate categories
unless enclosure.categories.nil?
enclosure.categories.uniq!
end
end
# And finally, now things get complicated. This is where we make
# sure that the enclosures method only returns either default
# enclosures or enclosures with only one version. Any enclosures
# that are wrapped in a media:group will be placed in the appropriate
# versions field.
affected_enclosure_urls = []
for media_group in media_groups
affected_enclosure_urls =
affected_enclosure_urls | (media_group.map do |enclosure|
enclosure.url
end)
end
@enclosures.delete_if do |enclosure|
(affected_enclosure_urls.include? enclosure.url)
end
for media_group in media_groups
default_enclosure = nil
for enclosure in media_group
if enclosure.is_default?
default_enclosure = enclosure
end
end
for enclosure in media_group
enclosure.default_version = default_enclosure
enclosure.versions = media_group.clone
enclosure.versions.delete(enclosure)
end
# Don't push nil when the group declared no default enclosure.
@enclosures << default_enclosure unless default_enclosure.nil?
end
end
# If we have a single enclosure, it's safe to inherit the itunes:duration field
# if it's missing.
if @enclosures.size == 1
if @enclosures.first.duration.nil? || @enclosures.first.duration == 0
@enclosures.first.duration = self.duration
end
end
return @enclosures
end
# Replaces the list of enclosures for this feed item.
def enclosures=(value)
  @enclosures = value
end
# Returns the feed item author
def author_name
  # TODO: make this not suck, actually ensure we're looking at a name
  # and not an email address.
  # Also, factor in itunes module.
  # =================================================================
  if @author_name.nil?
    # Try each location in priority order; stop at the first non-blank hit.
    ["author/name/text()", "dc:creator/text()", "author/text()"].each do |xpath|
      @author_name = CGI.unescapeHTML(XPath.first(root_node, xpath).to_s)
      break unless @author_name == ""
    end
  end
  return @author_name
end
# Sets the feed item author
def author_name=(value)
  @author_name = value
end
# Returns the contents of the itunes:summary element
def itunes_summary
  if @itunes_summary.nil?
    # Blank elements memoize as nil; non-blank ones are sanitized first.
    raw = CGI.unescapeHTML(XPath.first(root_node,
      "itunes:summary/text()").to_s)
    @itunes_summary = (raw == "") ? nil : FeedTools.sanitize_html(raw)
  end
  return @itunes_summary
end
# Sets the contents of the itunes:summary element
def itunes_summary=(value)
  @itunes_summary = value
end
# Returns the contents of the itunes:subtitle element
def itunes_subtitle
  if @itunes_subtitle.nil?
    # Blank elements memoize as nil; non-blank ones are sanitized first.
    raw = CGI.unescapeHTML(XPath.first(root_node,
      "itunes:subtitle/text()").to_s)
    @itunes_subtitle = (raw == "") ? nil : FeedTools.sanitize_html(raw)
  end
  return @itunes_subtitle
end
# Sets the contents of the itunes:subtitle element
def itunes_subtitle=(value)
  @itunes_subtitle = value
end
# Returns the contents of the media:text element
# Returns the contents of the media:text element, sanitized, or nil when
# the element is absent or empty.
#
# FIX: the original queried "itunes:subtitle/text()" here -- an apparent
# copy-paste from the itunes_subtitle accessor -- so the media:text
# element was never actually read. Query media:text as documented.
def media_text
  if @media_text.nil?
    @media_text = CGI.unescapeHTML(XPath.first(root_node,
      "media:text/text()").to_s)
    if @media_text == ""
      @media_text = nil
    end
    unless @media_text.nil?
      @media_text = FeedTools.sanitize_html(@media_text)
    end
  end
  return @media_text
end
# Sets the contents of the media:text element
def media_text=(value)
  @media_text = value
end
# Returns the contents of the itunes:author element
#
# This inherits from any incorrectly placed channel-level itunes:author
# elements. They're actually amazingly common. People don't read specs.
def itunes_author
  return @itunes_author unless @itunes_author.nil?
  value = CGI.unescapeHTML(XPath.first(root_node,
    "itunes:author/text()").to_s)
  if value == ""
    # Fall back on the (spec-violating but common) channel-level element.
    value = CGI.unescapeHTML(XPath.first(feed.channel_node,
      "itunes:author/text()").to_s)
  end
  # Blank means absent.
  value = nil if value == ""
  @itunes_author = value
  return @itunes_author
end
# Sets the contents of the itunes:author element
def itunes_author=(value)
  @itunes_author = value
end
# Returns the number of seconds that the associated media runs for
# Returns the number of seconds that the associated media runs for,
# parsed from the itunes:duration element ("SS", "MM:SS" or "HH:MM:SS").
# Returns nil when the element is absent or blank.
def duration
  if @duration.nil?
    itunes_duration = CGI.unescapeHTML(XPath.first(root_node,
      "itunes:duration/text()").to_s)
    if itunes_duration != ""
      hms = itunes_duration.split(":").map { |x| x.to_i }
      # FIX: plain arithmetic instead of ActiveSupport's Integer#hour /
      # Integer#minute. Numerically identical (3600/60-second multiples)
      # but always yields a plain Integer and works even when
      # ActiveSupport is not loaded.
      if hms.size == 3
        @duration = hms[0] * 3600 + hms[1] * 60 + hms[2]
      elsif hms.size == 2
        @duration = hms[0] * 60 + hms[1]
      elsif hms.size == 1
        @duration = hms[0]
      end
    end
  end
  return @duration
end
# Sets the number of seconds that the associated media runs for
def duration=(value)
  @duration = value
end
# Sets the itunes:summary
def itunes_summary=(new_itunes_summary)
  # FIX: this duplicate definition had an empty body. Since the later
  # definition wins in Ruby, it silently turned the working
  # itunes_summary= accessor defined earlier in this class into a no-op.
  # Store the value as callers expect.
  # NOTE(review): consider deleting this duplicate definition entirely.
  @itunes_summary = new_itunes_summary
end
# Returns the feed item time
# Returns the feed item time as a Time object, checking pubDate, dc:date
# and issued in that order. Falls back on Time.now when no date element
# is present or the date string fails to parse.
def time
  if @time.nil?
    time_string = XPath.first(root_node, "pubDate/text()").to_s
    if time_string == ""
      time_string = XPath.first(root_node, "dc:date/text()").to_s
    end
    if time_string == ""
      time_string = XPath.first(root_node, "issued/text()").to_s
    end
    if time_string != ""
      # Deliberate best-effort parse: an unparseable date becomes "now".
      @time = Time.parse(time_string) rescue Time.now
    else
      # FIX: this branch originally tested `time_string == nil`, which can
      # never be true after to_s, so items with no date element were left
      # with a nil time. Default them to Time.now as clearly intended.
      @time = Time.now
    end
  end
  return @time
end
# Sets the feed item time
def time=(value)
  @time = value
end
# Returns the feed item tags
# Returns the feed item tags, drawn from the first source that yields
# any: dc:subject rdf:Bag entries, taxo:topics resource urls
# (del.icio.us style), category elements, free-text dc:subject elements,
# and finally space-separated itunes:keywords. Tags are lowercased,
# stripped and de-duplicated.
def tags
  # TODO: support the rel="tag" microformat
  # =======================================
  if @tags.nil?
    @tags = []
    # Source 1: dc:subject rdf:Bag entries.
    # NOTE(review): a bag containing exactly one rdf:li is ignored
    # (`size > 1`); this looks like it was meant to be `> 0`, but the
    # existing behavior is preserved here -- confirm before changing.
    tag_list = XPath.match(root_node, "dc:subject/rdf:Bag/rdf:li/text()")
    if tag_list.size > 1
      for tag in tag_list
        @tags << tag.to_s.downcase.strip
      end
    end
    if @tags.size == 0
      # messy effort to find ourselves some tags, mainly for del.icio.us
      rdf_bag = XPath.match(root_node, "taxo:topics/rdf:Bag/rdf:li")
      if rdf_bag != nil && rdf_bag.size > 0
        for tag_node in rdf_bag
          begin
            # FIX: the original read "@resource" off root_node, leaving
            # the loop variable unused and checking the same attribute on
            # every pass. Read it from the rdf:li node being iterated.
            tag_url = XPath.first(tag_node, "@resource").to_s
            tag_match = tag_url.scan(/\/(tag|tags)\/(\w+)/)
            if tag_match.size > 0
              @tags << tag_match.first.last.downcase.strip
            end
          rescue
          end
        end
      end
    end
    if @tags.size == 0
      # Source 3: plain category elements.
      tag_list = XPath.match(root_node, "category/text()")
      for tag in tag_list
        @tags << tag.to_s.downcase.strip
      end
    end
    if @tags.size == 0
      # Source 4: free-text dc:subject elements.
      tag_list = XPath.match(root_node, "dc:subject/text()")
      for tag in tag_list
        @tags << tag.to_s.downcase.strip
      end
    end
    if @tags.size == 0
      # Source 5: space-separated itunes:keywords (best-effort).
      begin
        @tags = XPath.first(root_node, "itunes:keywords/text()").to_s.downcase.split(" ")
      rescue
        @tags = []
      end
    end
    @tags.uniq!
  end
  return @tags
end
# Sets the feed item tags
def tags=(value)
  @tags = value
end
# Returns true if this feed item contains explicit material. If the whole
# feed has been marked as explicit, this will return true even if the item
# isn't explicitly marked as explicit.
def explicit?
  if @explicit.nil?
    # Hoist the XPath lookups into locals; the two recognized markers are
    # media:adult == "true" and itunes:explicit == "yes"/"true".
    adult_flag = XPath.first(root_node,
      "media:adult/text()").to_s.downcase
    itunes_flag = XPath.first(root_node,
      "itunes:explicit/text()").to_s.downcase
    @explicit = (adult_flag == "true" ||
      itunes_flag == "yes" ||
      itunes_flag == "true" ||
      feed.explicit) ? true : false
  end
  return @explicit
end
# Sets whether or not the feed contains explicit material
def explicit=(value)
  # Coerce any truthy/falsy input to a literal boolean.
  @explicit = !!value
end
# A hook method that is called during the feed generation process. Overriding this method
# will enable additional content to be inserted into the feed.
# A hook method that is called during the feed generation process.
# Overriding this method will enable additional content to be inserted
# into the feed. The default implementation contributes nothing.
def build_xml_hook(feed_type, version, xml_builder)
  nil
end
# Generates xml based on the content of the feed item
# Generates xml based on the content of the feed item.
#
# feed_type::   "rss" (default) or "atom"
# version::     for "rss", 0.9/1.0/1.1 select the RDF-based format; any
#               other value selects plain RSS
# xml_builder:: the Builder::XmlMarkup instance to emit into
#
# Raises a RuntimeError when an RDF-based item is requested but the link
# field is nil (the rdf:about attribute is mandatory there).
#
# FIX(style only, output unchanged): the original used `unless ... else`
# and `unless` with `||` conditions (style-guide antipatterns) plus `for`
# loops; these are rewritten as positive `if/else` and `each`.
def build_xml(feed_type="rss", version=0.0, xml_builder=Builder::XmlMarkup.new(:indent => 2))
  if feed_type == "rss" && (version == 0.9 || version == 1.0 || version == 1.1)
    # RDF-based rss format
    if link.nil?
      raise "Cannot generate an rdf-based feed item with a nil link field."
    end
    return xml_builder.item("rdf:about" => CGI.escapeHTML(link)) do
      # RDF items always emit title/link/description, empty when blank.
      if title.nil? || title == ""
        xml_builder.title
      else
        xml_builder.title(title)
      end
      if link.nil? || link == ""
        xml_builder.link
      else
        xml_builder.link(link)
      end
      if description.nil? || description == ""
        xml_builder.description
      else
        xml_builder.description(description)
      end
      unless time.nil?
        xml_builder.tag!("dc:date", time.iso8601)
      end
      if !tags.nil? && tags.size > 0
        xml_builder.tag!("dc:subject") do
          xml_builder.tag!("rdf:Bag") do
            tags.each do |tag|
              xml_builder.tag!("rdf:li", tag)
            end
          end
        end
        xml_builder.tag!("itunes:keywords", tags.join(" "))
      end
      build_xml_hook(feed_type, version, xml_builder)
    end
  elsif feed_type == "rss"
    # normal rss format -- blank fields are simply omitted.
    return xml_builder.item do
      if !title.nil? && title != ""
        xml_builder.title(title)
      end
      if !link.nil? && link != ""
        xml_builder.link(link)
      end
      if !description.nil? && description != ""
        xml_builder.description(description)
      end
      unless time.nil?
        xml_builder.pubDate(time.rfc822)
      end
      if !tags.nil? && tags.size > 0
        xml_builder.tag!("dc:subject") do
          xml_builder.tag!("rdf:Bag") do
            tags.each do |tag|
              xml_builder.tag!("rdf:li", tag)
            end
          end
        end
        xml_builder.tag!("itunes:keywords", tags.join(" "))
      end
      build_xml_hook(feed_type, version, xml_builder)
    end
  elsif feed_type == "atom"
    # normal atom format (Atom 0.3 namespace)
    return xml_builder.entry("xmlns" => "http://purl.org/atom/ns#") do
      if !title.nil? && title != ""
        xml_builder.title(title,
          "mode" => "escaped",
          "type" => "text/html")
      end
      if !link.nil? && link != ""
        xml_builder.link("href" => link,
          "rel" => "alternate",
          "type" => "text/html",
          "title" => title)
      end
      if !description.nil? && description != ""
        xml_builder.content(description,
          "mode" => "escaped",
          "type" => "text/html")
      end
      unless time.nil?
        xml_builder.issued(time.iso8601)
      end
      if !tags.nil? && tags.size > 0
        tags.each do |tag|
          xml_builder.category(tag)
        end
      end
      build_xml_hook(feed_type, version, xml_builder)
    end
  end
end
alias_method :tagline, :description
alias_method :tagline=, :description=
alias_method :subtitle, :description
alias_method :subtitle=, :description=
alias_method :abstract, :description
alias_method :abstract=, :description=
alias_method :content, :description
alias_method :content=, :description=
alias_method :guid, :id
alias_method :guid=, :id=
end
end
module REXML #:nodoc:
  class Element #:nodoc:
    # Serializes every child node (elements, text, comments, ...) in
    # document order and concatenates the results, yielding the
    # element's inner XML markup as a String.
    def inner_xml #:nodoc:
      self.children.map { |node| node.to_s }.join
    end
  end
end
# Prime the configured feed cache (if any) at library load time.
# Failures here (missing cache backend, unreachable store, etc.) are
# deliberately swallowed so that merely requiring the library never
# raises.
begin
  FeedTools.feed_cache.initialize_cache unless FeedTools.feed_cache.nil?
rescue
end