lib/feedzirra/feed.rb in kete-feedzirra-0.0.8.1 vs lib/feedzirra/feed.rb in kete-feedzirra-0.0.16.1
- old
+ new
@@ -25,11 +25,11 @@
# === Parameters
# [xml<String>] The XML for which you would like to determine the parser.
# === Returns
# The class name of the parser that can handle the XML.
def self.determine_feed_parser_for_xml(xml)
- start_of_doc = xml.slice(0, 1000)
+ start_of_doc = xml.slice(0, 2000)
feed_classes.detect {|klass| klass.able_to_parse?(start_of_doc)}
end
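
The sniffing window doubles from 1,000 to 2,000 bytes, so feeds whose root element sits after a long XML prolog or banner comment still match a parser. A minimal sketch of the entry point (the sample XML is illustrative):

    require 'feedzirra'

    xml = <<-XML
    <?xml version="1.0" encoding="UTF-8"?>
    <!-- a long banner comment before the root element could previously
         push the opening <rss> tag past the old 1,000-byte window -->
    <rss version="2.0"><channel><title>Example</title></channel></rss>
    XML

    # Returns the first registered parser whose able_to_parse? matches the
    # first 2,000 bytes, e.g. Feedzirra::Parser::RSS, or nil if none match.
    parser = Feedzirra::Feed.determine_feed_parser_for_xml(xml)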
# Adds a new feed parsing class that will be used for parsing.
#
@@ -44,11 +44,11 @@
# Provides a list of registered feed parsing classes.
#
# === Returns
# An array of class names.
def self.feed_classes
- @feed_classes ||= [ITunesRSS, RSS, AtomFeedBurner, Atom]
+ @feed_classes ||= [Feedzirra::Parser::RSS, Feedzirra::Parser::AtomFeedBurner, Feedzirra::Parser::Atom]
end
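
The default list is now namespaced under Feedzirra::Parser, and ITunesRSS drops out of it. Custom parsers can still be pushed onto the front of the list via the registration method documented above (add_feed_class in feedzirra's API); a hedged sketch, assuming a SAXMachine-based class (MyItunesRSS is hypothetical):

    require 'feedzirra'
    require 'sax-machine'

    # Hypothetical parser for illustration; any class that implements
    # able_to_parse?(xml) and parse(xml) (e.g. via SAXMachine) will do.
    class MyItunesRSS
      include SAXMachine
      element :title

      def self.able_to_parse?(xml)
        xml =~ /itunes/i
      end
    end

    # add_feed_class unshifts, so custom parsers are tried before the defaults.
    Feedzirra::Feed.add_feed_class(MyItunesRSS)
    Feedzirra::Feed.feed_classes.first # => MyItunesRSS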
# Makes all entry types look for the passed in element to parse. This is actually just a call to
# element (a SAXMachine call) in the class
#
@@ -56,29 +56,15 @@
# [element_tag<String>]
# [options<Hash>] Valid keys are the same as with SAXMachine
def self.add_common_feed_entry_element(element_tag, options = {})
# need to think of a better way to do this; it will break for people
# who want this behavior across their added classes
- [RSSEntry, AtomFeedBurnerEntry, AtomEntry].each do |klass|
+ feed_classes.map{|k| eval("#{k}Entry") }.each do |klass|
klass.send(:element, element_tag, options)
end
end
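
Instead of a hard-coded entry list, the entry class is now derived from each feed class by name (Feedzirra::Parser::RSS yields Feedzirra::Parser::RSSEntry, and so on), so parsers added later are covered too. Note the naming convention this eval imposes: a parser registered via add_feed_class must define a matching ...Entry constant or this call raises NameError. Typical usage, with an illustrative element and feed URL:

    require 'feedzirra'

    # Teach every registered entry class to capture an extra element;
    # :as names the accessor SAXMachine defines on each parsed entry.
    Feedzirra::Feed.add_common_feed_entry_element('wfw:commentRss', :as => :comment_rss)

    feed = Feedzirra::Feed.fetch_and_parse('http://example.com/feed.xml')
    feed.entries.first.comment_rss # => value of <wfw:commentRss>, if present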
- # Makes all entry types look for the passed in elements to parse. This is actually just a call to
- # elements (a SAXMachine call) in the class
- #
- # === Parameters
- # [element_tag<String>]
- # [options<Hash>] Valid keys are same as with SAXMachine
- def self.add_common_feed_entry_elements(element_tag, options = {})
- # need to think of a better way to do this. will break for people who want this behavior
- # across their added classes
- [RSSEntry, AtomFeedBurnerEntry, AtomEntry].each do |klass|
- klass.send(:elements, element_tag, options)
- end
- end
-
# Fetches and returns the raw XML for each URL provided.
#
# === Parameters
# [urls<String> or <Array>] A single feed URL, or an array of feed URLs.
# [options<Hash>] Valid keys for this argument are as follows:
@@ -98,13 +84,16 @@
url_queue.each do |url|
easy = Curl::Easy.new(url) do |curl|
curl.headers["User-Agent"] = (options[:user_agent] || USER_AGENT)
curl.headers["If-Modified-Since"] = options[:if_modified_since].httpdate if options.has_key?(:if_modified_since)
curl.headers["If-None-Match"] = options[:if_none_match] if options.has_key?(:if_none_match)
- curl.headers["Accept-encoding"] = 'gzip, deflate'
+ curl.headers["Accept-encoding"] = 'gzip, deflate' if options.has_key?(:compress)
curl.follow_location = true
curl.userpwd = options[:http_authentication].join(':') if options.has_key?(:http_authentication)
+
+ curl.max_redirects = options[:max_redirects] if options[:max_redirects]
+ curl.timeout = options[:timeout] if options[:timeout]
curl.on_success do |c|
responses[url] = decode_content(c)
end
curl.on_failure do |c|
@@ -113,38 +102,38 @@
end
multi.add(easy)
end
multi.perform
- return urls.is_a?(String) ? responses.values.first : responses
+ urls.is_a?(String) ? responses.values.first : responses
end
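
Accept-Encoding becomes opt-in via the new :compress key, and :max_redirects / :timeout pass straight through to the underlying Curl::Easy handle. A fetch_raw call exercising the new options might look like this (URL illustrative):

    require 'feedzirra'

    xml = Feedzirra::Feed.fetch_raw(
      'http://example.com/feed.xml',
      :compress      => true, # now required to send Accept-encoding: gzip, deflate
      :max_redirects => 3,    # passed straight to curl.max_redirects
      :timeout       => 10    # seconds; passed straight to curl.timeout
    )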
# Fetches and returns the parsed XML for each URL provided.
#
# === Parameters
# [urls<String> or <Array>] A single feed URL, or an array of feed URLs.
# [options<Hash>] Valid keys for this argument are as follows:
- # * :user_agent - String that overrides the default user agent.
- # * :if_modified_since - Time object representing when the feed was last updated.
- # * :if_none_match - String, an etag for the request that was stored previously.
- # * :on_success - Block that gets executed after a successful request.
- # * :on_failure - Block that gets executed after a failed request.
+ # * :user_agent - String that overrides the default user agent.
+ # * :if_modified_since - Time object representing when the feed was last updated.
+ # * :if_none_match - String, an etag for the request that was stored previously.
+ # * :on_success - Block that gets executed after a successful request.
+ # * :on_failure - Block that gets executed after a failed request.
# === Returns
# A Feed object if a single URL is passed.
#
# A Hash if multiple URLs are passed. The key will be the URL, and the value the Feed object.
def self.fetch_and_parse(urls, options = {})
url_queue = [*urls]
multi = Curl::Multi.new
responses = {}
- # I broke these down so I would only try to do 30 simultaneously because
+ # I broke these down so I would only try to do 30 simultaneously because
# I was getting weird errors when doing a lot. As one finishes it pops another off the queue.
url_queue.slice!(0, 30).each do |url|
add_url_to_multi(multi, url, url_queue, responses, options)
end
-
+
multi.perform
return urls.is_a?(String) ? responses.values.first : responses
end
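
Callbacks are how per-URL outcomes surface from the Curl::Multi run; a hedged usage sketch with illustrative URLs:

    require 'feedzirra'

    urls = ['http://example.com/a.xml', 'http://example.com/b.xml']

    feeds = Feedzirra::Feed.fetch_and_parse(
      urls,
      :on_success => lambda { |url, feed| puts "parsed #{url}: #{feed.title}" },
      :on_failure => lambda { |url, code, header, body| warn "#{url} => HTTP #{code}" }
    )
    # A Hash keyed by URL for an Array argument; a single Feed object for a String.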
# Decodes the XML document if it was compressed.
@@ -192,11 +181,11 @@
feed_queue.slice!(0, 30).each do |feed|
add_feed_to_multi(multi, feed, feed_queue, responses, options)
end
multi.perform
- return responses.size == 1 ? responses.values.first : responses.values
+ responses.size == 1 ? responses.values.first : responses.values
end
# An abstraction for adding a feed by URL to the passed Curl::Multi stack.
#
# === Parameters
@@ -214,26 +203,33 @@
def self.add_url_to_multi(multi, url, url_queue, responses, options)
easy = Curl::Easy.new(url) do |curl|
curl.headers["User-Agent"] = (options[:user_agent] || USER_AGENT)
curl.headers["If-Modified-Since"] = options[:if_modified_since].httpdate if options.has_key?(:if_modified_since)
curl.headers["If-None-Match"] = options[:if_none_match] if options.has_key?(:if_none_match)
- curl.headers["Accept-encoding"] = 'gzip, deflate'
+ curl.headers["Accept-encoding"] = 'gzip, deflate' if options.has_key?(:compress)
curl.follow_location = true
curl.userpwd = options[:http_authentication].join(':') if options.has_key?(:http_authentication)
+
+ curl.max_redirects = options[:max_redirects] if options[:max_redirects]
+ curl.timeout = options[:timeout] if options[:timeout]
curl.on_success do |c|
add_url_to_multi(multi, url_queue.shift, url_queue, responses, options) unless url_queue.empty?
xml = decode_content(c)
klass = determine_feed_parser_for_xml(xml)
if klass
- feed = klass.parse(xml)
- feed.feed_url = c.last_effective_url
- feed.etag = etag_from_header(c.header_str)
- feed.last_modified = last_modified_from_header(c.header_str)
- responses[url] = feed
- options[:on_success].call(url, feed) if options.has_key?(:on_success)
+ begin
+ feed = klass.parse(xml)
+ feed.feed_url = c.last_effective_url
+ feed.etag = etag_from_header(c.header_str)
+ feed.last_modified = last_modified_from_header(c.header_str)
+ responses[url] = feed
+ options[:on_success].call(url, feed) if options.has_key?(:on_success)
+ rescue Exception => e
+ options[:on_failure].call(url, c.response_code, c.header_str, c.body_str) if options.has_key?(:on_failure)
+ end
else
# puts "Error determining parser for #{url} - #{c.last_effective_url}"
# raise NoParserAvailable.new("no valid parser for content.") (this would unfortunately fail the whole 'multi', so it's not really usable)
options[:on_failure].call(url, c.response_code, c.header_str, c.body_str) if options.has_key?(:on_failure)
end
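
Exceptions raised while parsing a fetched document are now trapped and routed to the same :on_failure hook that handles HTTP errors, rather than aborting the whole Curl::Multi run. Rescuing Exception (not just StandardError) is deliberately broad, so parse failures surface only through the callback. One way to tell the two cases apart, as a heuristic sketch (a 2xx code reaching the failure callback implies the parse path):

    require 'feedzirra'

    # The same :on_failure hook now fires for two distinct cases:
    #   1. HTTP-level failures (curl.on_failure), and
    #   2. exceptions raised while parsing a fetched document (the new rescue).
    on_failure = lambda do |url, response_code, header_str, body_str|
      if (200..299).include?(response_code.to_i)
        warn "#{url}: fetched OK but parsing raised"   # case 2
      else
        warn "#{url}: HTTP #{response_code}"           # case 1
      end
    end

    Feedzirra::Feed.fetch_and_parse(['http://example.com/feed.xml'], :on_failure => on_failure)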
@@ -268,18 +264,25 @@
curl.headers["If-Modified-Since"] = feed.last_modified.httpdate if feed.last_modified
curl.headers["If-None-Match"] = feed.etag if feed.etag
curl.userpwd = options[:http_authentication].join(':') if options.has_key?(:http_authentication)
curl.follow_location = true
+ curl.max_redirects = options[:max_redirects] if options[:max_redirects]
+ curl.timeout = options[:timeout] if options[:timeout]
+
curl.on_success do |c|
- add_feed_to_multi(multi, feed_queue.shift, feed_queue, responses, options) unless feed_queue.empty?
- updated_feed = Feed.parse(c.body_str)
- updated_feed.feed_url = c.last_effective_url
- updated_feed.etag = etag_from_header(c.header_str)
- updated_feed.last_modified = last_modified_from_header(c.header_str)
- feed.update_from_feed(updated_feed)
- responses[feed.feed_url] = feed
- options[:on_success].call(feed) if options.has_key?(:on_success)
+ begin
+ add_feed_to_multi(multi, feed_queue.shift, feed_queue, responses, options) unless feed_queue.empty?
+ updated_feed = Feed.parse(c.body_str)
+ updated_feed.feed_url = c.last_effective_url
+ updated_feed.etag = etag_from_header(c.header_str)
+ updated_feed.last_modified = last_modified_from_header(c.header_str)
+ feed.update_from_feed(updated_feed)
+ responses[feed.feed_url] = feed
+ options[:on_success].call(feed) if options.has_key?(:on_success)
+ rescue Exception => e
+ options[:on_failure].call(feed, c.response_code, c.header_str, c.body_str) if options.has_key?(:on_failure)
+ end
end
curl.on_failure do |c|
add_feed_to_multi(multi, feed_queue.shift, feed_queue, responses, options) unless feed_queue.empty?
response_code = c.response_code
\ No newline at end of file
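
add_feed_to_multi gets the same begin/rescue treatment for re-fetching already-parsed Feed objects, with the stored etag and last-modified sent as conditional headers; note the failure callback here receives the Feed object rather than a URL. A sketch of the path that drives this method, assuming feedzirra's Feed.update entry point (URL illustrative):

    require 'feedzirra'

    feed = Feedzirra::Feed.fetch_and_parse('http://example.com/feed.xml')

    # Re-fetches using feed.etag / feed.last_modified as conditional headers.
    updated = Feedzirra::Feed.update(
      feed,
      :timeout    => 10,
      :on_failure => lambda { |f, code, header, body| warn "#{f.feed_url}: HTTP #{code}" }
    )
    updated.updated? # => true when new entries were merged in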