lib/feedzirra/feed.rb in feedzirra-0.3.0 vs lib/feedzirra/feed.rb in feedzirra-0.4.0
- old
+ new
@@ -169,12 +169,13 @@
curl.headers["If-None-Match"] = options[:if_none_match] if options.has_key?(:if_none_match)
curl.on_success do |c|
responses[url] = decode_content(c)
end
- curl.on_failure do |c, err|
- responses[url] = c.response_code
+
+ curl.on_complete do |c, err|
+ responses[url] = c.response_code unless responses.has_key?(url)
end
end
multi.add(easy)
end
@@ -280,52 +281,53 @@
setup_easy curl, options
curl.headers["If-Modified-Since"] = options[:if_modified_since].httpdate if options.has_key?(:if_modified_since)
curl.headers["If-None-Match"] = options[:if_none_match] if options.has_key?(:if_none_match)
curl.on_success do |c|
- add_url_to_multi(multi, url_queue.shift, url_queue, responses, options) unless url_queue.empty?
xml = decode_content(c)
klass = determine_feed_parser_for_xml(xml)
if klass
begin
- feed = klass.parse(xml, Proc.new{|message| warn "Error while parsing [#{url}] #{message}" })
+ feed = klass.parse xml, on_parser_failure(url)
+
feed.feed_url = c.last_effective_url
feed.etag = etag_from_header(c.header_str)
feed.last_modified = last_modified_from_header(c.header_str)
responses[url] = feed
options[:on_success].call(url, feed) if options.has_key?(:on_success)
rescue Exception => e
options[:on_failure].call(url, c.response_code, c.header_str, c.body_str) if options.has_key?(:on_failure)
end
else
- # puts "Error determining parser for #{url} - #{c.last_effective_url}"
- # raise NoParserAvailable.new("no valid parser for content.") (this would unfortunately fail the whole 'multi', so it's not really usable)
options[:on_failure].call(url, c.response_code, c.header_str, c.body_str) if options.has_key?(:on_failure)
end
end
#
# trigger on_failure for 404s
#
curl.on_complete do |c|
add_url_to_multi(multi, url_queue.shift, url_queue, responses, options) unless url_queue.empty?
- responses[url] = c.response_code
+ responses[url] = c.response_code unless responses.has_key?(url)
+ end
+ curl.on_redirect do |c|
+ if c.response_code == 304 # it's not modified. this isn't an error condition
+ options[:on_success].call(url, nil) if options.has_key?(:on_success)
+ end
+ end
+
+ curl.on_missing do |c|
if c.response_code == 404 && options.has_key?(:on_failure)
options[:on_failure].call(url, c.response_code, c.header_str, c.body_str)
end
end
curl.on_failure do |c, err|
- add_url_to_multi(multi, url_queue.shift, url_queue, responses, options) unless url_queue.empty?
responses[url] = c.response_code
- if c.response_code == 304 # it's not modified. this isn't an error condition
- options[:on_success].call(url, nil) if options.has_key?(:on_success)
- else
- options[:on_failure].call(url, c.response_code, c.header_str, c.body_str) if options.has_key?(:on_failure)
- end
+ options[:on_failure].call(url, c.response_code, c.header_str, c.body_str) if options.has_key?(:on_failure)
end
end
multi.add(easy)
end
@@ -350,12 +352,12 @@
curl.headers["If-Modified-Since"] = options[:if_modified_since] if options[:if_modified_since] && (!feed.last_modified || (Time.parse(options[:if_modified_since].to_s) > feed.last_modified))
curl.headers["If-None-Match"] = feed.etag if feed.etag
curl.on_success do |c|
begin
- add_feed_to_multi(multi, feed_queue.shift, feed_queue, responses, options) unless feed_queue.empty?
- updated_feed = Feed.parse(c.body_str){ |message| warn "Error while parsing [#{feed.feed_url}] #{message}" }
+ updated_feed = Feed.parse c.body_str, &on_parser_failure(feed.feed_url)
+
updated_feed.feed_url = c.last_effective_url
updated_feed.etag = etag_from_header(c.header_str)
updated_feed.last_modified = last_modified_from_header(c.header_str)
feed.update_from_feed(updated_feed)
responses[feed.feed_url] = feed
@@ -363,21 +365,25 @@
rescue Exception => e
options[:on_failure].call(feed, c.response_code, c.header_str, c.body_str) if options.has_key?(:on_failure)
end
end
- curl.on_failure do |c, err|
- add_feed_to_multi(multi, feed_queue.shift, feed_queue, responses, options) unless feed_queue.empty?
- response_code = c.response_code
- if response_code == 304 # it's not modified. this isn't an error condition
- responses[feed.feed_url] = feed
+ curl.on_failure do |c, err| # response code 50X
+ responses[feed.url] = c.response_code
+ options[:on_failure].call(feed, c.response_code, c.header_str, c.body_str) if options.has_key?(:on_failure)
+ end
+
+ curl.on_redirect do |c, err| # response code 30X
+ if c.response_code == 304
options[:on_success].call(feed) if options.has_key?(:on_success)
- else
- responses[feed.url] = c.response_code
- options[:on_failure].call(feed, c.response_code, c.header_str, c.body_str) if options.has_key?(:on_failure)
end
end
+
+ curl.on_complete do |c|
+ add_feed_to_multi(multi, feed_queue.shift, feed_queue, responses, options) unless feed_queue.empty?
+ responses[feed.feed_url] = feed unless responses.has_key?(feed.feed_url)
+ end
end
multi.add(easy)
end
# Determines the etag from the request headers.
@@ -398,8 +404,16 @@
# === Returns
# A Time object of the last modified date or nil if it cannot be found in the headers.
def self.last_modified_from_header(header)
header =~ /.*Last-Modified:\s(.*)\r/
Time.parse_safely($1) if $1
+ end
+
+ class << self
+ private
+
+ def on_parser_failure(url)
+ Proc.new { |message| raise "Error while parsing [#{url}] #{message}" }
+ end
end
end
end