lib/feedzirra/feed.rb in feedzirra-0.3.0 vs lib/feedzirra/feed.rb in feedzirra-0.4.0

- old
+ new

@@ -169,12 +169,13 @@ curl.headers["If-None-Match"] = options[:if_none_match] if options.has_key?(:if_none_match) curl.on_success do |c| responses[url] = decode_content(c) end - curl.on_failure do |c, err| - responses[url] = c.response_code + + curl.on_complete do |c, err| + responses[url] = c.response_code unless responses.has_key?(url) end end multi.add(easy) end @@ -280,52 +281,53 @@ setup_easy curl, options curl.headers["If-Modified-Since"] = options[:if_modified_since].httpdate if options.has_key?(:if_modified_since) curl.headers["If-None-Match"] = options[:if_none_match] if options.has_key?(:if_none_match) curl.on_success do |c| - add_url_to_multi(multi, url_queue.shift, url_queue, responses, options) unless url_queue.empty? xml = decode_content(c) klass = determine_feed_parser_for_xml(xml) if klass begin - feed = klass.parse(xml, Proc.new{|message| warn "Error while parsing [#{url}] #{message}" }) + feed = klass.parse xml, on_parser_failure(url) + feed.feed_url = c.last_effective_url feed.etag = etag_from_header(c.header_str) feed.last_modified = last_modified_from_header(c.header_str) responses[url] = feed options[:on_success].call(url, feed) if options.has_key?(:on_success) rescue Exception => e options[:on_failure].call(url, c.response_code, c.header_str, c.body_str) if options.has_key?(:on_failure) end else - # puts "Error determining parser for #{url} - #{c.last_effective_url}" - # raise NoParserAvailable.new("no valid parser for content.") (this would unfortunately fail the whole 'multi', so it's not really usable) options[:on_failure].call(url, c.response_code, c.header_str, c.body_str) if options.has_key?(:on_failure) end end # # trigger on_failure for 404s # curl.on_complete do |c| add_url_to_multi(multi, url_queue.shift, url_queue, responses, options) unless url_queue.empty? - responses[url] = c.response_code + responses[url] = c.response_code unless responses.has_key?(url) + end + curl.on_redirect do |c| + if c.response_code == 304 # it's not modified. this isn't an error condition + options[:on_success].call(url, nil) if options.has_key?(:on_success) + end + end + + curl.on_missing do |c| if c.response_code == 404 && options.has_key?(:on_failure) options[:on_failure].call(url, c.response_code, c.header_str, c.body_str) end end curl.on_failure do |c, err| - add_url_to_multi(multi, url_queue.shift, url_queue, responses, options) unless url_queue.empty? responses[url] = c.response_code - if c.response_code == 304 # it's not modified. this isn't an error condition - options[:on_success].call(url, nil) if options.has_key?(:on_success) - else - options[:on_failure].call(url, c.response_code, c.header_str, c.body_str) if options.has_key?(:on_failure) - end + options[:on_failure].call(url, c.response_code, c.header_str, c.body_str) if options.has_key?(:on_failure) end end multi.add(easy) end @@ -350,12 +352,12 @@ curl.headers["If-Modified-Since"] = options[:if_modified_since] if options[:if_modified_since] && (!feed.last_modified || (Time.parse(options[:if_modified_since].to_s) > feed.last_modified)) curl.headers["If-None-Match"] = feed.etag if feed.etag curl.on_success do |c| begin - add_feed_to_multi(multi, feed_queue.shift, feed_queue, responses, options) unless feed_queue.empty? - updated_feed = Feed.parse(c.body_str){ |message| warn "Error while parsing [#{feed.feed_url}] #{message}" } + updated_feed = Feed.parse c.body_str, &on_parser_failure(feed.feed_url) + updated_feed.feed_url = c.last_effective_url updated_feed.etag = etag_from_header(c.header_str) updated_feed.last_modified = last_modified_from_header(c.header_str) feed.update_from_feed(updated_feed) responses[feed.feed_url] = feed @@ -363,21 +365,25 @@ rescue Exception => e options[:on_failure].call(feed, c.response_code, c.header_str, c.body_str) if options.has_key?(:on_failure) end end - curl.on_failure do |c, err| - add_feed_to_multi(multi, feed_queue.shift, feed_queue, responses, options) unless feed_queue.empty? - response_code = c.response_code - if response_code == 304 # it's not modified. this isn't an error condition - responses[feed.feed_url] = feed + curl.on_failure do |c, err| # response code 50X + responses[feed.url] = c.response_code + options[:on_failure].call(feed, c.response_code, c.header_str, c.body_str) if options.has_key?(:on_failure) + end + + curl.on_redirect do |c, err| # response code 30X + if c.response_code == 304 options[:on_success].call(feed) if options.has_key?(:on_success) - else - responses[feed.url] = c.response_code - options[:on_failure].call(feed, c.response_code, c.header_str, c.body_str) if options.has_key?(:on_failure) end end + + curl.on_complete do |c| + add_feed_to_multi(multi, feed_queue.shift, feed_queue, responses, options) unless feed_queue.empty? + responses[feed.feed_url] = feed unless responses.has_key?(feed.feed_url) + end end multi.add(easy) end # Determines the etag from the request headers. @@ -398,8 +404,16 @@ # === Returns # A Time object of the last modified date or nil if it cannot be found in the headers. def self.last_modified_from_header(header) header =~ /.*Last-Modified:\s(.*)\r/ Time.parse_safely($1) if $1 + end + + class << self + private + + def on_parser_failure(url) + Proc.new { |message| raise "Error while parsing [#{url}] #{message}" } + end end end end