lib/feedzirra/feed.rb in somezack-feedzirra-0.0.2 vs lib/feedzirra/feed.rb in somezack-feedzirra-0.0.3

- old
+ new

@@ -27,31 +27,32 @@ # can take a single url or an array of urls
  # when passed a single url it returns the body of the response
  # when passed an array of urls it returns a hash with the urls as keys and body of responses as values
  def self.fetch_raw(urls, options = {})
-   urls = [*urls]
+   url_queue = [*urls]
    multi = Curl::Multi.new
    responses = {}
-   urls.each do |url|
+   url_queue.each do |url|
      easy = Curl::Easy.new(url) do |curl|
        curl.headers["User-Agent"] = (options[:user_agent] || USER_AGENT)
        curl.headers["If-Modified-Since"] = options[:if_modified_since].httpdate if options.has_key?(:if_modified_since)
        curl.headers["If-None-Match"] = options[:if_none_match] if options.has_key?(:if_none_match)
+       curl.headers["Accept-encoding"] = 'gzip, deflate'
        curl.follow_location = true
        curl.on_success do |c|
-         responses[url] = c.body_str
+         responses[url] = decode_content(c)
        end
        curl.on_failure do |c|
          responses[url] = c.response_code
        end
      end
      multi.add(easy)
    end
    multi.perform
-   return responses.size == 1 ? responses.values.first : responses
+   return urls.is_a?(String) ? responses.values.first : responses
  end
  def self.fetch_and_parse(urls, options = {})
    url_queue = [*urls]
    multi = Curl::Multi.new
@@ -62,13 +63,27 @@ url_queue.slice!(0, 30).each do |url|
      add_url_to_multi(multi, url, url_queue, responses, options)
    end
    multi.perform
-   return responses.size == 1 ? responses.values.first : responses
+   return urls.is_a?(String) ? responses.values.first : responses
  end
+ def self.decode_content(c)
+   if c.header_str.match(/Content-Encoding: gzip/)
+     gz = Zlib::GzipReader.new(StringIO.new(c.body_str))
+     xml = gz.read
+     gz.close
+   elsif c.header_str.match(/Content-Encoding: deflate/)
+     xml = Zlib::Deflate.inflate(c.body_str)
+   else
+     xml = c.body_str
+   end
+
+   xml
+ end
+
  def self.update(feeds, options = {})
    feed_queue = [*feeds]
    multi = Curl::Multi.new
    responses = {}
    feed_queue.slice!(0, 30).each do |feed|
@@ -82,13 +97,14 @@ def self.add_url_to_multi(multi, url, url_queue, responses, options)
    easy = Curl::Easy.new(url) do |curl|
      curl.headers["User-Agent"] = (options[:user_agent] || USER_AGENT)
      curl.headers["If-Modified-Since"] = options[:if_modified_since].httpdate if options.has_key?(:if_modified_since)
      curl.headers["If-None-Match"] = options[:if_none_match] if options.has_key?(:if_none_match)
+     curl.headers["Accept-encoding"] = 'gzip, deflate'
      curl.follow_location = true
      curl.on_success do |c|
        add_url_to_multi(multi, url_queue.shift, url_queue, responses, options) unless url_queue.empty?
-       xml = c.body_str
+       xml = decode_content(c)
        klass = determine_feed_parser_for_xml(xml)
        if klass
          feed = klass.parse(xml)
          feed.feed_url = c.last_effective_url
          feed.etag = etag_from_header(c.header_str)
\ No newline at end of file