lib/feedzirra/feed.rb in somezack-feedzirra-0.0.2 vs lib/feedzirra/feed.rb in somezack-feedzirra-0.0.3
- old
+ new
@@ -27,31 +27,32 @@
# Fetches the raw responses for one or more urls in a single Curl::Multi run.
#
# urls    - a single url or an array of urls.
# options - optional request settings:
#           :user_agent        - User-Agent header value (USER_AGENT is used when absent).
#           :if_modified_since - sent as If-Modified-Since; must respond to #httpdate.
#           :if_none_match     - sent as If-None-Match.
#
# when passed a single url it returns the response for that url
# when passed an array of urls it returns a hash with the urls as keys and the responses as values
# a response is the body when the request succeeded, or c.response_code when it failed
def self.fetch_raw(urls, options = {})
# wrap the argument in an array so one url and many urls share the same loop;
# the + side keeps `urls` untouched because the return line below inspects its type
- urls = [*urls]
+ url_queue = [*urls]
multi = Curl::Multi.new
responses = {}
- urls.each do |url|
+ url_queue.each do |url|
easy = Curl::Easy.new(url) do |curl|
curl.headers["User-Agent"] = (options[:user_agent] || USER_AGENT)
curl.headers["If-Modified-Since"] = options[:if_modified_since].httpdate if options.has_key?(:if_modified_since)
curl.headers["If-None-Match"] = options[:if_none_match] if options.has_key?(:if_none_match)
# the + side advertises compressed transfer; the body is then run through decode_content on success
+ curl.headers["Accept-encoding"] = 'gzip, deflate'
curl.follow_location = true
curl.on_success do |c|
- responses[url] = c.body_str
+ responses[url] = decode_content(c)
end
# failures are recorded under the same url key, as the response code instead of a body
curl.on_failure do |c|
responses[url] = c.response_code
end
end
multi.add(easy)
end
# the callbacks above fill `responses` while the multi handle performs the queued requests
multi.perform
# the - side unwrapped the hash whenever exactly one response was collected, even for an array argument;
# the + side unwraps only when the caller passed a String
- return responses.size == 1 ? responses.values.first : responses
+ return urls.is_a?(String) ? responses.values.first : responses
end
def self.fetch_and_parse(urls, options = {})
url_queue = [*urls]
multi = Curl::Multi.new
@@ -62,13 +63,27 @@
url_queue.slice!(0, 30).each do |url|
add_url_to_multi(multi, url, url_queue, responses, options)
end
multi.perform
- return responses.size == 1 ? responses.values.first : responses
+ return urls.is_a?(String) ? responses.values.first : responses
end
# Returns the body of a finished request, decompressed according to the
# Content-Encoding found in its response headers.
#
# c - the completed request object; must respond to +header_str+ (the raw
#     response headers as one string, may be nil) and +body_str+ (the raw
#     response body).
#
# Returns the gunzipped body for "gzip", the inflated body for "deflate",
# and the body unchanged when no such encoding is announced. The body is
# also returned unchanged when it cannot be decompressed, so a header that
# does not describe the final body never raises from inside a curl callback.
def self.decode_content(c)
  body = c.body_str

  # Header names are case-insensitive in HTTP, hence the /i flag.
  # `case` with a nil header_str simply falls through to the else branch.
  case c.header_str
  when /Content-Encoding:[ \t]*gzip/i
    begin
      gz = Zlib::GzipReader.new(StringIO.new(body))
      begin
        gz.read
      ensure
        # always release the reader, even when read fails part-way
        gz.close
      end
    rescue Zlib::Error
      # announced as gzip but not decodable as gzip: hand back the raw body
      body
    end
  when /Content-Encoding:[ \t]*deflate/i
    begin
      # the class-level inflate lives on Zlib::Inflate, not Zlib::Deflate
      Zlib::Inflate.inflate(body)
    rescue Zlib::Error
      body
    end
  else
    body
  end
end
+
def self.update(feeds, options = {})
feed_queue = [*feeds]
multi = Curl::Multi.new
responses = {}
feed_queue.slice!(0, 30).each do |feed|
@@ -82,13 +97,14 @@
def self.add_url_to_multi(multi, url, url_queue, responses, options)
easy = Curl::Easy.new(url) do |curl|
curl.headers["User-Agent"] = (options[:user_agent] || USER_AGENT)
curl.headers["If-Modified-Since"] = options[:if_modified_since].httpdate if options.has_key?(:if_modified_since)
curl.headers["If-None-Match"] = options[:if_none_match] if options.has_key?(:if_none_match)
+ curl.headers["Accept-encoding"] = 'gzip, deflate'
curl.follow_location = true
curl.on_success do |c|
add_url_to_multi(multi, url_queue.shift, url_queue, responses, options) unless url_queue.empty?
- xml = c.body_str
+ xml = decode_content(c)
klass = determine_feed_parser_for_xml(xml)
if klass
feed = klass.parse(xml)
feed.feed_url = c.last_effective_url
feed.etag = etag_from_header(c.header_str)
\ No newline at end of file