require 'uri'
require 'net/http'

module Eeml
  # Retrieves a remote feed over HTTP(S) and records the outcome on the
  # instance: the body and mime type on success, or human-readable messages
  # in +retrieval_errors+ on failure.
  #
  # The caller must assign +logger+ (anything responding to +debug+ and
  # +error+) and +feed_url+ before calling #fetch_remote_data. The list of
  # acceptable mime types is read from the +MIME_TYPES+ constant, which is
  # defined outside this file.
  class FeedRetriever
    # Seconds allowed for opening the connection and for each read.
    DEFAULT_TIMEOUT = 5

    # Minimum number of seconds that must pass between two retrievals.
    PREMIUM_ENVIRONMENT_AGE = 3

    # Default number of redirections #fetch is willing to follow.
    MAX_REDIRECTS = 10

    # Format used when storing +retrieved_at+, and the pattern used to read
    # it back. The stored value is always expressed in UTC.
    DB_TIME_FORMAT = '%Y-%m-%d %H:%M:%S'.freeze
    DB_TIME_PATTERN = /\A(\d{4})-(\d{2})-(\d{2}) (\d{2}):(\d{2}):(\d{2})\z/

    # Content types that are wrong for a feed but safe to echo back to the
    # user in the error message.
    REPORTABLE_WRONG_MIME_TYPES = ['text/html', 'text/javascript', 'application/javascript'].freeze

    # Array of string error messages populated during feed retrieval.
    # Currently populated only in #fetch_remote_data.
    attr_accessor :retrieval_errors
    attr_accessor :logger
    attr_accessor :feed_url, :retrieved_at
    attr_accessor :feed_changed, :feed_retrieved
    attr_accessor :feed_content, :mime_type

    def initialize
      @retrieval_errors = []
    end

    # Fetches the feed at +feed_url+, unless no url is set or the previous
    # retrieval happened less than PREMIUM_ENVIRONMENT_AGE seconds ago.
    #
    # On a successful response with an accepted mime type this sets
    # +feed_content+ (stripped body), +mime_type+, +retrieved_at+ (UTC string
    # in DB_TIME_FORMAT), +feed_retrieved+ (true) and +feed_changed+ (whether
    # the content differs from the previously stored content).
    #
    # Bad urls, timeouts and connection failures are not raised to the
    # caller; they are logged and recorded in +retrieval_errors+.
    #
    # Makes use of #get_response and #fetch to do the actual retrieval.
    def fetch_remote_data
      @retrieval_errors = []
      logger.debug("*** Attempting to fetch remote data")

      unless refresh_due?
        logger.debug("*** No feed url present or refresh delta not yet expired - don't do nothin") # TODO remove after development
        return
      end

      logger.debug("*** Our refresh delta has passed so retrieve remote data")
      self.feed_changed = false
      self.feed_retrieved = false

      response = get_response
      case response
      when Net::HTTPSuccess
        logger.debug("*** 200 ok... checking mime type #{response.content_type}.")
        if MIME_TYPES.include?(response.content_type)
          logger.debug("*** We have a valid mime type")
          store_feed(response)
        else
          logger.debug("*** wrong mime-type.")
          @retrieval_errors << wrong_mime_type_message(response.content_type)
        end
      else
        logger.debug("*** Unable to fetch remote data")
      end
    rescue URI::InvalidURIError, Timeout::Error, SystemCallError, SocketError => e
      # SocketError covers host-name lookup failures, which are not
      # SystemCallErrors and would otherwise escape to the caller.
      self.feed_retrieved = false
      self.feed_changed = false
      @retrieval_errors << "Url bad or unavailable."
      logger.error("*** Error retrieving feed from remote source: #{e}")
    end

    # The methods below are deliberately left public.
    #
    # Separating this method out makes mocking our response fairly simple:
    # create a dummy response object and override this method with a stubbed
    # version that returns it.
    def get_response
      fetch(feed_url)
    end

    # TODO: update test specs so this passes properly
    #
    # Issues a GET for +uri_str+ and follows redirections, at most +limit+ of
    # them. Once the limit is used up (or a redirection carries no Location
    # header) the redirection response itself is returned, so the caller sees
    # a non-success response rather than an endless loop.
    #
    # Raises URI::InvalidURIError when +uri_str+ or a redirect target is not
    # a valid http(s) url.
    def fetch(uri_str, limit = MAX_REDIRECTS)
      uri = create_uri(uri_str)
      logger.debug("*** Fetching content from :#{uri}")

      http_client = Net::HTTP.new(uri.host, uri.port)
      # without this an https url would be spoken to in plain http
      http_client.use_ssl = uri.is_a?(URI::HTTPS)
      # the default timeout appears to be 60 seconds. this is very long
      # override it to be 5 seconds
      http_client.open_timeout = DEFAULT_TIMEOUT
      http_client.read_timeout = DEFAULT_TIMEOUT

      response = http_client.request_get(uri.request_uri)
      logger.debug("*** Got response: #{response}")

      return response unless response.is_a?(Net::HTTPRedirection)

      location = response['location']
      if location.nil? || limit <= 0
        logger.debug("*** Not following redirection (no location header or redirect limit reached)")
        return response
      end

      # Location may be relative; resolve it against the url just requested.
      fetch(URI.join(uri.to_s, location).to_s, limit - 1)
    end

    # Parses +uri_str+ and returns the URI object.
    # Raises URI::InvalidURIError unless it is an http or https url.
    def create_uri(uri_str)
      logger.debug("*** Creating uri from: #{uri_str}")
      uri = URI.parse(uri_str)
      # URI::HTTPS is a subclass of URI::HTTP, so one check covers both.
      raise URI::InvalidURIError, "not an http(s) url: #{uri_str.inspect}" unless uri.is_a?(URI::HTTP)
      uri
    end

    private

    # True when a feed url is present and the last retrieval is either
    # unknown or older than PREMIUM_ENVIRONMENT_AGE seconds.
    def refresh_due?
      return false if feed_url.to_s.strip.empty?

      last = last_retrieval_time
      last.nil? || (Time.now.utc - last) > PREMIUM_ENVIRONMENT_AGE
    end

    # Returns +retrieved_at+ as a UTC Time, or nil when it is unset or not
    # recognisable. Accepts a Time-like object or a DB_TIME_FORMAT string,
    # which is what #fetch_remote_data itself stores.
    def last_retrieval_time
      stamp = retrieved_at
      return nil if stamp.nil?
      return stamp.utc if stamp.respond_to?(:utc)

      parts = DB_TIME_PATTERN.match(stamp.to_s)
      parts && Time.utc(*parts.captures.map { |part| part.to_i })
    rescue ArgumentError
      # digits matched the pattern but do not form a real date; treat the
      # last retrieval as unknown so the feed is fetched again
      nil
    end

    # Copies body, mime type and retrieval time from a successful response
    # onto the instance and updates the feed_retrieved / feed_changed flags.
    def store_feed(response)
      content = response.body.to_s.strip
      self.feed_changed = (content != feed_content)
      self.feed_content = content
      self.mime_type = response.content_type
      self.retrieved_at = Time.now.utc.strftime(DB_TIME_FORMAT)
      self.feed_retrieved = true
    end

    # Builds the wrong-mime-type error message. The offending content type
    # is only echoed back when it is one of the known, harmless values.
    def wrong_mime_type_message(content_type)
      filtered_advice = REPORTABLE_WRONG_MIME_TYPES.member?(content_type) ? " Got '#{content_type}'." : ""
      "Wrong mime-type. Need application/xml, text/csv, or variants." + filtered_advice
    end
  end
end