require 'content_scrapper'
require 'content_scrapper/feedzirra'

# A named source of articles, optionally backed by an http(s) feed URL
# (+sourcefeed+). Fetching a source parses its feed and stores every entry
# whose guid is not yet known as an Article belonging to this infosource.
class Infosource < ActiveRecord::Base
  include InfosourcesHelper

  MIN_SOURCENAME_SIZE = 3
  MAX_SOURCENAME_SIZE = 40
  # Derived from the two bounds above so the limits are defined in one place.
  RANGE_SOURCENAMESIZE = (MIN_SOURCENAME_SIZE..MAX_SOURCENAME_SIZE)

  # Matches either the empty string or a string that is, in its entirety, an
  # http/https URI. Anchored with \A and \z (rather than ^ and $) so that
  # surrounding text or extra lines cannot slip past the validation.
  SOURCEFEED_FORMAT = /\A(?:#{URI::regexp(%w(http https))})?\z/

  has_many :articles, :order => "published_at DESC"

  validates_uniqueness_of :sourcename
  validates_length_of :sourcename, :within => RANGE_SOURCENAMESIZE
  validates_format_of :sourcefeed, :with => SOURCEFEED_FORMAT, :allow_nil => true

  # Fetches this infosource's feed and stores its previously unseen entries.
  #
  # Returns the number of articles created, or nil when no feed is set or the
  # feed could not be fetched/parsed. Errors raised while fetching are logged
  # and reported as nil; errors raised while storing entries (e.g. by
  # Article.create!) propagate to the caller.
  def fetch_source
    unless has_sourcefeed?
      Rails.logger.info "Infosource #{sourcename} has not feed set, nothing fetched."
      return nil
    end

    feed = begin
      #TODO simulate Feedzirra collapse in tests
      Feedzirra::Feed.fetch_and_parse(sourcefeed)
    rescue StandardError => e
      Rails.logger.error("Error occured during fetching infosource #{sourcename} (#{sourcefeed}): #{e}")
      nil
    end

    # A usable feed exposes #entries. After a rescued error feed is nil, and
    # Feedzirra is documented to hand back a numeric code instead of a feed on
    # failure (TODO confirm for the bundled version); neither responds to
    # #entries, so both are treated as a failed fetch.
    unless feed.respond_to?(:entries)
      Rails.logger.error "Infosource #{sourcename} count not be fetched. An error occured."
      return nil
    end

    entries_count = add_entries(feed.entries)
    Rails.logger.info "Infosource #{sourcename}'s fetched from feed #{sourcefeed}, #{entries_count} entries added."
    entries_count
  end

  #TODO updating feed using the Feedzirra, the feeds should be mapped to the articles model
  #
  # Fetches every infosource in turn and returns the total number of articles
  # created. A failure while processing one infosource is logged and does not
  # prevent the remaining infosources from being fetched. Sunspot is asked to
  # commit pending index changes whether or not errors occurred.
  def self.fetch_all_sources
    Rails.logger.info "Starting to fetch feeds for #{Infosource.count} infosources."
    total_fetches = 0
    begin
      Infosource.all.each do |infosource|
        begin
          total_fetches += infosource.fetch_source || 0
        rescue StandardError => e
          Rails.logger.error("Error occurred while processing infosource #{infosource.sourcename} (#{infosource.sourcefeed}): #{e}")
        end
      end
    rescue StandardError => e
      #TODO test for failed fetching
      Rails.logger.error("Error occured during fetching all infosources: #{e}")
    ensure
      Sunspot.commit_if_dirty
      Rails.logger.info "#{total_fetches} articles were fetched. Harvesting finished."
    end
    total_fetches
  end

  private

  # Creates an Article for each entry whose id is not already stored as an
  # Article guid. Returns the number of articles created.
  def add_entries(entries)
    new_entries = 0
    entries.each do |entry|
      next if Article.exists?(:guid => entry.id)

      Article.create!(:title => entry.title,
                      :summary => entry.summary,
                      :body => entry.scrap_content,
                      :url => entry.url,
                      :published_at => entry.published,
                      :guid => entry.id,
                      :infosource => self)
      new_entries += 1
    end
    new_entries
  end

  # Truthy when a non-empty sourcefeed is set.
  def has_sourcefeed?
    sourcefeed && !sourcefeed.empty?
  end
end