lib/socialcrawler.rb in socialcrawler-0.0.3 vs lib/socialcrawler.rb in socialcrawler-0.0.4

- old
+ new

@@ -26,11 +26,16 @@
  def initialize
    @map = {
        twitter: 'twitter.com/',
        facebook: 'facebook.com/',
-       google_plus: 'plus.google.com/'
+       google_plus: 'plus.google.com/',
+       instagram: 'www.instagram.com',
+       you_tube: 'youtube.com/user',
+       pinterest: 'pinterest.com/',
+       linked_in: 'linkedin.com/',
+       flickr: 'flickr.com/'
    }
  end

  def _put(hash, symbol, value, log=nil)
    log = Logger.new(STDOUT) if log.nil?
@@ -55,11 +60,11 @@
  end

  def crawl_url(url, log=nil)
    log = Logger.new(STDOUT) if log.nil?
    log.info("Crawling #{url}")
-   result = Hash.new('NOT FOUND')
+   result = Hash.new(:NOT_FOUND)
    begin
      page = Nokogiri::HTML(open(url))
      title = page.css('title')
      if not title.nil?
        result[:title] = title.text.strip
@@ -79,17 +84,11 @@
  def load_status_cache(status_filename, log=nil)
    status = Hash.new
    if not status_filename.nil? and File.exists?(status_filename)
      log.info("Loading previous status from #{status_filename}")
      CSV.foreach(status_filename) do |row|
-       if row.count >= 3
-         status[row[0]] = {
-             :url => row[0],
-             :result => row[1],
-             :message => row[2]
-         }
-       end
+       set_status_cache_data(status, row)
      end
      log.info("Loading previous status from #{status_filename} finished, #{status.keys.length} loaded.")
    end
    return status
  end
@@ -99,20 +98,11 @@
    log.info("Loading previous status from #{output_list_filename}")
    if not File.exist?(output_list_filename)
      return data
    end
    CSV.foreach(output_list_filename) do |row|
-     log.info("Loading #{row} #{row.count}")
-     if row.count >= 5
-       data[row[0]] = {
-           :url => row[0],
-           :title => row[1],
-           :twitter => row[2],
-           :facebook => row[3],
-           :google_plus => row[4]
-       }
-     end
+     set_output_cache_data(data, row)
      log.info("Loading previous status from #{output_list_filename} finished, #{data.keys.length} loaded.")
    end
    return data
  end
@@ -123,17 +113,13 @@
    status = load_status_cache(status_filename, log)
    data = load_output_cache(output_list_filename, log)

    CSV.open(output_list_filename, "wb") do |output|
-     data.each do |k, v|
-       output << [k, v[:title], v[:twitter], v[:facebook], v[:google_plus]]
-     end
+     write_data(data, output)
      CSV.open(status_filename, "wb") do |status_line|
-       status.each do |k, v|
-         status_line << [k, v[:success], v[:message]]
-       end
+       write_status(status, status_line)
        crawl_loop(data, domain_list_filename, log, output, status, status_line)
      end
    end
  end
@@ -149,10 +135,22 @@
      end
    end
  end

  private

+ def write_data(data, output)
+   data.each do |k, v|
+     output << [k, v[:title], v[:twitter], v[:facebook], v[:google_plus]]
+   end
+ end
+
+ def write_status(status, status_line)
+   status.each do |k, v|
+     status_line << [k, v[:success], v[:message]]
+   end
+ end
+
  def set_data(result, url, data, output)
    if result[:success] == true
      data[url] = result
      output << [url, result[:title], result[:twitter], result[:facebook], result[:google_plus]]
    end
@@ -164,13 +162,35 @@
        :result => result[:success],
        :message => result[:message]
    }
    status_line << [url, result[:success], result[:message]]
  end
+
+ def set_output_cache_data(data, row)
+   if row.count >= 5
+     data[row[0]] = {
+         :url => row[0],
+         :title => row[1],
+         :twitter => row[2],
+         :facebook => row[3],
+         :google_plus => row[4]
+     }
+   end
+ end
+
+ def set_status_cache_data(status, row)
+   if row.count >= 3
+     status[row[0]] = {
+         :url => row[0],
+         :result => row[1],
+         :message => row[2]
+     }
+   end
+ end
 end
end

if __FILE__ == $0
- # :nocov:
+ #:nocov:
  SocialCrawler::SocialCrawler.new.crawl(ARGV[0], ARGV[1], ARGV[2])
- # :nocov:
+ #:nocov:
end
\ No newline at end of file
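
Notes on the changes above.

The first hunk registers five additional networks in @map, although the matching loop that consumes the map is outside the hunks shown, and write_data/set_data still emit only the original twitter, facebook and google_plus columns. The following is a hypothetical sketch of how a URL-fragment map like this is typically matched against anchors on a crawled page; the page fetch mirrors the Nokogiri::HTML(open(url)) call visible in crawl_url, while example.com and the puts reporting are illustrative only:

    require 'nokogiri'
    require 'open-uri'

    map = {
        twitter:     'twitter.com/',
        facebook:    'facebook.com/',
        google_plus: 'plus.google.com/',
        instagram:   'www.instagram.com',
        you_tube:    'youtube.com/user',
        pinterest:   'pinterest.com/',
        linked_in:   'linkedin.com/',
        flickr:      'flickr.com/'
    }

    # URI.open is the non-deprecated spelling of the bare open() the gem uses.
    page = Nokogiri::HTML(URI.open('https://example.com'))
    page.css('a').each do |a|
      href = a['href'].to_s
      # Report every anchor whose href contains a known network fragment.
      map.each do |network, fragment|
        puts "#{network}: #{href}" if href.include?(fragment)
      end
    end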
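The second hunk changes the default value of the result hash from the string 'NOT FOUND' to the symbol :NOT_FOUND, which alters what callers of crawl_url see for networks that were never matched. A minimal sketch of the difference, in plain Ruby rather than code from the gem:

    # Hash.new(default) returns `default` for any key that was never assigned.
    old_result = Hash.new('NOT FOUND')   # 0.0.3 behaviour
    new_result = Hash.new(:NOT_FOUND)    # 0.0.4 behaviour

    old_result[:twitter]                 # => "NOT FOUND"
    new_result[:twitter]                 # => :NOT_FOUND

    # Callers that compared against the old string sentinel need updating:
    puts 'no twitter profile found' if new_result[:twitter] == :NOT_FOUND

The serialized CSV output also changes for unmatched networks, since CSV stringifies the symbol to "NOT_FOUND", without the space.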
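Finally, the __FILE__ == $0 guard at the bottom keeps the file directly executable. A hypothetical invocation, with the argument order (domain list, output CSV, status CSV) inferred from how crawl's parameters are used in the hunks above rather than from any usage documentation:

    # ruby lib/socialcrawler.rb domains.csv output.csv status.csv
    SocialCrawler::SocialCrawler.new.crawl('domains.csv', 'output.csv', 'status.csv')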