Sha256: 5026bc5b3687a730d7842f858f844cf5ceb270bea2064614ec2c538040cee54e

Contents?: true

Size: 850 Bytes

Versions: 20

Compression:

Stored size: 850 Bytes

Contents

# Debug-print helper: writes a "+++ <label>" header line to standard output,
# followed by the inspected form of the value on the next line.
#
# a - the object to dump (rendered via #inspect).
# b - optional label shown after the "+++ " marker (defaults to an empty string).
#
# Returns nil (the result of the final puts).
def puts!(a, b = '')
  header = "+++ #{b}"
  puts header
  puts a.inspect
end

module Ish
  # Helpers for looking up a website through a Google search and for pulling
  # email addresses out of a blob of text.
  class Crawler

    # Runs a Google search for +text+, takes the text of the first <cite>
    # element on the results page as the target website, and fetches it.
    #
    # text - the search query, interpolated into the search URL as-is.
    #
    # Returns a Hash:
    #   { :url => website, :html => body }  when the website was fetched,
    #   { :url => website }                 when fetching raised an SSL error,
    #   { :url => nil }                     when the results page has no <cite>.
    def self.google_first_result text
      # NOTE(review): +text+ is not URL-escaped here; presumably callers pass
      # URL-safe queries — confirm before adding escaping (risk of double-encoding).
      # NOTE(review): :verify => false turns off TLS certificate verification.
      search = HTTParty.get( "https://www.google.com/search?q=#{text}", :verify => false )
      first_cite = Nokogiri::HTML(search.body).css('cite').first

      # Without this guard a results page lacking <cite> raised NoMethodError on nil.
      return { :url => nil } if first_cite.nil?

      website = first_cite.text
      website = "https://#{website}" unless website.start_with?('http')

      puts! website, 'website'

      begin
        page = HTTParty.get( website, :verify => false )
      rescue OpenSSL::SSL::SSLError => e
        puts! e, 'e'
        return { :url => website }
      end

      { :url => website, :html => page.body }
    end

    # Collects every email-looking substring found in +text+.
    #
    # text - the String to search; nil is treated as an empty string.
    #
    # Returns the addresses joined with ',' in order of appearance, or nil
    # when none are found.
    def self.look_for_emails text
      # No \A/\z anchors: addresses are searched for anywhere inside the text.
      # The group is non-capturing so that String#scan yields the full matches
      # rather than the group's captures.
      email_regex = /[\w+\-.]+@[a-z\d\-]+(?:\.[a-z\d\-]+)*\.[a-z]+/i
      found = text.to_s.scan( email_regex )
      found.empty? ? nil : found.join(',')
    end

  end
end

Version data entries

20 entries across 20 versions & 1 rubygem

Version Path
ish_models-0.0.33.103 lib/ish/crawler.rb
ish_models-0.0.33.100 lib/ish/crawler.rb
ish_models-0.0.33.99 lib/ish/crawler.rb
ish_models-0.0.33.98 lib/ish/crawler.rb
ish_models-0.0.33.97 lib/ish/crawler.rb
ish_models-0.0.33.96 lib/ish/crawler.rb
ish_models-0.0.33.95 lib/ish/crawler.rb
ish_models-0.0.33.94 lib/ish/crawler.rb
ish_models-0.0.33.92 lib/ish/crawler.rb
ish_models-0.0.33.91 lib/ish/crawler.rb
ish_models-0.0.33.90 lib/ish/crawler.rb
ish_models-0.0.33.89 lib/ish/crawler.rb
ish_models-0.0.33.87 lib/ish/crawler.rb
ish_models-0.0.33.86 lib/ish/crawler.rb
ish_models-0.0.33.85 lib/ish/crawler.rb
ish_models-0.0.33.83 lib/ish/crawler.rb
ish_models-0.0.33.79 lib/ish/crawler.rb
ish_models-0.0.33.78 lib/ish/crawler.rb
ish_models-0.0.33.77 lib/ish/crawler.rb
ish_models-0.0.33.76 lib/ish/crawler.rb