Sha256: dd11757835c119422563ec1b7560bfe6e630a040a5de4cce0d5beee34c11b23c

Contents?: true

Size: 975 Bytes

Versions: 1

Compression:

Stored size: 975 Bytes

Contents

require "mechanize"

module EmailCrawler
  # Mixin with helpers for creating a Mechanize agent and fetching pages
  # with a bounded timeout and a single retry.
  #
  # `get` calls an `agent` method that this module does not define; the
  # including object is expected to provide it.
  module MechanizeHelper
    # Seconds used for the agent's open/read timeouts and for the overall
    # per-request deadline enforced in #get.
    READ_TIMEOUT = 15

    # Returns the agent cached under Thread.current[:agent], building and
    # caching one on first use.
    #
    # The optional block receives the freshly built agent for extra
    # configuration; it is only invoked when a new agent is created, not
    # when a cached one is returned.
    def new_agent
      cached = Thread.current[:agent]
      return cached if cached

      Thread.current[:agent] = Mechanize.new do |browser|
        browser.user_agent_alias = "Windows Mozilla"
        browser.read_timeout = READ_TIMEOUT
        browser.open_timeout = READ_TIMEOUT
        # TLS certificate verification is switched off for this agent.
        browser.verify_mode = OpenSSL::SSL::VERIFY_NONE
        # Keep only the most recent page in the navigation history.
        browser.history.max_size = 1
        yield(browser) if block_given?
      end
    end

    # Fetches +url+ through `agent`.
    #
    # Returns the response when it is a Mechanize::Page, otherwise nil.
    # A Mechanize::Error yields nil immediately. A Timeout::Error or
    # SocketError triggers one retry; a second such failure yields nil.
    def get(url)
      # Lives outside the begin block so its value survives `retry`.
      already_retried = false

      begin
        response = Timeout.timeout(READ_TIMEOUT) { agent.get(url) }
        response.is_a?(Mechanize::Page) ? response : nil
      rescue Mechanize::Error
        nil
      rescue Timeout::Error, SocketError
        return nil if already_retried

        already_retried = true
        retry
      end
    end
  end
end

Version data entries

1 entries across 1 versions & 1 rubygems

Version Path
email_crawler-0.1.0 lib/email_crawler/mechanize_helper.rb