lib/http_crawler/client.rb in http_crawler-0.3.0.5 vs lib/http_crawler/client.rb in http_crawler-0.3.0.6

- old
+ new

@@ -64,10 +64,11 @@ # 初始化超时时间 def init_timeout @connect_time = 5 @write_time = 5 @read_time = 5 + @all_timeout = nil end # 初始化 ssl 协议 def init_ssl if (@uri.scheme == "https") @@ -75,23 +76,32 @@ @ctx = OpenSSL::SSL::SSLContext.new @ctx.verify_mode = OpenSSL::SSL::VERIFY_NONE end end + attr_accessor :header # 头文件相关方法 def header(parameter = {}) @header ||= init_header end def init_header(parameter = {}) - @header = {} + @header = { + "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8", + "Accept-Encoding": "gzip, br", + "Accept-Language": "zh-CN,zh;q=0.9", + "Connection": "keep-alive", + "Upgrade-Insecure-Requests": "1", + "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.109 Safari/537.36", + } end def update_header(parameter = {}) - nil + @header = init_header end + attr_accessor :cookies # cookies相关方法 def cookies(parameter = {}) @cookies ||= init_cookies end @@ -101,10 +111,18 @@ def update_cookies(parameter = {}) nil end + # 字符串转换成cookies + # "abc=123; cd=412" => { "abc": "123", "cd": "412"} + def str_to_cookies(str) + str.scan(/([^=]*)=([^;]*);? ?/) do |m| + self.cookies[:"#{m[0]}"] = m[1] + end + end + # 代理设置 def auto_proxy=(value) Rails.logger.debug "自动更新代理" @auto_proxy = value update_proxy if (value == true && @proxy.blank?) @@ -204,10 +222,16 @@ # 添加cookies h = h.cookies(cookies) if cookies # 添加超时时间 - h = h.timeout(connect: @connect_time, write: @write_time, read: @read_time) + if(@all_timeout) + # 整体总计超时时间 + h = h.timeout(@all_timeout) + else + # 指定每个处理超时时间 + h = h.timeout(connect: @connect_time, write: @write_time, read: @read_time) + end h end #