lib/http_crawler/client.rb in http_crawler-0.3.0.5 vs lib/http_crawler/client.rb in http_crawler-0.3.0.6
- old
+ new
@@ -64,10 +64,11 @@
# Initialize the request timeout settings.
#
# Sets the connect, write and read timeouts to 5 each (presumably seconds —
# TODO confirm against the HTTP library's `timeout` API) and leaves the
# overall timeout unset.
def init_timeout
@connect_time = 5
@write_time = 5
@read_time = 5
+ @all_timeout = nil # nil => the request builder uses the three per-phase timeouts above
end
# 初始化 ssl 协议
def init_ssl
if (@uri.scheme == "https")
@@ -75,23 +76,32 @@
@ctx = OpenSSL::SSL::SSLContext.new
@ctx.verify_mode = OpenSSL::SSL::VERIFY_NONE
end
end
+ attr_writer :header
# Header-related methods.
#
# Returns the stored request headers. The first call (or any call made while
# @header is nil/false) builds the defaults through `init_header`; later calls
# reuse the stored value.
#
# Only the writer is generated by `attr_writer`: the reader is this explicit
# method, so an `attr_accessor` reader would just be redefined right here.
#
# @param parameter [Hash] currently unused
# @return [Hash] the stored headers
def header(parameter = {})
  @header ||= init_header
end
# Build the default request headers and store them in @header.
#
# The defaults imitate a desktop Chrome 72 browser on macOS (see the
# User-Agent value). Because the hash uses the `"Key": value` literal syntax,
# its keys are Symbols (e.g. :"Accept-Encoding"), not Strings.
#
# NOTE(review): "Accept-Encoding" advertises gzip and br; whether responses in
# those encodings get decoded is not visible here — confirm.
#
# @param parameter [Hash] currently unused
# @return [Hash] the newly assigned header hash
def init_header(parameter = {})
- @header = {}
+ @header = {
+ "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8",
+ "Accept-Encoding": "gzip, br",
+ "Accept-Language": "zh-CN,zh;q=0.9",
+ "Connection": "keep-alive",
+ "Upgrade-Insecure-Requests": "1",
+ "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.109 Safari/537.36",
+ }
end
# Reset the request headers to the defaults produced by `init_header`,
# replacing whatever was previously stored in @header.
#
# @param parameter [Hash] currently unused
# @return [Hash] the freshly built default headers
def update_header(parameter = {})
- nil
+ @header = init_header
end
+ attr_writer :cookies
# Cookie-related methods.
#
# Returns the stored cookies. The first call (or any call made while @cookies
# is nil/false) obtains them from `init_cookies`; later calls reuse the stored
# value.
#
# Only the writer is generated by `attr_writer`: the reader is this explicit
# method, so an `attr_accessor` reader would just be redefined right here.
#
# @param parameter [Hash] currently unused
# @return [Object] the stored cookies (whatever `init_cookies` or the writer set)
def cookies(parameter = {})
  @cookies ||= init_cookies
end
@@ -101,10 +111,18 @@
# Placeholder for refreshing cookies: does no work, ignores `parameter`
# and always returns nil.
#
# @param parameter [Hash] currently unused
# @return [nil]
def update_cookies(parameter = {})
  # Intentionally empty — an empty method body evaluates to nil.
end
+ # 字符串转换成cookies
+ # "abc=123; cd=412" => { "abc": "123", "cd": "412"}
+ def str_to_cookies(str)
+ str.scan(/([^=]*)=([^;]*);? ?/) do |m|
+ self.cookies[:"#{m[0]}"] = m[1]
+ end
+ end
+
# 代理设置
def auto_proxy=(value)
Rails.logger.debug "自动更新代理"
@auto_proxy = value
update_proxy if (value == true && @proxy.blank?)
@@ -204,10 +222,16 @@
# 添加cookies
h = h.cookies(cookies) if cookies
# 添加超时时间
- h = h.timeout(connect: @connect_time, write: @write_time, read: @read_time)
+ if(@all_timeout)
+ # 整体总计超时时间
+ h = h.timeout(@all_timeout)
+ else
+ # 指定每个处理超时时间
+ h = h.timeout(connect: @connect_time, write: @write_time, read: @read_time)
+ end
h
end
#