lib/http_crawler/client.rb in http_crawler-0.3.0.3 vs lib/http_crawler/client.rb in http_crawler-0.3.0.4
- old
+ new
@@ -1,28 +1,32 @@
require_dependency File.dirname(__FILE__) + '/http/response.rb'
module HttpCrawler
- module Client
+ class Client
class << self
# Expected input format:
# web_name = "biquge_duquanben"
# Returns a HttpCrawler::Web::BiqugeDuquanben::Client instance
# (old version forwarded extra *args to Client.new; new version calls it with no arguments)
- def for(web_name, *args)
- "HttpCrawler::Web::#{web_name.camelize}::Client".constantize.new(*args)
+ def for(web_name)
+ "HttpCrawler::Web::#{web_name.camelize}::Client".constantize.new()
end
# Builds a client from a fully qualified module name.
# Expected input format:
# module_name = "HttpCrawler::Web::BiqugeDuquanben"
# Returns a HttpCrawler::Web::BiqugeDuquanben::Client instance
# NOTE(review): *args is still accepted, but the new version no longer passes it to Client.new — confirm intended.
def for_module(module_name, *args)
- "#{module_name}::Client".constantize.new(*args)
+ "#{module_name}::Client".constantize.new()
end
+
+ def for_uri(path) # builds an instance of the receiving class for the given uri/path
+ self.new(uri: path) # path is passed to the constructor under the :uri key
+ end
end
attr_accessor :max_error_num
# Maximum number of error retries
@@ -36,10 +40,29 @@
#
def init_uri # base hook: leaves @uri unset and returns nil
@uri = nil
end
+ # Update @uri from a URI object or a String, then re-run init_ssl and return self.uri
+ def update_uri(uri_or_path)
+ case uri_or_path
+ when URI # a URI object replaces the current uri as-is
+ @uri = uri_or_path
+ when String
+ if uri_or_path =~ /^http/ # strings matching /^http/ are parsed as a complete uri
+ @uri = URI(uri_or_path)
+ else # any other string is joined onto the current @uri
+ @uri = @uri + uri_or_path # NOTE(review): raises NoMethodError if @uri is still nil — confirm a base uri is always set first
+ end
+ else
+ raise ArgumentError, uri_or_path # unsupported argument type; the raw value is handed over as the message
+ end
+ # Re-initialize the ssl settings
+ self.init_ssl
+ self.uri
+ end
+
# 初始化超时时间
def init_timeout
@connect_time = 5
@write_time = 5
@read_time = 5
@@ -53,28 +76,35 @@
@ctx.verify_mode = OpenSSL::SSL::VERIFY_NONE
end
end
# Header-related methods
- def header
+ def header(parameter = {}) # parameter is accepted but not used in this body
@header ||= init_header # memoized; init_header supplies the initial value
end
- def init_header
- nil
+ def init_header(parameter = {}) # parameter is accepted but not used in this body
+ @header = {} # new version starts from an empty hash instead of nil
end
def update_header(parameter = {}) # base implementation ignores parameter and returns nil
nil
end
- # cookies
- def cookies
- @cookies ||= {}
+ # Cookie-related methods
+ def cookies(parameter = {}) # parameter is accepted but not used in this body
+ @cookies ||= init_cookies # memoized; init_cookies supplies the initial value
end
+ def init_cookies(parameter = {}) # parameter is accepted but not used in this body
+ @cookies = {} # resets the cookie store to an empty hash
+ end
+ def update_cookies(parameter = {}) # base implementation ignores parameter and returns nil
+ nil
+ end
+
# 代理设置
def auto_proxy=(value)
Rails.logger.debug "自动更新代理"
@auto_proxy = value
update_proxy if (value == true && @proxy.blank?)
@@ -148,13 +178,13 @@
def add_error_url(url_string) # appends url_string to @http.error_urls
@http.error_urls << url_string
end
- # Initialize http parameters
+ # Initialize init_client parameters
def init_client
-
+ nil # explicit nil; an empty method body also evaluates to nil
end
# 初始化http请求前置条件
def http
# 自动重定向。最大重定向次数 max_hops: 5
@@ -177,14 +207,20 @@
#
# init_uri 如果未初始化@uri,则会报错
# 继承类需要重定义 init_uri
#
- def initialize
+ def initialize(parameter = {})
# 初始化 uri
- raise "Client uri为空" unless init_uri
+ init_uri
+ # 如果自定义uri
+ if parameter[:uri]
+ raise "Client uri为重复初始化" if uri
+ update_uri(parameter[:uri])
+ end
+
# 初始化超时时间
init_timeout
# 初始化 ssl 协议
init_ssl
@@ -196,16 +232,24 @@
@proxy_params = {key: "#{self.class}"}
end
# Send a GET request to path resolved against the client uri
def get(path, params = {})
- request {http.get((@uri + path).to_s, :params => params, :ssl_context => @ctx)}
+ raise "Client uri为空" unless self.uri # new guard: fail early when no uri is set
+ request {http.get((self.uri + path).to_s, :params => params, :ssl_context => @ctx)}
end
+ # Send a GET request directly to the client's own uri
+ def get_uri
+ raise "Client uri为空" unless self.uri # nothing to request without a uri
+ request {http.get(self.uri.to_s, :ssl_context => @ctx)}
+ end
+
# Send a POST request (params sent as form data) to path resolved against the client uri
def post(path, params = {})
- request {http.post((@uri + path).to_s, :form => params, :ssl_context => @ctx)}
+ raise "Client uri为空" unless self.uri # new guard: fail early when no uri is set
+ request {http.post((self.uri + path).to_s, :form => params, :ssl_context => @ctx)}
end
# Response of the request
attr_accessor :response
protected :response= # the writer is restricted to protected visibility
@@ -231,30 +275,28 @@
raise "必须定义块" unless block_given?
n = max_error_num
begin
block.call
rescue => error
-
+ Rails.logger.debug error.class
case error
when HTTP::TimeoutError
# 超时错误切换代理
if self.update_proxy?
retry
else
raise error
end
-
else
# 错误尝试次数
if n <= 0
raise error
else
n -= 1
retry
end
end
-
end
- end
+ end # def request(&block)
end
end