lib/http_crawler/client.rb in http_crawler-0.3.0.3 vs lib/http_crawler/client.rb in http_crawler-0.3.0.4

- old
+ new

@@ -1,28 +1,32 @@ require_dependency File.dirname(__FILE__) + '/http/response.rb' module HttpCrawler - module Client + class Client class << self # 接收格式 # web_name = "biquge_duquanben" # 返回 HttpCrawler::Web::BiqugeDuquanben::Client 实例 # - def for(web_name, *args) - "HttpCrawler::Web::#{web_name.camelize}::Client".constantize.new(*args) + def for(web_name) + "HttpCrawler::Web::#{web_name.camelize}::Client".constantize.new() end # # 接收格式 # module_name = "HttpCrawler::Web::BiqugeDuquanben" # 返回 HttpCrawler::Web::BiqugeDuquanben::Client 实例 # def for_module(module_name, *args) - "#{module_name}::Client".constantize.new(*args) + "#{module_name}::Client".constantize.new() end + + def for_uri(path) + self.new(uri: path) + end end attr_accessor :max_error_num # 最大错误重试次数 @@ -36,10 +40,29 @@ # def init_uri @uri = nil end + # 更新uri + def update_uri(uri_or_path) + case uri_or_path + when URI + @uri = uri_or_path + when String + if uri_or_path =~ /^http/ + @uri = URI(uri_or_path) + else + @uri = @uri + uri_or_path + end + else + raise ArgumentError, uri_or_path + end + # 初始化 ssl 协议 + self.init_ssl + self.uri + end + # 初始化超时时间 def init_timeout @connect_time = 5 @write_time = 5 @read_time = 5 @@ -53,28 +76,35 @@ @ctx.verify_mode = OpenSSL::SSL::VERIFY_NONE end end # 头文件相关方法 - def header + def header(parameter = {}) @header ||= init_header end - def init_header - nil + def init_header(parameter = {}) + @header = {} end def update_header(parameter = {}) nil end - # cookies - def cookies - @cookies ||= {} + # cookies相关方法 + def cookies(parameter = {}) + @cookies ||= init_cookies end + def init_cookies(parameter = {}) + @cookies = {} + end + def update_cookies(parameter = {}) + nil + end + # 代理设置 def auto_proxy=(value) Rails.logger.debug "自动更新代理" @auto_proxy = value update_proxy if (value == true && @proxy.blank?) @@ -148,13 +178,13 @@ def add_error_url(url_string) @http.error_urls << url_string end - # 初始化http参数 + # 初始化init_client参数 def init_client - + nil end # 初始化http请求前置条件 def http # 自动重定向。最大重定向次数 max_hops: 5 @@ -177,14 +207,20 @@ # # init_uri 如果未初始化@uri,则会报错 # 继承类需要重定义 init_uri # - def initialize + def initialize(parameter = {}) # 初始化 uri - raise "Client uri为空" unless init_uri + init_uri + # 如果自定义uri + if parameter[:uri] + raise "Client uri为重复初始化" if uri + update_uri(parameter[:uri]) + end + # 初始化超时时间 init_timeout # 初始化 ssl 协议 init_ssl @@ -196,16 +232,24 @@ @proxy_params = {key: "#{self.class}"} end # 发送 get 请求 def get(path, params = {}) - request {http.get((@uri + path).to_s, :params => params, :ssl_context => @ctx)} + raise "Client uri为空" unless self.uri + request {http.get((self.uri + path).to_s, :params => params, :ssl_context => @ctx)} end + # 直接发送uri的get请求 + def get_uri + raise "Client uri为空" unless self.uri + request {http.get(self.uri.to_s, :ssl_context => @ctx)} + end + # 发送 post 请求 def post(path, params = {}) - request {http.post((@uri + path).to_s, :form => params, :ssl_context => @ctx)} + raise "Client uri为空" unless self.uri + request {http.post((self.uri + path).to_s, :form => params, :ssl_context => @ctx)} end # 请求的响应 attr_accessor :response protected :response= @@ -231,30 +275,28 @@ raise "必须定义块" unless block_given? n = max_error_num begin block.call rescue => error - + Rails.logger.debug error.class case error when HTTP::TimeoutError # 超时错误切换代理 if self.update_proxy? retry else raise error end - else # 错误尝试次数 if n <= 0 raise error else n -= 1 retry end end - end - end + end # def request(&block) end end