lib/retriever/target.rb in rubyretriever-1.0.3 vs lib/retriever/target.rb in rubyretriever-1.1.0
- old
+ new
@@ -1,52 +1,44 @@
require 'open-uri'
module Retriever
-
+ #
class Target
-
HTTP_RE = Regexp.new(/^http/i).freeze
DUB_DUB_DUB_DOT_RE = Regexp.new(/^www\./i).freeze
-
+
attr_reader :host, :target, :host_re, :source, :file_re
- def initialize(url,file_re=nil)
- url = "http://#{url}" if (!(HTTP_RE =~ url))
- fail "Bad URL" if (!(/\./ =~ url))
+ def initialize(url, file_re = nil)
+ url = "http://#{url}" unless HTTP_RE =~ url
+ fail 'Bad URL' unless /\./ =~ url
new_uri = URI(url)
@target = new_uri.to_s
@host = new_uri.host
- @host_re = Regexp.new(@host.sub('www.',''))
+ @host_re = Regexp.new(@host.sub('www.', ''))
@file_re ||= file_re
end
def source
- resp = false
- begin
- resp = open(@target)
- rescue StandardError => e
- trap("ABRT"){
- puts "#{@target} failed SSL Certification Verification"
- }
- return false
- end
+ resp = open(@target)
resp_url = resp.base_uri.to_s
- if (@target != resp_url)
- if @host_re =~ resp_url #if redirect URL is same hose, we want to re-sync our target with the right URL
- new_t = Retriever::Target.new(resp_url)
- @target = new_t.target
- @host = new_t.host
- return new_t.source
- end
- fail "Domain redirecting to new host: #{resp.base_uri.to_s}" #if it's not same host, we want to fail
+ if @target != resp_url
+ fail "Domain redirecting: #{resp_url}" unless @host_re =~ resp_url
+ # if redirect URL is same host, we want to re-sync @target
+ return resync_target_and_return_source(resp_url)
end
resp = resp.read
- if resp == ""
- fail "Domain is not working. Try the non-WWW version."
- end
- fail "Domain not working. Try HTTPS???" if !resp
- return resp.encode('UTF-8', 'binary', :invalid => :replace, :undef => :replace) #consider using scrub from ruby 2.1? this misses some things
+ #
+ fail 'Domain is not working. Try the non-WWW version.' if resp == ''
+ fail 'Domain not working. Try HTTPS???' unless resp
+ # consider using scrub from ruby 2.1? this misses some things
+ resp.encode('UTF-8', 'binary', :invalid => :replace, :undef => :replace)
end
+ def resync_target_and_return_source(url)
+ new_t = Retriever::Target.new(url)
+ @target = new_t.target
+ @host = new_t.host
+ new_t.source
+ end
end
-
end