lib/retriever/target.rb in rubyretriever-1.1.0 vs lib/retriever/target.rb in rubyretriever-1.2.0

- old
+ new

@@ -1,23 +1,24 @@
 require 'open-uri'
+require 'addressable/uri'

 module Retriever
   #
   class Target
-    HTTP_RE = Regexp.new(/^http/i).freeze
-    DUB_DUB_DUB_DOT_RE = Regexp.new(/^www\./i).freeze
+    HTTP_RE = Regexp.new(/^http/i).freeze

-    attr_reader :host, :target, :host_re, :source, :file_re
+    attr_reader :host, :target, :host_re, :source, :file_re, :scheme

     def initialize(url, file_re = nil)
-      url = "http://#{url}" unless HTTP_RE =~ url
-      fail 'Bad URL' unless /\./ =~ url
-      new_uri = URI(url)
-      @target = new_uri.to_s
-      @host = new_uri.host
-      @host_re = Regexp.new(@host.sub('www.', ''))
-      @file_re ||= file_re
+      fail 'Bad URL' unless url.include?('.')
+      url = "http://#{url}" unless HTTP_RE =~ url
+      target_uri = Addressable::URI.parse(url)
+      @target = target_uri.to_s
+      @host = target_uri.host
+      @host_re = Regexp.new(@host.sub('www.', ''))
+      @file_re ||= file_re
+      @scheme = target_uri.scheme
     end

     def source
       resp = open(@target)
       resp_url = resp.base_uri.to_s
@@ -29,16 +30,17 @@
       resp = resp.read
       #
       fail 'Domain is not working. Try the non-WWW version.' if resp == ''
       fail 'Domain not working. Try HTTPS???' unless resp
       # consider using scrub from ruby 2.1? this misses some things
-      resp.encode('UTF-8', 'binary', :invalid => :replace, :undef => :replace)
+      resp.encode('UTF-8', 'binary', invalid: :replace, undef: :replace)
     end

     def resync_target_and_return_source(url)
-      new_t = Retriever::Target.new(url)
+      new_t = Retriever::Target.new(url)
       @target = new_t.target
-      @host = new_t.host
+      @host = new_t.host
+      @scheme = new_t.scheme
       new_t.source
     end
   end
 end