lib/spidr/agent.rb in spidr-0.6.0 vs lib/spidr/agent.rb in spidr-0.6.1
- old
+ new
@@ -266,11 +266,11 @@
# The newly created agent.
#
# @see #initialize
#
def self.site(url,options={},&block)
# Diff note: the "-" line is the 0.6.0 form, the "+" line its 0.6.1 replacement.
# 0.6.0 left URI instances alone and stringified everything else with #to_s
# before parsing; 0.6.1 hands the argument to URI() exactly as given.
# NOTE(review): values that are neither String nor URI were accepted by the
# old form via #to_s -- confirm URI() still accepts them, or that no caller
# depends on that.
- url = URI(url.to_s) unless url.kind_of?(URI)
+ url = URI(url)
# The URL's host is passed to the constructor as the :host option, merged
# with the caller-supplied options.
agent = new(options.merge(host: url.host),&block)
# The value of start_at(url) is this method's return value.
agent.start_at(url)
end
@@ -406,13 +406,11 @@
#
def history=(new_history)
# Existing entries are discarded first, so this replaces rather than appends.
@history.clear
new_history.each do |url|
# Diff note: 0.6.0 ("-" lines) converted non-URI entries with URI(url.to_s)
# and then appended; 0.6.1 ("+" line) appends URI(url) in one step.
# NOTE(review): entries that are neither String nor URI went through #to_s
# in 0.6.0 -- confirm URI() handles them the same way.
- url = URI(url.to_s) unless url.kind_of?(URI)
-
- @history << url
+ @history << URI(url)
end
# Returns the repopulated @history collection, not the new_history argument.
return @history
end
@@ -423,21 +421,21 @@
#
# @return [Array<String>]
# The links which have been visited.
#
def visited_links
# Converts every entry of @history with #to_s.
# Diff note: both versions perform the same mapping; 0.6.1 ("+" line) uses the
# &:to_s shorthand in place of the explicit block on the 0.6.0 ("-" line).
- @history.map { |url| url.to_s }
+ @history.map(&:to_s)
end
#
# Specifies all hosts that were visited.
#
# @return [Array<String>]
# The hosts which have been visited.
#
def visited_hosts
# Collects #host from every element of visited_urls and removes duplicates
# with uniq.
# Diff note: both versions perform the same mapping; 0.6.1 ("+" line) uses the
# &:host shorthand in place of the explicit block on the 0.6.0 ("-" line).
- visited_urls.map { |uri| uri.host }.uniq
+ visited_urls.map(&:host).uniq
end
#
# Determines whether a URL was visited or not.
#
@@ -446,13 +444,11 @@
#
# @return [Boolean]
# Specifies whether a URL was visited.
#
def visited?(url)
# Membership test against @history after coercing the argument to a URI.
# Diff note: 0.6.0 ("-" lines) stringified non-URI arguments before parsing
# and used an explicit return; 0.6.1 ("+" line) passes the argument to URI()
# as given and relies on the implicit return of the last expression.
# NOTE(review): arguments that are neither String nor URI went through #to_s
# in 0.6.0 -- confirm URI() handles them the same way.
- url = URI(url.to_s) unless url.kind_of?(URI)
-
- return @history.include?(url)
+ @history.include?(URI(url))
end
#
# Sets the list of failed URLs.
#
@@ -467,13 +463,11 @@
#
def failures=(new_failures)
# Existing entries are discarded first, so this replaces rather than appends.
@failures.clear
new_failures.each do |url|
# Diff note: 0.6.0 ("-" lines) converted non-URI entries with URI(url.to_s)
# and then appended; 0.6.1 ("+" line) appends URI(url) in one step.
# NOTE(review): entries that are neither String nor URI went through #to_s
# in 0.6.0 -- confirm URI() handles them the same way.
- url = URI(url.to_s) unless url.kind_of?(URI)
-
- @failures << url
+ @failures << URI(url)
end
# Returns the repopulated @failures collection, not the new_failures argument.
return @failures
end
@@ -485,13 +479,11 @@
#
# @return [Boolean]
# Specifies whether the given URL was unable to be visited.
#
def failed?(url)
# Membership test against @failures after coercing the argument to a URI.
# Diff note: 0.6.0 ("-" lines) stringified non-URI arguments before parsing
# and used an explicit return; 0.6.1 ("+" line) passes the argument to URI()
# as given and relies on the implicit return of the last expression.
# NOTE(review): arguments that are neither String nor URI went through #to_s
# in 0.6.0 -- confirm URI() handles them the same way.
- url = URI(url.to_s) unless url.kind_of?(URI)
-
- return @failures.include?(url)
+ @failures.include?(URI(url))
end
alias pending_urls queue
#
@@ -508,13 +500,11 @@
#
def queue=(new_queue)
# Existing entries are discarded first, so this replaces rather than appends.
@queue.clear
new_queue.each do |url|
# Diff note: 0.6.0 ("-" lines) converted non-URI entries with URI(url.to_s)
# and then appended; 0.6.1 ("+" line) appends URI(url) in one step.
# NOTE(review): entries that are neither String nor URI went through #to_s
# in 0.6.0 -- confirm URI() handles them the same way.
- url = URI(url.to_s) unless url.kind_of?(URI)
-
- @queue << url
+ @queue << URI(url)
end
# Returns the repopulated @queue collection, not the new_queue argument.
return @queue
end
@@ -592,11 +582,11 @@
#
# @return [Page, nil]
# The page for the response, or `nil` if the request failed.
#
def get_page(url)
- url = URI(url.to_s)
+ url = URI(url)
prepare_request(url) do |session,path,headers|
new_page = Page.new(url,session.get(path,headers))
# save any new cookies
@@ -627,11 +617,11 @@
# The page for the response, or `nil` if the request failed.
#
# @since 0.2.2
#
def post_page(url,post_data='')
- url = URI(url.to_s) unless url.kind_of?(URI)
+ url = URI(url)
prepare_request(url) do |session,path,headers|
new_page = Page.new(url,session.post(path,post_data,headers))
# save any new cookies
@@ -723,11 +713,11 @@
# set any additional HTTP headers
headers = @default_headers.dup
unless @host_headers.empty?
@host_headers.each do |name,header|
- if host.match(name)
+ if url.host.match(name)
headers['Host'] = header
break
end
end
end
@@ -767,11 +757,9 @@
# A Hash of request header options.
#
# @since 0.2.2
#
def prepare_request(url,&block)
- host = url.host
- port = url.port
path = unless url.path.empty?
url.path
else
'/'
end