lib/regexp_crawler/crawler.rb in flyerhzm-regexp_crawler-0.8.1 vs lib/regexp_crawler/crawler.rb in flyerhzm-regexp_crawler-0.8.2
- old
+ new
@@ -35,11 +35,11 @@
response = Net::HTTP.get_response_with_headers(uri, @headers)
parse_response(response, uri)
end
def continue_uri(uri, page)
- if page.start_with?(uri.scheme)
+ if page =~ /^#{uri.scheme}/
URI.parse(page)
elsif page.start_with?('/')
URI.join(uri.scheme + '://' + uri.host, page)
else
URI.parse(uri.to_s.split('/')[0..-2].join('/') + '/' + page)
@@ -54,10 +54,10 @@
page = page.compact.first if page.is_a? Array
continue_uri = continue_uri(uri, page)
@pages << continue_uri unless @captured_pages.include?(continue_uri) or @pages.include?(continue_uri)
end
end
- if @need_parse.nil? or @need_parse.call(uri.to_i, response_body)
+ if @need_parse.nil? or @need_parse.call(uri.to_s, response_body)
md = @capture_regexp.match(response_body)
if md
captures = md.captures
result = {}
captures.each_index do |i|