lib/regexp_crawler/crawler.rb in flyerhzm-regexp_crawler-0.8.1 vs lib/regexp_crawler/crawler.rb in flyerhzm-regexp_crawler-0.8.2

- old
+ new

@@ -35,11 +35,11 @@
     response = Net::HTTP.get_response_with_headers(uri, @headers)
     parse_response(response, uri)
   end
   def continue_uri(uri, page)
-    if page.start_with?(uri.scheme)
+    if page =~ /^#{uri.scheme}/
       URI.parse(page)
     elsif page.start_with?('/')
       URI.join(uri.scheme + '://' + uri.host, page)
     else
       URI.parse(uri.to_s.split('/')[0..-2].join('/') + '/' + page)
@@ -54,10 +54,10 @@
       page = page.compact.first if page.is_a? Array
       continue_uri = continue_uri(uri, page)
       @pages << continue_uri unless @captured_pages.include?(continue_uri) or @pages.include?(continue_uri)
     end
   end
-  if @need_parse.nil? or @need_parse.call(uri.to_i, response_body)
+  if @need_parse.nil? or @need_parse.call(uri.to_s, response_body)
     md = @capture_regexp.match(response_body)
     if md
       captures = md.captures
       result = {}
       captures.each_index do |i|