lib/sitediff/crawler.rb in sitediff-1.1.1 vs lib/sitediff/crawler.rb in sitediff-1.2.0
- old
+ new
@@ -19,11 +19,11 @@
interval,
include_regex,
exclude_regex,
depth = DEFAULT_DEPTH,
curl_opts = UriWrapper::DEFAULT_CURL_OPTS,
- debug = true,
+ debug: true,
&block)
@hydra = hydra
@base_uri = Addressable::URI.parse(base)
@base = base
@interval = interval
@@ -41,11 +41,11 @@
# Queue a fetch for the relative path `rel`, unless it has already been seen.
#
# rel   - path fragment that is appended to @base to build the URL to fetch
# depth - passed through unchanged to fetched_uri (presumably the crawl depth
#         of this link -- TODO confirm against fetched_uri)
def add_uri(rel, depth)
# Skip anything already recorded in @found so each path is queued only once.
return if @found.include? rel
@found << rel
# 1.2.0 passes the debug flag as a keyword argument (debug:) instead of positionally.
- wrapper = UriWrapper.new(@base + rel, @curl_opts, @debug)
+ wrapper = UriWrapper.new(@base + rel, @curl_opts, debug: @debug)
# Queue the request on @hydra; the block forwards whatever queue yields (res) to fetched_uri.
wrapper.queue(@hydra) do |res|
fetched_uri(rel, depth, res)
end
end
@@ -102,9 +102,19 @@
nil
end
# Make a link relative to @base_uri.
#
# Returns the part of uri.path that follows the first @base_uri.path.length
# characters. Only the path is used: the query string and fragment of `uri`
# are not part of the result. The method does not check that uri.path
# actually starts with @base_uri.path.
def relativize_link(uri)
# The commented-out lines below (added in 1.2.0) sketch an alternative that
# would keep the query and fragment; they are not executed.
+ # fullPath = uri.path
+ # if uri.query
+ # fullPath += "?" + uri.query
+ # end
+ #
+ # if uri.fragment
+ # fullPath += "#" + uri.fragment
+ # end
+ # fullPath.gsub(@base_uri.path, "")
+ #
# The length argument (uri.path.length) is at least as long as what remains
# after the offset, so the slice runs to the end of the path (assuming
# uri.path is a String -- TODO confirm).
uri.path.slice(@base_uri.path.length, uri.path.length)
end
# Return a list of string links found on a page.
def find_links(doc)