lib/spieker/link_scraper.rb in spieker-0.0.2 vs lib/spieker/link_scraper.rb in spieker-0.0.3
- old
+ new
@@ -1,11 +1,10 @@
require 'capybara'
require 'capybara/poltergeist'
module Spieker
class LinkScraper
- LOCAL_LINK_REGEX = /^(?!(http(s)?\:|\/\/)|data\:).*/
include Capybara::DSL
attr_writer :links
def initialize(url)
@@ -43,32 +42,19 @@
end
end
# Filters the given links and normalizes the ones that are kept.
#
# links - a collection of link values; each one is tested by the select
#         block and, if kept, passed to filter_hash.
#
# In 0.0.2 a link was kept when is_local? accepted it and is_email? did not.
# In 0.0.3 the decision is delegated to LinkValidator, built from the link
# and the string form of @url.
# NOTE(review): LinkValidator is not defined in the visible code; presumably
# it covers the local/non-email checks it replaces - confirm.
#
# Returns the kept links after filter_hash has been applied to each, with
# nil entries and duplicates removed.
def cleaned_up_links(links)
links.select { |link|
- is_local?(link) && !is_email?(link)
+ LinkValidator.new(link, @url.to_s).valid?
}.map(&method(:filter_hash)).compact.uniq
end
- def is_local?(link)
- link =~ LOCAL_LINK_REGEX ||
- begin
- URI.parse(link).hostname == @url.hostname
- rescue
- false
- end
- end
-
# Strips a '#fragment' suffix from a link.
#
# link - the link to clean; it must respond to #match.
#
# Returns the text captured before the '#' when the link contains one
# (an empty string if the link starts with '#'), otherwise the link itself
# unchanged.
def filter_hash(link)
# The leading (.*) is greedy, so when several '#' are present the capture
# extends up to the last one.
if match = link.match(/(.*)#(.*)$/)
match[1]
else
link
end
- end
-
- def is_email? link
- link =~ /mailto/
end
end
end
class NullStream