Sha256: 3ddac256aa2f69b19dd0d7b061acc65759b5bcf42d05c4a3cd8f80cb2fda57f4
Contents?: true
Size: 786 Bytes
Versions: 26
Compression:
Stored size: 786 Bytes
Contents
# A basic spider: reads a start URL from standard input, then follows internal
# links breadth-first, printing a short summary of every page it fetches.
# It keeps running until the queue of unvisited internal links is empty.
$LOAD_PATH << File.join(File.dirname(__FILE__), "/../lib")

require 'rubygems'
require 'set'
require 'meta_inspector'

puts "Enter a valid http url to spider it following internal links"

# Read from $stdin explicitly: bare `gets` reads from files named in ARGV when
# arguments are present. `to_s` turns the nil returned at end of input into an
# empty string instead of raising NoMethodError on `strip`.
url = $stdin.gets.to_s.strip
abort "No url given" if url.empty?

queue = Queue.new
queue.push(url)

# Every URL that has ever been enqueued (visited or still waiting). Checking
# against this set, rather than only against already-visited pages, prevents
# the same link from being queued once per page that references it.
seen = Set[url]
visited_count = 0

until queue.empty?
  # The page is fetched here and only here, so each URL is requested once.
  page = MetaInspector.new(queue.pop)
  visited_count += 1

  puts "Spidering #{page.url}"
  puts "TITLE: #{page.title}"
  puts "META DESCRIPTION: #{page.meta_description}"
  puts "META KEYWORDS: #{page.meta_keywords}"

  internal_links = page.internal_links
  puts "LINKS: #{internal_links.size}"

  internal_links.each do |link|
    # Set#add? returns nil when the link is already present, so only links
    # seen for the first time are queued.
    queue.push(link) if seen.add?(link)
  end

  puts "#{visited_count} pages visited, #{queue.size} pages on queue\n\n"
end
Version data entries
26 entries across 26 versions & 1 rubygems