Sha256: fe5c6ef94ac89d06982001e7d4a2294e527a4317a69654f2939b41343895736a

Contents?: true

Size: 776 Bytes

Versions: 10

Compression:

Stored size: 776 Bytes

Contents

# A basic spider that keeps following links in a loop until no queued pages remain
$: << File.join(File.dirname(__FILE__), "/../lib")
require 'meta_inspector'

# Crawl outward from a URL typed by the user: pop a URL off the queue, print
# its title / meta description / meta keywords / link count, then enqueue the
# page's links. The loop ends when the queue is empty.

q = Queue.new
visited_links = []
# URLs that have already been queued at some point (visited or still waiting).
# Checking only visited_links would let the same URL be queued -- and later
# fetched -- several times whenever it is linked more than once before its
# turn comes up.
seen = {}

puts "Enter a valid http url to spider it following external links"
input = gets
# gets returns nil on EOF; bail out instead of crashing on nil.strip.
abort "No url given" if input.nil? || input.strip.empty?
url = input.strip

seen[url] = true
q.push(url)

until q.empty?
  url = q.pop
  visited_links << url
  page = MetaInspector.new(url)
  puts "Spidering #{page.url}"

  puts "TITLE: #{page.title}"
  puts "META DESCRIPTION: #{page.meta_description}"
  puts "META KEYWORDS: #{page.meta_keywords}"
  puts "LINKS: #{page.links.size}"
  page.links.each do |link|
    # Only absolute http:// links are followed (https:// and relative links
    # are skipped, as before).
    next unless link.start_with?('http://')
    next if seen.key?(link)

    seen[link] = true
    q.push(link)
  end
  puts "#{visited_links.size} pages visited, #{q.size} pages on queue\n\n"
end

Version data entries

10 entries across 10 versions & 1 rubygems

Version Path
metainspector-1.8.8 samples/spider.rb
metainspector-1.8.7 samples/spider.rb
metainspector-1.8.6 samples/spider.rb
metainspector-1.8.5 samples/spider.rb
metainspector-1.8.4 samples/spider.rb
metainspector-1.8.3 samples/spider.rb
metainspector-1.8.2 samples/spider.rb
metainspector-1.7.1 samples/spider.rb
metainspector-1.7.0 samples/spider.rb
metainspector-1.6.0 samples/spider.rb