Sha256: 474da1663fde68cf7ef49e369debcc1e4daf6a3d95edf99b54b9f83e9ef07da1

Contents?: true

Size: 888 Bytes

Versions: 16

Compression:

Stored size: 888 Bytes

Contents

# A basic spider that will follow internal links
#
# Usage example:
#
#   ruby spider.rb example.com
#
# Starting from the given URL, every internal link found on a visited page is
# queued and visited in turn, until no unvisited internal links remain.

require 'set'
# Resolved relative to this file so the script can be run from any directory
require_relative '../lib/metainspector'
puts "Using MetaInspector #{MetaInspector::VERSION}"

# Pending URLs; popped from the end, so the newest discovery is visited first
queue   = []
# URLs already scraped, in visiting order
visited = []
# Every URL that is either queued or visited, for fast duplicate checks
seen    = Set.new

# Get the starting URL from the command line, or ask for it
url = ARGV[0]
if url.nil?
  puts "Enter a starting url"
  # gets returns nil at end of input, hence to_s before strip
  url = $stdin.gets.to_s.strip
end
abort "No starting url given" if url.empty?

# Resolve initial redirections
page = MetaInspector.new(url)

# Push this initial URL to the queue
queue.push(page.url)
seen.add(page.url)

until queue.empty?
  url = queue.pop
  visited.push(url)

  puts "VISITED: #{url}"

  page = MetaInspector.new(url)

  page.links.internal.each do |link|
    # Set#add? returns nil when the link was already queued or visited
    queue.push(link) if seen.add?(link)
  end

  puts "#{visited.size} pages visited, #{queue.size} pages on queue\n\n"
end

puts "\nScraping finished, these are the internal links found:\n\n"
puts visited.sort

Version data entries

16 entries across 16 versions & 1 rubygems

Version Path
metainspector-5.4.1 examples/spider.rb
metainspector-5.4.0 examples/spider.rb
metainspector-5.3.1 examples/spider.rb
metainspector-5.3.0 examples/spider.rb
metainspector-5.2.3 examples/spider.rb
metainspector-5.2.2 examples/spider.rb
metainspector-5.2.1 examples/spider.rb
metainspector-5.2.0 examples/spider.rb
metainspector-5.1.3 examples/spider.rb
metainspector-5.1.2 examples/spider.rb
metainspector-5.1.1 examples/spider.rb
metainspector-5.1.0 examples/spider.rb
metainspector-5.0.2 examples/spider.rb
metainspector-5.0.1 examples/spider.rb
metainspector-5.0.0 examples/spider.rb
metainspector-5.0.0.rc1 examples/spider.rb