lib/anemone/core.rb in anemone-0.0.1 vs lib/anemone/core.rb in anemone-0.0.2
- old
+ new
@@ -9,32 +9,28 @@
attr_reader :pages
#
# Initialize the crawl with a starting *url* and an optional *block*.
# A String *url* is converted with URI(); any other object is stored as given.
# When a *block* is supplied it is called with this instance after the
# internal state below has been set up.
#
# Diff note: the removed ("-") signature also took an *options* hash and
# defaulted its :threads and :verbose entries; the added ("+") signature
# drops that parameter.
#
- def initialize(url, options={}, &block)
+ def initialize(url, &block)
url = URI(url) if url.is_a?(String)
@url = url
- @options = options
# Filled with Thread objects by #run.
@tentacles = []
@pages = PageHash.new
@on_every_page_blocks = []
# The default block creates a fresh array per missing key, so entries stored
# under different keys never share one array.
@on_pages_like_blocks = Hash.new { |hash,key| hash[key] = [] }
@skip_link_patterns = []
@after_crawl_blocks = []
- @options[:threads] ||= 4
- @options[:verbose] ||= false
-
# Hand the new instance to the caller's block, if one was given.
block.call(self) if block
end
#
# Convenience method to start a new crawl
#
- def self.crawl(root, options={}, &block)
- self.new(root, options) do |core|
+ def self.crawl(root, &block)
+ self.new(root) do |core|
block.call(core) if block
core.run
core.do_after_crawl_blocks
return core
end
@@ -89,11 +85,11 @@
#
def run
link_queue = Queue.new
page_queue = Queue.new
- @options[:threads].times do |id|
+ Anemone.options.threads.times do |id|
@tentacles << Thread.new { Tentacle.new(link_queue, page_queue).run }
end
return if !visit_link?(@url)
@@ -102,10 +98,10 @@
while true do
page = page_queue.deq
@pages[page.url] = page
- puts "#{page.url} Queue: #{link_queue.size}" if @options[:verbose]
+ puts "#{page.url} Queue: #{link_queue.size}" if Anemone.options.verbose
do_page_blocks(page)
page.links.each do |link|
if visit_link?(link)
\ No newline at end of file