Sha256: 03fecd07da4dd274643119ffb4f55786a0313c9b98661b2e8e2b7ac885126d5b

Contents?: true

Size: 1.51 KB

Versions: 1

Compression:

Stored size: 1.51 KB

Contents

require 'ostruct'
require 'anemone/core'

module Anemone
  # Version number
  VERSION = '0.2.0'

  class << self
    # Module-wide options. Anemone.crawl replaces this with a fresh
    # OpenStruct built from its +options+ argument on every call.
    attr_accessor :options
  end

  #
  # Convenience method to start a crawl using Core.
  #
  # urls::    handed to Core.crawl unchanged
  # options:: Hash of settings; it is wrapped in an OpenStruct and stored in
  #           Anemone.options. Any of the following that is missing (or
  #           nil/false) receives its default:
  #             threads             - 4
  #             verbose             - false
  #             discard_page_bodies - false
  #             user_agent          - "Anemone/VERSION"
  #             delay               - 0
  #             obey_robots_txt     - false
  #             depth_limit         - :infinity
  # block::   forwarded to Core.crawl
  #
  # Returns whatever Core.crawl returns.
  #
  # When obey_robots_txt is set and the 'robots' library cannot be loaded,
  # install instructions are printed to stderr and the process exits with
  # status 1 (SystemExit).
  #
  def self.crawl(urls, options = {}, &block)
    opts = OpenStruct.new(options)
    # Publish the struct first; the defaults below are applied to this same
    # object, so readers of Anemone.options see them.
    self.options = opts

    # by default, run 4 Tentacle threads to fetch pages
    opts.threads ||= 4

    # disable verbose output by default
    opts.verbose ||= false

    # by default, don't throw away the page response body after scanning it for links
    opts.discard_page_bodies ||= false

    # by default, identify self as Anemone/VERSION
    opts.user_agent ||= "Anemone/#{VERSION}"

    # no delay between requests by default
    opts.delay ||= 0

    # by default, don't obey the robots exclusion protocol
    opts.obey_robots_txt ||= false

    # the robots library is optional, so it is only loaded on request
    if opts.obey_robots_txt
      begin
        require 'robots'
      rescue LoadError
        warn "To support the robot exclusion protocol, install the robots gem:\n" \
          "sudo gem sources -a http://gems.github.com\n" \
          "sudo gem install fizx-robots"
        # a missing dependency is a failure: exit non-zero so that shells and
        # supervisors do not mistake it for a successful run
        exit 1
      end
    end

    # by default, don't limit the depth of the crawl
    opts.depth_limit ||= :infinity

    # use a single thread if a delay was requested
    opts.threads = 1 if opts.delay != 0

    Core.crawl(urls, &block)
  end
end

Version data entries

1 entries across 1 versions & 1 rubygems

Version Path
anemone-0.2.0 lib/anemone/anemone.rb