#
#--
# Ronin Web - A Ruby library for Ronin that provides support for web
# scraping and spidering functionality.
#
# Copyright (c) 2006-2009 Hal Brodigan (postmodern.mod3 at gmail.com)
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
#++
#
require 'ronin/web/web'
require 'spidr/agent'
module Ronin
module Web
class Spider < Spidr::Agent

  #
  # Creates a new Spider object with the given _options_ and
  # _block_. If a _block_ is given, it will be passed the newly created
  # Spider object.
  #
  # The given _options_ are merged over Spider.default_options, so any
  # key supplied by the caller takes precedence over the defaults.
  #
  # _options_ may contain the following keys:
  # <tt>:proxy</tt>:: The proxy to use while spidering. Defaults to
  #                   Web.proxy.
  # <tt>:user_agent</tt>:: The User-Agent string to send. Defaults to
  #                        Web.user_agent.
  # <tt>:referer</tt>:: The referer URL to send.
  # <tt>:delay</tt>:: Duration in seconds to pause between spidering
  #                   each link. Defaults to 0.
  # <tt>:host</tt>:: The host-name to visit.
  # <tt>:hosts</tt>:: An +Array+ of host patterns to visit.
  # <tt>:ignore_hosts</tt>:: An +Array+ of host patterns to not visit.
  # <tt>:ports</tt>:: An +Array+ of port patterns to visit.
  # <tt>:ignore_ports</tt>:: An +Array+ of port patterns to not visit.
  # <tt>:links</tt>:: An +Array+ of link patterns to visit.
  # <tt>:ignore_links</tt>:: An +Array+ of link patterns to not visit.
  # <tt>:exts</tt>:: An +Array+ of File extension patterns to visit.
  # <tt>:ignore_exts</tt>:: An +Array+ of File extension patterns to
  #                         not visit.
  #
  def self.agent(options={},&block)
    self.new(self.default_options.merge(options),&block)
  end

  #
  # Creates a new Spider object with the given _options_ and will begin
  # spidering the specified host _name_. If a _block_ is given it
  # will be passed the newly created Spider object, before the agent
  # begins spidering.
  #
  # The given _options_ are merged over Spider.default_options before
  # being handed to the superclass implementation of +host+.
  #
  def self.host(name,options={},&block)
    super(name,self.default_options.merge(options),&block)
  end

  #
  # Creates a new Spider object with the given _options_ and will begin
  # spidering the host of the specified _url_. If a _block_ is
  # given it will be passed the newly created Spider object, before
  # the agent begins spidering.
  #
  # The given _options_ are merged over Spider.default_options before
  # being handed to the superclass implementation of +site+.
  #
  def self.site(url,options={},&block)
    super(url,self.default_options.merge(options),&block)
  end

  #
  # Returns the default options for Spider: a new +Hash+ holding the
  # current Web.proxy under <tt>:proxy</tt> and the current
  # Web.user_agent under <tt>:user_agent</tt>. The values are read on
  # every call, so later changes to the Web settings are picked up.
  #
  # NOTE: this method is an internal helper of Spider.agent, Spider.host
  # and Spider.site, but it is publicly callable. A bare +protected+
  # used to precede it; that keyword only affects instance methods and
  # never applied to a method defined with <tt>def self.</tt>, so it was
  # removed as a misleading no-op. The method is intentionally left
  # public so that any existing caller keeps working.
  #
  def self.default_options
    {:proxy => Web.proxy, :user_agent => Web.user_agent}
  end

end
end
end