#
#--
# Ronin - A Ruby platform designed for information security and data
# exploration tasks.
#
# Copyright (c) 2006-2008 Hal Brodigan (postmodern.mod3 at gmail.com)
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
#++
#

require 'ronin/network/http'

require 'uri/http'
require 'mechanize'
require 'open-uri'
require 'spidr'

module Ronin
  #
  # Convenience methods for Web access. Proxy and User-Agent settings are
  # stored in Network::HTTP; the methods here read and write them and
  # apply them to open-uri, Mechanize and Spidr.
  #
  module Web
    #
    # Returns the default Ronin Web proxy port, as held by
    # Network::HTTP.
    #
    def Web.default_proxy_port
      Network::HTTP.default_proxy_port
    end

    #
    # Sets the default Ronin Web proxy port to the specified _port_.
    #
    def Web.default_proxy_port=(port)
      Network::HTTP.default_proxy_port = port
    end

    #
    # Returns the +Hash+ of the Ronin Web proxy information. The methods
    # in this module read the <tt>:host</tt>, <tt>:port</tt>,
    # <tt>:user</tt> and <tt>:password</tt> keys from it.
    #
    def Web.proxy
      Network::HTTP.proxy
    end

    #
    # Resets the Web proxy settings.
    #
    def Web.disable_proxy
      Network::HTTP.disable_proxy
    end

    #
    # Creates a HTTP URI from the given _proxy_info_ hash. The
    # _proxy_info_ hash defaults to Web.proxy, if not given.
    #
    # Returns +nil+ if _proxy_info_ has no <tt>:host</tt>. The userinfo
    # component is only set when a <tt>:user</tt> or <tt>:password</tt>
    # is present.
    #
    def Web.proxy_url(proxy_info=Web.proxy)
      # Read from the proxy_info argument rather than Web.proxy, so that
      # a proxy passed by the caller is the one that ends up in the URI.
      if proxy_info[:host]
        userinfo = nil

        if (proxy_info[:user] || proxy_info[:password])
          userinfo = "#{proxy_info[:user]}:#{proxy_info[:password]}"
        end

        return URI::HTTP.build(
          :host => proxy_info[:host],
          :port => proxy_info[:port],
          :userinfo => userinfo,
          :path => '/'
        )
      end
    end

    #
    # Returns the supported Web User-Agent Aliases.
    #
    def Web.user_agent_aliases
      WWW::Mechanize::AGENT_ALIASES
    end

    #
    # Returns the Ronin Web User-Agent
    #
    def Web.user_agent
      Network::HTTP.user_agent
    end

    #
    # Sets the Ronin Web User-Agent to the specified _new_agent_.
    #
    def Web.user_agent=(new_agent)
      Network::HTTP.user_agent = new_agent
    end

    #
    # Sets the Ronin Web User-Agent to the specified user agent alias
    # _name_. The _name_ is converted to a +String+ before it is looked
    # up in Web.user_agent_aliases.
    #
    def Web.user_agent_alias=(name)
      Network::HTTP.user_agent = Web.user_agent_aliases[name.to_s]
    end

    #
    # Opens the _url_ with the given _options_. The result of
    # <tt>Kernel.open</tt> for the _url_ will be returned.
    #
    # _options_ may contain the following keys:
    # <tt>:user_agent_alias</tt>:: The User-Agent Alias to use.
    # <tt>:user_agent</tt>:: The User-Agent string to use.
    # <tt>:proxy</tt>:: A +Hash+ of the proxy information to use.
    #
    #   Web.open('http://www.hackety.org/')
    #
    #   Web.open('http://tenderlovemaking.com/',
    #            :user_agent_alias => 'Linux Mozilla')
    #
    #   Web.open('http://www.wired.com/', :user_agent => 'the future')
    #
    def Web.open(url,options={})
      headers = {}

      if options[:user_agent_alias]
        # to_s keeps the lookup consistent with Web.user_agent_alias=,
        # so that a Symbol alias name is also accepted.
        headers['User-Agent'] = Web.user_agent_aliases[options[:user_agent_alias].to_s]
      elsif options[:user_agent]
        headers['User-Agent'] = options[:user_agent]
      elsif Web.user_agent
        headers['User-Agent'] = Web.user_agent
      end

      proxy = (options[:proxy] || Web.proxy)

      if proxy[:host]
        headers[:proxy] = Web.proxy_url(proxy)
      end

      # NOTE(review): the url is handed to Kernel.open as-is; callers
      # should not pass untrusted strings here, since Kernel.open is not
      # limited to URLs.
      return Kernel.open(url,headers)
    end

    #
    # Creates a new Mechanize agent with the given _options_. If a
    # _block_ is given, it will be passed the new agent. The agent is
    # returned.
    #
    # _options_ may contain the following keys:
    # <tt>:user_agent_alias</tt>:: The User-Agent Alias to use.
    # <tt>:user_agent</tt>:: The User-Agent string to use.
    # <tt>:proxy</tt>:: A +Hash+ of the proxy information to use.
    #
    #   Web.agent
    #   Web.agent(:user_agent_alias => 'Linux Mozilla')
    #   Web.agent(:user_agent => 'wooden pants')
    #
    def Web.agent(options={},&block)
      agent = WWW::Mechanize.new

      if options[:user_agent_alias]
        agent.user_agent_alias = options[:user_agent_alias]
      elsif options[:user_agent]
        agent.user_agent = options[:user_agent]
      elsif Web.user_agent
        agent.user_agent = Web.user_agent
      end

      proxy = (options[:proxy] || Web.proxy)

      if proxy[:host]
        agent.set_proxy(proxy[:host],proxy[:port],proxy[:user],proxy[:password])
      end

      block.call(agent) if block
      return agent
    end

    #
    # Gets the specified _url_ with the given _options_. If a _block_ is
    # given, it will be passed the retrieved page.
    #
    # _options_ may contain the following keys:
    # <tt>:user_agent_alias</tt>:: The User-Agent Alias to use.
    # <tt>:user_agent</tt>:: The User-Agent string to use.
    # <tt>:proxy</tt>:: A +Hash+ of the proxy information to use.
    #
    #   Web.get('http://www.0x000000.com') # => WWW::Mechanize::Page
    #
    #   Web.get('http://www.rubyinside.com') do |page|
    #     page.search('div.post/h2/a').each do |title|
    #       puts title.inner_text
    #     end
    #   end
    #
    def Web.get(url,options={},&block)
      page = Web.agent(options).get(url)

      block.call(page) if block
      return page
    end

    #
    # Gets the specified _url_ with the given _options_, returning the body
    # of the requested page. If a _block_ is given, it will be passed the
    # body of the retrieved page.
    #
    # _options_ may contain the following keys:
    # <tt>:user_agent_alias</tt>:: The User-Agent Alias to use.
    # <tt>:user_agent</tt>:: The User-Agent string to use.
    # <tt>:proxy</tt>:: A +Hash+ of the proxy information to use.
    #
    #   Web.get_body('http://www.rubyinside.com') # => String
    #
    #   Web.get_body('http://www.rubyinside.com') do |body|
    #     puts body
    #   end
    #
    def Web.get_body(url,options={},&block)
      body = Web.get(url,options).body

      block.call(body) if block
      return body
    end

    #
    # Posts the specified _url_ with the given _options_. If a _block_ is
    # given, it will be passed the posted page.
    #
    # _options_ may contain the following keys:
    # <tt>:query</tt>:: The query parameters to post to the specified
    #                   _url_. Defaults to an empty +Hash+.
    # <tt>:user_agent_alias</tt>:: The User-Agent Alias to use.
    # <tt>:user_agent</tt>:: The User-Agent string to use.
    # <tt>:proxy</tt>:: A +Hash+ of the proxy information to use.
    #
    #   Web.post('http://www.rubyinside.com') # => WWW::Mechanize::Page
    #
    def Web.post(url,options={},&block)
      query = (options[:query] || {})
      page = Web.agent(options).post(url,query)

      block.call(page) if block
      return page
    end

    #
    # Posts the specified _url_ with the given _options_, returning the body
    # of the posted page. If a _block_ is given, it will be passed the
    # body of the posted page.
    #
    # _options_ may contain the following keys:
    # <tt>:query</tt>:: The query parameters to post to the specified
    #                   _url_.
    # <tt>:user_agent_alias</tt>:: The User-Agent Alias to use.
    # <tt>:user_agent</tt>:: The User-Agent string to use.
    # <tt>:proxy</tt>:: A +Hash+ of the proxy information to use.
    #
    #   Web.post_body('http://www.rubyinside.com') # => String
    #
    #   Web.post_body('http://www.rubyinside.com') do |body|
    #     puts body
    #   end
    #
    def Web.post_body(url,options={},&block)
      body = Web.post(url,options).body

      block.call(body) if block
      return body
    end

    #
    # Creates a new Spidr::Agent object with the given _options_ and
    # _block_. If a _block_ is given, it will be passed the newly created
    # Spidr::Agent object.
    #
    # _options_ may contain the following keys:
    # <tt>:proxy</tt>:: The proxy to use while spidering. Defaults to
    #                   Web.proxy.
    # <tt>:user_agent</tt>:: The User-Agent string to send. Defaults to
    #                        Web.user_agent.
    # <tt>:referer</tt>:: The referer URL to send.
    # <tt>:delay</tt>:: Duration in seconds to pause between spidering each
    #                   link. Defaults to 0.
    # <tt>:host</tt>:: The host-name to visit.
    # <tt>:hosts</tt>:: An +Array+ of host patterns to visit.
    # <tt>:ignore_hosts</tt>:: An +Array+ of host patterns to not visit.
    # <tt>:ports</tt>:: An +Array+ of port patterns to visit.
    # <tt>:ignore_ports</tt>:: An +Array+ of port patterns to not visit.
    # <tt>:links</tt>:: An +Array+ of link patterns to visit.
    # <tt>:ignore_links</tt>:: An +Array+ of link patterns to not visit.
    # <tt>:exts</tt>:: An +Array+ of File extension patterns to visit.
    # <tt>:ignore_exts</tt>:: An +Array+ of File extension patterns to not
    #                         visit.
    #
    def Web.spider_agent(options={},&block)
      options = Web.spider_default_options.merge(options)

      return Spidr::Agent.new(options,&block)
    end

    #
    # Creates a new Spidr::Agent object with the given _options_ and will
    # begin spidering the specified host _name_. If a _block_ is given it
    # will be passed the newly created Spidr::Agent object, before the
    # agent begins spidering.
    #
    # The given _options_ are merged over Web.spider_default_options.
    #
    def Web.spider_host(name,options={},&block)
      options = Web.spider_default_options.merge(options)

      return Spidr::Agent.host(name,options,&block)
    end

    #
    # Creates a new Spidr::Agent object with the given _options_ and will
    # begin spidering the host of the specified _url_. If a _block_ is
    # given it will be passed the newly created Spidr::Agent object, before
    # the agent begins spidering.
    #
    # The given _options_ are merged over Web.spider_default_options.
    #
    def Web.spider_site(url,options={},&block)
      options = Web.spider_default_options.merge(options)

      return Spidr::Agent.site(url,options,&block)
    end

    # NOTE(review): a bare +protected+ only changes the visibility of
    # instance methods defined after it; it has no effect on the
    # <tt>def Web.</tt> singleton method below, which therefore remains
    # publicly callable (and is called with an explicit receiver above).
    protected

    #
    # Returns the default options for Spidr::Agent: the current Web.proxy
    # and Web.user_agent.
    #
    def Web.spider_default_options
      {:proxy => Web.proxy, :user_agent => Web.user_agent}
    end
  end
end