lib/arachni/module/trainer.rb in arachni-0.4.0.4 vs lib/arachni/module/trainer.rb in arachni-0.4.1
- old
+ new
@@ -1,224 +1,153 @@
=begin
- Arachni
- Copyright (c) 2010-2012 Tasos "Zapotek" Laskos <tasos.laskos@gmail.com>
+ Copyright 2010-2012 Tasos Laskos <tasos.laskos@gmail.com>
- This is free software; you can copy and distribute and modify
- this program under the term of the GPL v2.0 License
- (See LICENSE file for details)
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
=end
-require Arachni::Options.instance.dir['lib'] + 'module/element_db'
-require Arachni::Options.instance.dir['lib'] + 'module/output'
-
module Arachni
+
+require Options.dir['lib'] + 'module/element_db'
+require Options.dir['lib'] + 'module/output'
+
module Module
#
# Trainer class
#
-# Analyzes all HTTP responses looking for new auditable elements.
+# Analyzes key HTTP responses looking for new auditable elements.
#
-# <tasos.laskos@gmail.com>
-# <zapotek@segfault.gr>
-# @version: 0.2.1
+# @author Tasos Laskos <tasos.laskos@gmail.com>
#
class Trainer
-
include Output
include ElementDB
include Utilities
- attr_writer :page
- attr_accessor :http
- attr_accessor :parser
+ attr_reader :page
- def initialize
- @opts = Options.instance
- @updated = false
+ # @param [Arachni::Options] opts
+ def initialize( opts )
+ @opts = opts
+ @updated = false
- @pages = []
+ @pages = []
end
- def set_page( page )
+ #
+ # Sets the current working page and inits the element DB.
+ #
+ # @param [Arachni::Page] page
+ #
+ def page=( page )
+ init_db_from_page( page )
@page = page.deep_clone
end
+ alias :init :page=
#
- # Passes the response to {#analyze} for analysis
+ # Flushes the page buffer
#
- # @param [Typhoeus::Response] res
- # @param [Bool] redir was the response forcing a redirection?
+ # @return [Array<Arachni::Page>]
#
- def add_response( res, redir = false )
-
- # non text files won't contain any auditable elements
- type = @http.class.content_type( res.headers_hash )
- if type.is_a?( String) && !type.substring?( 'text' )
- return false
- end
-
- @parser = Parser.new( Options.instance, res )
- @parser.url = @page.url
-
- begin
- url = @parser.to_absolute( res.effective_url )
-
- return if !follow?( url )
-
- analyze( [ res, redir ] )
-
- rescue Exception => e
- print_error( "Invalid URL, probably broken redirection. Ignoring..." )
- print_error( "URL: #{res.effective_url}" )
- print_error_backtrace( e )
- raise e
- end
-
+ def flush
+ pages = @pages.dup
+ @pages.clear
+ pages
end
- def follow?( url )
- !@parser.skip?( url )
- end
-
#
- # Returns an updated {Arachni::Parser::Page} object or nil if there were no updates
+ # Passes the response on for analysis.
#
- # @return [Page]
+ # If the response contains new elements it creates a new page
+ # with those elements and pushes it to a buffer.
#
- def page
- if( @updated )
- @updated = false
- return @page
- else
- return nil
+ # These new pages can then be retrieved by flushing the buffer (#flush).
+ #
+ # @param [Typhoeus::Response] res
+ #
+ def push( res )
+ if !@page
+ print_debug 'No seed page assigned yet.'
+ return
end
+
+ @parser = Parser.new( res )
+ return false if !@parser.text? || @parser.skip?( @parser.url )
+
+ analyze( res )
+ true
+ rescue => e
+ print_error( e.to_s )
+ print_error_backtrace( e )
end
+ alias :<< :push
+ private
#
# Analyzes a response looking for new links, forms and cookies.
#
- # @param [Typhoeus::Response, Bool] res
+ # @param [Typhoeus::Response] res
#
def analyze( res )
+ print_debug "Started for response with request ID: ##{res.request.id}"
- print_debug( 'Started for response with request ID: #' +
- res[0].request.id.to_s )
+ page_data = @page.to_hash
+ page_data[:cookies] = find_new( :cookies )
- @parser.url = @parser.to_absolute( url_sanitize( res[0].effective_url ) )
-
- train_cookies( res[0] )
-
# if the response body is the same as the page body and
# no new cookies have appeared there's no reason to analyze the page
- if( res[0].body == @page.html && !@updated )
- print_debug( 'Page hasn\'t changed, skipping...' )
+ if res.body == @page.body && !@updated && @page.url == @parser.url
+ print_debug 'Page hasn\'t changed.'
return
end
- train_forms( res[0] )
- train_links( res[0], res[1] )
+ [ :forms, :links ].each { |type| page_data[type] = find_new( type ) }
- if( @updated )
+ if @updated
+ page_data[:url] = @parser.url
+ page_data[:query_vars] = @parser.link_vars( @parser.url )
+ page_data[:code] = res.code
+ page_data[:method] = res.request.method.to_s.upcase
+ page_data[:body] = res.body
+ page_data[:doc] = @parser.doc
+ page_data[:response_headers] = res.headers_hash
- begin
- url = res[0].request.url
- # prepare the page url
- @parser.url = @parser.to_absolute( url )
- rescue Exception => e
- print_error( "Invalid URL, probably broken redirection. Ignoring..." )
+ @pages << Page.new( page_data )
- begin
- print_error( "URL: #{res[0].request.url}" )
- rescue
- end
-
- print_error_backtrace( e )
- return
- end
-
- @page.html = res[0].body.dup
- @page.response_headers = res[0].headers_hash
- @page.query_vars = @parser.link_vars( @parser.url ).dup
- @page.url = @parser.url.dup
- @page.code = res[0].code
- @page.method = res[0].request.method.to_s.upcase
-
- @page.forms ||= []
- @page.links ||= []
- @page.cookies ||= []
-
- @pages << @page
-
@updated = false
end
- print_debug( 'Training complete.' )
+ print_debug 'Training complete.'
end
- def flush_pages
- pages = @pages.dup
- @pages = []
- pages
- end
+ def find_new( element_type )
+ elements, count = send( "update_#{element_type}".to_sym, @parser.send( element_type ) )
+ return [] if count == 0
- private
+ @updated = true
+ print_info "Found #{count} new #{element_type}."
- def train_forms( res )
- return [], 0 if !@opts.audit_forms
-
- cforms, form_cnt = update_forms( @parser.forms )
-
- if ( form_cnt > 0 )
- @page.forms = cforms.flatten.map{ |elem| elem.override_instance_scope!; elem }
- @updated = true
-
- print_info( 'Found ' + form_cnt.to_s + ' new forms.' )
- end
-
+ prepare_new_elements( elements )
end
- def train_links( res, redir = false )
- return [], 0 if !@opts.audit_links
-
- links = @parser.links.deep_clone
- if( redir )
-
- url = @parser.to_absolute( url_sanitize( res.effective_url ) )
- links << Arachni::Parser::Element::Link.new( url, {
- 'href' => url,
- 'vars' => @parser.link_vars( url )
- } )
- end
-
- clinks, link_cnt = update_links( links )
-
- if ( link_cnt > 0 )
- @page.links = clinks.flatten.map{ |elem| elem.override_instance_scope!; elem }
- @updated = true
-
- print_info( 'Found ' + link_cnt.to_s + ' new links.' )
- end
-
+ def prepare_new_elements( elements )
+ elements.flatten.map { |elem| elem.override_instance_scope; elem }
end
- def train_cookies( res )
-
- ccookies, cookie_cnt = update_cookies( @parser.cookies )
-
- if ( cookie_cnt > 0 )
- @page.cookies = ccookies.flatten.map{ |elem| elem.override_instance_scope!; elem }
- @updated = true
-
- print_info( 'Found ' + cookie_cnt.to_s + ' new cookies.' )
- end
-
- end
-
def self.info
- { :name => 'Trainer' }
+ { name: 'Trainer' }
end
end
end
end