lib/arachni/module/trainer.rb in arachni-0.4.0.4 vs lib/arachni/module/trainer.rb in arachni-0.4.1

- old
+ new

@@ -1,224 +1,153 @@ =begin - Arachni - Copyright (c) 2010-2012 Tasos "Zapotek" Laskos <tasos.laskos@gmail.com> + Copyright 2010-2012 Tasos Laskos <tasos.laskos@gmail.com> - This is free software; you can copy and distribute and modify - this program under the term of the GPL v2.0 License - (See LICENSE file for details) + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. =end -require Arachni::Options.instance.dir['lib'] + 'module/element_db' -require Arachni::Options.instance.dir['lib'] + 'module/output' - module Arachni + +require Options.dir['lib'] + 'module/element_db' +require Options.dir['lib'] + 'module/output' + module Module # # Trainer class # -# Analyzes all HTTP responses looking for new auditable elements. +# Analyzes key HTTP responses looking for new auditable elements. # -# <tasos.laskos@gmail.com> -# <zapotek@segfault.gr> -# @version: 0.2.1 +# @author Tasos Laskos <tasos.laskos@gmail.com> # class Trainer - include Output include ElementDB include Utilities - attr_writer :page - attr_accessor :http - attr_accessor :parser + attr_reader :page - def initialize - @opts = Options.instance - @updated = false + # @param [Arachni::Options] opts + def initialize( opts ) + @opts = opts + @updated = false - @pages = [] + @pages = [] end - def set_page( page ) + # + # Sets the current working page and inits the element DB. + # + # @param [Arachni::Page] page + # + def page=( page ) + init_db_from_page( page ) @page = page.deep_clone end + alias :init :page= # - # Passes the reponse to {#analyze} for analysis + # Flushes the page buffer # - # @param [Typhoeus::Response] res - # @param [Bool] redir was the response forcing a redirection? + # @return [Array<Arachni::Page>] # - def add_response( res, redir = false ) - - # non text files won't contain any auditable elements - type = @http.class.content_type( res.headers_hash ) - if type.is_a?( String) && !type.substring?( 'text' ) - return false - end - - @parser = Parser.new( Options.instance, res ) - @parser.url = @page.url - - begin - url = @parser.to_absolute( res.effective_url ) - - return if !follow?( url ) - - analyze( [ res, redir ] ) - - rescue Exception => e - print_error( "Invalid URL, probably broken redirection. Ignoring..." ) - print_error( "URL: #{res.effective_url}" ) - print_error_backtrace( e ) - raise e - end - + def flush + pages = @pages.dup + @pages.clear + pages end - def follow?( url ) - !@parser.skip?( url ) - end - # - # Returns an updated {Arachni::Parser::Page} object or nil if there waere no updates + # Passes the response on for analysis. # - # @return [Page] + # If the response contains new elements it creates a new page + # with those elements and pushes it a buffer. # - def page - if( @updated ) - @updated = false - return @page - else - return nil + # These new pages can then be retrieved by flushing the buffer (#flush). + # + # @param [Typhoeus::Response] res + # + def push( res ) + if !@page + print_debug 'No seed page assigned yet.' + return end + + @parser = Parser.new( res ) + return false if !@parser.text? || @parser.skip?( @parser.url ) + + analyze( res ) + true + rescue => e + print_error( e.to_s ) + print_error_backtrace( e ) end + alias :<< :push + private # # Analyzes a response looking for new links, forms and cookies. # - # @param [Typhoeus::Response, Bool] res + # @param [Typhoeus::Response] res # def analyze( res ) + print_debug "Started for response with request ID: ##{res.request.id}" - print_debug( 'Started for response with request ID: #' + - res[0].request.id.to_s ) + page_data = @page.to_hash + page_data[:cookies] = find_new( :cookies ) - @parser.url = @parser.to_absolute( url_sanitize( res[0].effective_url ) ) - - train_cookies( res[0] ) - # if the response body is the same as the page body and # no new cookies have appeared there's no reason to analyze the page - if( res[0].body == @page.html && !@updated ) - print_debug( 'Page hasn\'t changed, skipping...' ) + if res.body == @page.body && !@updated && @page.url == @parser.url + print_debug 'Page hasn\'t changed.' return end - train_forms( res[0] ) - train_links( res[0], res[1] ) + [ :forms, :links ].each { |type| page_data[type] = find_new( type ) } - if( @updated ) + if @updated + page_data[:url] = @parser.url + page_data[:query_vars] = @parser.link_vars( @parser.url ) + page_data[:code] = res.code + page_data[:method] = res.request.method.to_s.upcase + page_data[:body] = res.body + page_data[:doc] = @parser.doc + page_data[:response_headers] = res.headers_hash - begin - url = res[0].request.url - # prepare the page url - @parser.url = @parser.to_absolute( url ) - rescue Exception => e - print_error( "Invalid URL, probably broken redirection. Ignoring..." ) + @pages << Page.new( page_data ) - begin - print_error( "URL: #{res[0].request.url}" ) - rescue - end - - print_error_backtrace( e ) - return - end - - @page.html = res[0].body.dup - @page.response_headers = res[0].headers_hash - @page.query_vars = @parser.link_vars( @parser.url ).dup - @page.url = @parser.url.dup - @page.code = res[0].code - @page.method = res[0].request.method.to_s.upcase - - @page.forms ||= [] - @page.links ||= [] - @page.cookies ||= [] - - @pages << @page - @updated = false end - print_debug( 'Training complete.' ) + print_debug 'Training complete.' end - def flush_pages - pages = @pages.dup - @pages = [] - pages - end + def find_new( element_type ) + elements, count = send( "update_#{element_type}".to_sym, @parser.send( element_type ) ) + return [] if count == 0 - private + @updated = true + print_info "Found #{count} new #{element_type}." - def train_forms( res ) - return [], 0 if !@opts.audit_forms - - cforms, form_cnt = update_forms( @parser.forms ) - - if ( form_cnt > 0 ) - @page.forms = cforms.flatten.map{ |elem| elem.override_instance_scope!; elem } - @updated = true - - print_info( 'Found ' + form_cnt.to_s + ' new forms.' ) - end - + prepare_new_elements( elements ) end - def train_links( res, redir = false ) - return [], 0 if !@opts.audit_links - - links = @parser.links.deep_clone - if( redir ) - - url = @parser.to_absolute( url_sanitize( res.effective_url ) ) - links << Arachni::Parser::Element::Link.new( url, { - 'href' => url, - 'vars' => @parser.link_vars( url ) - } ) - end - - clinks, link_cnt = update_links( links ) - - if ( link_cnt > 0 ) - @page.links = clinks.flatten.map{ |elem| elem.override_instance_scope!; elem } - @updated = true - - print_info( 'Found ' + link_cnt.to_s + ' new links.' ) - end - + def prepare_new_elements( elements ) + elements.flatten.map { |elem| elem.override_instance_scope; elem } end - def train_cookies( res ) - - ccookies, cookie_cnt = update_cookies( @parser.cookies ) - - if ( cookie_cnt > 0 ) - @page.cookies = ccookies.flatten.map{ |elem| elem.override_instance_scope!; elem } - @updated = true - - print_info( 'Found ' + cookie_cnt.to_s + ' new cookies.' ) - end - - end - def self.info - { :name => 'Trainer' } + { name: 'Trainer' } end end end end