trainer.rb in arachni-0.4.1

- old
+ new
@@ -1,224 +1,153 @@
 =begin
-                  Arachni
-  Copyright (c) 2010-2012 Tasos "Zapotek" Laskos <tasos.laskos@gmail.com>
+    Copyright 2010-2012 Tasos Laskos <tasos.laskos@gmail.com>
 
-  This is free software; you can copy and distribute and modify
-  this program under the term of the GPL v2.0 License
-  (See LICENSE file for details)
+    Licensed under the Apache License, Version 2.0 (the "License");
+    you may not use this file except in compliance with the License.
+    You may obtain a copy of the License at
 
+        http://www.apache.org/licenses/LICENSE-2.0
+
+    Unless required by applicable law or agreed to in writing, software
+    distributed under the License is distributed on an "AS IS" BASIS,
+    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+    See the License for the specific language governing permissions and
+    limitations under the License.
 =end
 
-require Arachni::Options.instance.dir['lib'] + 'module/element_db'
-require Arachni::Options.instance.dir['lib'] + 'module/output'
-
 module Arachni
+
+require Options.dir['lib'] + 'module/element_db'
+require Options.dir['lib'] + 'module/output'
+
 module Module
 
 #
 # Trainer class
 #
-# Analyzes all HTTP responses looking for new auditable elements.
+# Analyzes key HTTP responses looking for new auditable elements.
 #
-#                                      <tasos.laskos@gmail.com>
-#                                      <zapotek@segfault.gr>
-# @version: 0.2.1
+# @author Tasos Laskos <tasos.laskos@gmail.com>
 #
 class Trainer
-
     include Output
     include ElementDB
     include Utilities
 
-    attr_writer   :page
-    attr_accessor :http
-    attr_accessor :parser
+    attr_reader :page
 
-    def initialize
-      @opts     = Options.instance
-      @updated  = false
+    # @param    [Arachni::Options]  opts
+    def initialize( opts )
+        @opts    = opts
+        @updated = false
 
-      @pages = []
+        @pages = []
     end
 
-    def set_page( page )
+    #
+    # Sets the current working page and inits the element DB.
+    #
+    # @param    [Arachni::Page]    page
+    #
+    def page=( page )
+        init_db_from_page( page )
         @page = page.deep_clone
     end
+    alias :init :page=
 
     #
-    # Passes the reponse to {#analyze} for analysis
+    # Flushes the page buffer
     #
-    # @param  [Typhoeus::Response]  res
-    # @param  [Bool]  redir  was the response forcing a redirection?
+    # @return   [Array<Arachni::Page>]
     #
-    def add_response( res, redir = false )
-
-        # non text files won't contain any auditable elements
-        type = @http.class.content_type( res.headers_hash )
-        if type.is_a?( String) && !type.substring?( 'text' )
-            return false
-        end
-
-        @parser = Parser.new( Options.instance, res )
-        @parser.url = @page.url
-
-        begin
-            url = @parser.to_absolute( res.effective_url )
-
-            return if !follow?( url )
-
-            analyze( [ res, redir ] )
-
-        rescue Exception => e
-            print_error( "Invalid URL, probably broken redirection. Ignoring..." )
-            print_error( "URL: #{res.effective_url}" )
-            print_error_backtrace( e )
-            raise e
-        end
-
+    def flush
+        pages = @pages.dup
+        @pages.clear
+        pages
     end
 
-    def follow?( url )
-        !@parser.skip?( url )
-    end
-
     #
-    # Returns an updated {Arachni::Parser::Page} object or nil if there waere no updates
+    # Passes the response on for analysis.
     #
-    # @return  [Page]
+    # If the response contains new elements, it creates a new page
+    # with those elements and pushes it to a buffer.
     #
-    def page
-        if( @updated  )
-              @updated = false
-              return  @page
-          else
-              return nil
+    # These new pages can then be retrieved by flushing the buffer (#flush).
+    #
+    # @param  [Typhoeus::Response]  res
+    #
+    def push( res )
+        if !@page
+            print_debug 'No seed page assigned yet.'
+            return
         end
+
+        @parser = Parser.new( res )
+        return false if !@parser.text? || @parser.skip?( @parser.url )
+
+        analyze( res )
+        true
+    rescue => e
+        print_error( e.to_s )
+        print_error_backtrace( e )
     end
+    alias :<< :push
 
+    private
 
     #
     # Analyzes a response looking for new links, forms and cookies.
     #
-    # @param   [Typhoeus::Response, Bool]  res
+    # @param   [Typhoeus::Response]  res
     #
     def analyze( res )
+        print_debug "Started for response with request ID: ##{res.request.id}"
 
-        print_debug( 'Started for response with request ID: #' +
-          res[0].request.id.to_s )
+        page_data           = @page.to_hash
+        page_data[:cookies] = find_new( :cookies )
 
-        @parser.url = @parser.to_absolute( url_sanitize( res[0].effective_url ) )
-
-        train_cookies( res[0] )
-
         # if the response body is the same as the page body and
         # no new cookies have appeared there's no reason to analyze the page
-        if( res[0].body == @page.html && !@updated )
-            print_debug( 'Page hasn\'t changed, skipping...' )
+        if res.body == @page.body && !@updated && @page.url == @parser.url
+            print_debug 'Page hasn\'t changed.'
             return
         end
 
-        train_forms( res[0] )
-        train_links( res[0], res[1] )
+        [ :forms, :links ].each { |type| page_data[type] = find_new( type ) }
 
-        if( @updated )
+        if @updated
+            page_data[:url]              = @parser.url
+            page_data[:query_vars]       = @parser.link_vars( @parser.url )
+            page_data[:code]             = res.code
+            page_data[:method]           = res.request.method.to_s.upcase
+            page_data[:body]             = res.body
+            page_data[:doc]              = @parser.doc
+            page_data[:response_headers] = res.headers_hash
 
-            begin
-                url         = res[0].request.url
-                # prepare the page url
-                @parser.url = @parser.to_absolute( url )
-            rescue Exception => e
-                print_error( "Invalid URL, probably broken redirection. Ignoring..." )
+            @pages << Page.new( page_data )
 
-                begin
-                    print_error( "URL: #{res[0].request.url}" )
-                rescue
-                end
-
-                print_error_backtrace( e )
-                return
-            end
-
-            @page.html = res[0].body.dup
-            @page.response_headers    = res[0].headers_hash
-            @page.query_vars = @parser.link_vars( @parser.url ).dup
-            @page.url        = @parser.url.dup
-            @page.code       = res[0].code
-            @page.method     = res[0].request.method.to_s.upcase
-
-            @page.forms      ||= []
-            @page.links      ||= []
-            @page.cookies    ||= []
-
-            @pages << @page
-
             @updated = false
         end
 
-        print_debug( 'Training complete.' )
+        print_debug 'Training complete.'
     end
 
-    def flush_pages
-        pages = @pages.dup
-        @pages = []
-        pages
-    end
+    def find_new( element_type )
+        elements, count = send( "update_#{element_type}".to_sym, @parser.send( element_type ) )
+        return [] if count == 0
 
-    private
+        @updated = true
+        print_info "Found #{count} new #{element_type}."
 
-    def train_forms( res )
-        return [], 0 if !@opts.audit_forms
-
-        cforms, form_cnt = update_forms( @parser.forms )
-
-        if ( form_cnt > 0 )
-            @page.forms = cforms.flatten.map{ |elem| elem.override_instance_scope!; elem }
-            @updated = true
-
-            print_info( 'Found ' + form_cnt.to_s + ' new forms.' )
-        end
-
+        prepare_new_elements( elements )
     end
 
-    def train_links( res, redir = false )
-        return [], 0  if !@opts.audit_links
-
-        links = @parser.links.deep_clone
-        if( redir )
-
-            url = @parser.to_absolute( url_sanitize( res.effective_url ) )
-            links << Arachni::Parser::Element::Link.new( url, {
-                'href' => url,
-                'vars' => @parser.link_vars( url )
-            } )
-        end
-
-        clinks, link_cnt = update_links( links )
-
-        if ( link_cnt > 0 )
-            @page.links = clinks.flatten.map{ |elem| elem.override_instance_scope!; elem }
-            @updated = true
-
-            print_info( 'Found ' + link_cnt.to_s + ' new links.' )
-        end
-
+    def prepare_new_elements( elements )
+        elements.flatten.map { |elem| elem.override_instance_scope; elem }
     end
 
-    def train_cookies( res )
-
-        ccookies, cookie_cnt = update_cookies( @parser.cookies )
-
-        if ( cookie_cnt > 0 )
-            @page.cookies = ccookies.flatten.map{ |elem| elem.override_instance_scope!; elem }
-            @updated = true
-
-            print_info( 'Found ' + cookie_cnt.to_s + ' new cookies.' )
-        end
-
-    end
-
     def self.info
-      { :name  => 'Trainer' }
+        { name: 'Trainer' }
     end
 
 end
 end
 end