=begin
    Copyright 2010-2013 Tasos Laskos <tasos.laskos@gmail.com>

    Licensed under the Apache License, Version 2.0 (the "License");
    you may not use this file except in compliance with the License.
    You may obtain a copy of the License at

        http://www.apache.org/licenses/LICENSE-2.0

    Unless required by applicable law or agreed to in writing, software
    distributed under the License is distributed on an "AS IS" BASIS,
    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    See the License for the specific language governing permissions and
    limitations under the License.
=end

require 'erb'
require 'ostruct'

#
# Passive proxy.
#
# Will gather data based on user actions and exchanged HTTP traffic and push that
# data to {Framework#push_to_page_queue} to be audited.
#
# @author Tasos "Zapotek" Laskos <tasos.laskos@gmail.com>
#
# @version 0.2.1
#
class Arachni::Plugins::Proxy < Arachni::Plugin::Base

    # Directory holding the plugin's bundled files (server code and panel
    # resources); the value always ends with a slash.
    BASEDIR  = File.dirname( __FILE__ ) + '/proxy/'

    # Base URL under which the proxy exposes its own control resources.
    BASE_URL = 'http://arachni.proxy/'

    #
    # Shared (singleton) rendering context for the proxy panel's ERB templates.
    #
    # Values are kept in instance variables, with matching accessors defined
    # on the fly, and templates are evaluated against this object's binding.
    #
    class TemplateScope
        include Arachni::Utilities

        PANEL_BASEDIR  = "#{Arachni::Plugins::Proxy::BASEDIR}panel/"
        PANEL_TEMPLATE = "#{PANEL_BASEDIR}panel.html.erb"
        PANEL_URL      = "#{Arachni::Plugins::Proxy::BASE_URL}panel/"

        class << self
            # Creates the instance on the first call and returns that same
            # instance on every subsequent one; arguments only matter the
            # first time around.
            def new( *args )
                @self ||= super( *args )
            end

            # @return   [TemplateScope]  The shared instance.
            def get
                new
            end
        end

        # @param    [Hash]  params  Initial values, see {#update}.
        def initialize( params = {} )
            update( params )
        end

        # @return   [String]  Base URL of the panel.
        def root_url
            PANEL_URL
        end

        # @return   [String]  Base URL of the panel's JavaScript files.
        def js_url
            root_url + 'js/'
        end

        # @return   [String]  Base URL of the panel's stylesheets.
        def css_url
            root_url + 'css/'
        end

        # @return   [String]  Base URL of the panel's images.
        def img_url
            root_url + 'img/'
        end

        # @return   [String]  URL of the panel's inspection page.
        def inspect_url
            root_url + 'inspect'
        end

        # @return   [String]  URL which shuts down the proxy.
        def shutdown_url
            url_for( :shutdown )
        end

        # Delegates to {Arachni::Plugins::Proxy.url_for}.
        def url_for( *args )
            Arachni::Plugins::Proxy.url_for( *args )
        end

        #
        # Stores every name/value pair of `params` via {#set}.
        #
        # @return   [TemplateScope] self
        #
        def update( params )
            params.each do |name, value|
                set( name, value )
            end
            self
        end

        #
        # Defines an accessor called `name` on the class and stores `value`
        # in the instance variable of the same name.
        #
        # @return   [TemplateScope] self
        #
        def set( name, value )
            self.class.send( :attr_accessor, name )
            instance_variable_set( "@#{name}", value )
            self
        end

        # @return   [Boolean]
        #   `true` if the instance variable called `ivar` holds a truthy value.
        def content_for?( ivar )
            instance_variable_get( "@#{ivar}" ) ? true : false
        end

        #
        # With a single argument returns the value stored under `name`;
        # with two, stores the HTML-encoded `value` under `name` and
        # returns `nil`.
        #
        def content_for( name, value = :nil )
            return instance_variable_get( "@#{name}" ) if value == :nil

            set( name, html_encode( value.to_s ) )
            nil
        end

        #
        # Renders a template against this scope.
        #
        # @param    [Symbol, String]    tpl
        #   A Symbol is expanded to `<tpl>.html.erb`; a path which does not
        #   exist as given is looked up under `PANEL_BASEDIR`.
        # @param    [Hash]  params
        #   Values to store in the scope prior to rendering. The `:layout`
        #   key (default `true`) controls whether the result gets wrapped
        #   in the layout template and is not stored.
        #
        # @return   [String]
        #
        def erb( tpl, params = {} )
            params = params.dup
            params[:params] ||= {}

            with_layout =
                params.include?( :layout ) ? !!params.delete( :layout ) : true

            update( params )

            tpl  = "#{tpl}.html.erb" if tpl.is_a?( Symbol )
            path = File.exist?( tpl ) ? tpl : PANEL_BASEDIR + tpl

            evaled = ERB.new( IO.read( path ) ).result( get_binding )
            return evaled if !with_layout

            layout { evaled }
        end

        # Same as {#erb} but never wraps the result in the layout.
        def render( tpl, opts )
            erb( tpl, opts.merge( layout: false ) )
        end

        # Renders the layout template using this method's binding, which
        # makes the block given to this method available to the template.
        def layout
            ERB.new( IO.read( "#{PANEL_BASEDIR}layout.html.erb" ) ).result( binding )
        end

        # Renders the panel template.
        def panel
            erb( :panel )
        end

        # @return   [Binding]  Binding used to evaluate templates.
        def get_binding
            binding
        end

        # Resets every instance variable to `nil`.
        def clear
            instance_variables.each do |ivar|
                instance_variable_set( ivar, nil )
            end
        end
    end


    # Shutdown notice.
    MSG_SHUTDOWN = 'Shutting down the Arachni proxy plug-in...'

    # Heading printed before the list of reasons a request got refused.
    MSG_DISALLOWED = "You can't access this resource via the Arachni " \
                     'proxy plug-in for the following reasons:'

    # Refusal reasons.
    MSG_NOT_IN_DOMAIN =
        'This resource is on a domain or subdomain outside the scope of the audit.'
    MSG_EXCLUDED      = 'This resource is matched by an exclude rule.'
    MSG_NOT_INCLUDED  = 'This resource is disallowed based on an include rule.'

    # Name of the cookie which carries the session token.
    SESSION_TOKEN_COOKIE = 'arachni.proxy.session_token'

    #
    # Pauses the framework, loads the proxy server code and configures the
    # server (without starting it) along with the plugin's bookkeeping.
    #
    def prepare
        # Hold the framework back for now.
        framework.pause
        print_info 'System paused.'

        require File.dirname( __FILE__ ) + '/proxy/server'

        settings = options

        @server = Server.new(
            BindAddress:         settings['bind_address'],
            Port:                settings['port'],
            ProxyVia:            false,
            ProxyContentHandler: method( :response_handler ),
            ProxyRequestHandler: method( :request_handler ),
            AccessLog:           [],
            Logger:              WEBrick::Log.new( '/dev/null', 7 ),
            Timeout:             settings['timeout']
        )

        # Pages seen through the proxy and the requests recorded as part of
        # a login sequence.
        @pages          = Set.new
        @login_sequence = []
    end

    #
    # Prints usage information, hooks the request handler into the server
    # and starts it.
    #
    def run
        print_status "Listening on: http://#{@server[:BindAddress]}:#{@server[:Port]}"

        print_status "Shutdown URL: #{url_for( :shutdown )}"
        print_info 'The scan will resume once you visit the shutdown URL.'

        banner_border = '*' * 82
        print_info
        print_info banner_border
        print_info "* You need to clear your browser's cookies for this site before using the proxy! *"
        print_info banner_border
        print_info

        # CONNECT requests are always serviced; anything else is serviced
        # only when the configured request handler returns a truthy value.
        def @server.service( req, res )
            if req.request_method.downcase != 'connect'
                return super( req, res ) if @config[:ProxyRequestHandler].call( req, res )
                return
            end

            super( req, res )
            nil
        end

        TemplateScope.get.set :params, {}
        @server.start
    end

    #
    # Pushes every gathered page to the framework's page queue and resumes
    # the framework.
    #
    def clean_up
        @pages.each do |page|
            framework.push_to_page_queue( page )
        end

        framework.resume
    end

    #
    # @return   [Enumerable]
    #   Gathered pages which are textual and have at least one form, link
    #   or cookie.
    #
    def prepare_pages_for_inspection
        @pages.select do |page|
            has_elements = page.forms.any? || page.links.any? || page.cookies.any?
            has_elements && page.text?
        end
    end

    #
    # Called by the proxy server for each request, before it gets serviced.
    #
    # Refreshes the template scope, enforces the scan's scope rules and the
    # session-token check, handles the sign-in and shutdown URLs and serves
    # the control panel.
    #
    # @param    [WEBrick::HTTPRequest]  req
    # @param    [Object]    res
    #   Response to populate when the request is handled here.
    #
    # @return   [true, false, nil]
    #   `true` if the server should go on and service the request as usual,
    #   `false`/`nil` if the response has already been taken care of here.
    #
    def request_handler( req, res )
        # Clear the template scope to prepare it for this request.
        TemplateScope.get.clear

        TemplateScope.get.set :page_count, prepare_pages_for_inspection.size
        TemplateScope.get.set :recording, recording?

        #
        # Bear with me 'cause this is gonna get weird.
        #
        # We need the session cookie to be set for both the domain of the scan
        # target (so that we'll be able to authorize every request) *and*
        # for the domain used for controlling the proxy via the panel
        # (so that we can check those requests too, to prevent another user
        # from shutting down the proxy).
        #
        p = URI( framework.opts.url )

        # This is the URL we'll use to sign in and set the cookie for the
        # domain of the scan target.
        sign_in_url = "#{p.scheme}://#{p.host}/arachni.proxy.sign_in"

        TemplateScope.get.set :sign_in_url, sign_in_url

        url    = req.request_uri.to_s
        # Query parameters take precedence over body parameters.
        params = parse_request_body( req.body.to_s ).merge( parse_query( url ) )

        print_status "Requesting #{url}"

        reasons = []
        if !system_url?( url )
            reasons << MSG_NOT_IN_DOMAIN if !path_in_domain?( url )
            reasons << MSG_EXCLUDED      if exclude_path?( url )
            reasons << MSG_NOT_INCLUDED  if !include_path?( url )
        end

        if reasons.any?
            print_info MSG_DISALLOWED
            reasons.each { |reason| print_info "  * #{reason}" }

            # Forbidden.
            res.status = 403
            set_response_body( res, erb( '403_forbidden'.to_sym, { reasons: reasons } ) )
            return false
        end

        # This is a sign-in request.
        if params['session_token'] == options['session_token']
            # Set us up for the redirection that's coming.
            res.status = 302

            # Set the session cookie.
            res.header['Set-Cookie'] = "#{SESSION_TOKEN_COOKIE}=#{options['session_token']}; path=/"

            # This is the cookie-set request for the domain of the scan target domain...
            if url == sign_in_url && req.request_method == 'POST'
                # ...now we need to set the cookie for the proxy control domain
                # so redirect us to its handler.
                res.header['Location'] = "#{url_for( :sign_in )}?session_token=#{params['session_token']}"

            # This is the cookie-set request for the domain of the proxy control domain...
            elsif url.start_with?( url_for( :sign_in ) )
                # ...time to send the user to the webapp.
                res.header['Location'] = framework.opts.url
            end

            return
        elsif requires_token?( url ) && !valid_session_token?( req )
            print_info MSG_DISALLOWED
            print_info '  * Request does not have a valid session token'

            # Unauthorized.
            res.status = 401
            set_response_body( res, erb( 'sign_in'.to_sym ) )

            return
        end

        if shutdown?( url )
            print_status 'Shutting down...'
            set_response_body( res, erb( :shutdown_message ) )
            @server.shutdown
            return
        end

        @login_sequence << req if recording?

        # Avoid propagating the proxy's session cookie to the webapp.
        req.cookies.reject! { |c| c.name == SESSION_TOKEN_COOKIE }

        if url.start_with? url_for( :panel )
            # Dispatch on the part of the path which follows the first
            # segment (i.e. "/panel").
            body =  case '/' + res.request_uri.path.split( '/' )[2..-1].join( '/' )
                        when '/'
                            erb :panel

                        when '/inspect'
                            erb :inspect,
                                pages: prepare_pages_for_inspection

                        when '/help'
                            erb :help

                        when '/record/start'
                            record_start
                            erb :panel

                        when '/record/stop'
                            record_stop
                            erb :verify_login_check, verify_fail: false, params: {
                                'url'     => session.opts.login_check_url,
                                'pattern' => session.opts.login_check_pattern
                            }

                        when '/verify/login_check'

                            if req.request_method != 'POST'
                                erb :verify_login_check, verify_fail: false
                            else
                                session.set_login_check( params['url'], params['pattern'] )

                                if !session.logged_in?
                                    erb :verify_login_check, verify_fail: true
                                else
                                    erb :verify_login_sequence,
                                        params: params,
                                        form:   find_login_form
                                end

                            end

                        when '/verify/login_sequence'

                            session.login_form = find_login_form

                            logged_in = false
                            framework.http.sandbox do |http|
                                http.cookie_jar.clear
                                session.login
                                logged_in = session.logged_in?
                            end

                            erb :verify_login_final, ok: logged_in

                        else
                            # Anything else is treated as a static panel file.
                            asset = read_panel_asset( res.request_uri.path )

                            if asset
                                asset
                            else
                                # Not found.
                                res.status = 404
                                erb '404_not_found'.to_sym
                            end
                        end.to_s
            set_response_body( res, body )
            return
        end

        true
    end

    #
    # Reads the static panel file which corresponds to a request path.
    #
    # The path is resolved first and only served if it ends up inside the
    # panel directory, so that `..` segments can't be used to read other
    # files; directories and missing files are treated alike.
    #
    # @param    [String]    uri_path    Path of the request URI.
    #
    # @return   [String, nil]
    #   File contents, `nil` if there is no such regular file inside the
    #   panel directory.
    #
    def read_panel_asset( uri_path )
        panel_root = File.expand_path( TemplateScope::PANEL_BASEDIR )
        asset_path = File.expand_path(
            TemplateScope::PANEL_BASEDIR + '/../' + uri_path
        )

        return if !asset_path.start_with?( panel_root + '/' )
        return if !File.file?( asset_path )

        IO.read( asset_path )
    rescue Errno::ENOENT
        # The file vanished between the check and the read.
        nil
    end

    #
    # @param    [String]    url
    #
    # @return   [Boolean]
    #   `false` for panel assets and the sign-in URL, `true` for everything
    #   else.
    #
    def requires_token?( url )
        !asset?( url ) && !url.start_with?( url_for( :sign_in ) )
    end

    #
    # @param    [#cookies]  req
    #
    # @return   [Boolean]
    #   `true` if no session token has been configured or if the request's
    #   session cookie matches the configured token.
    #
    def valid_session_token?( req )
        expected = options['session_token']

        # An empty (or missing) token means that authorization is disabled.
        return true if expected.to_s.empty?

        cookies_to_hash( req.cookies )[SESSION_TOKEN_COOKIE] == expected
    end

    #
    # @return   [Boolean]
    #   Whether requests are currently being recorded as a login sequence;
    #   `false` until {#record_start} has been called.
    #
    def recording?
        @record = false if !@record
        @record
    end

    # Discards any previously recorded login sequence and starts recording.
    def record_start
        @login_sequence = Array.new
        @record         = true
    end
    # Stops recording; the sequence recorded so far is left untouched.
    def record_stop
        @record = false
    end

    #
    # Tries to determine which form is the login one from the logged requests in
    # the recorded login sequence.
    #
    # @return   [Arachni::Element::Form, nil]
    #   Form matching the first recorded request that yields one, `nil` if
    #   none does.
    #
    def find_login_form
        match = nil

        # Stop at the first recorded request which can be matched to a form.
        @login_sequence.each do |request|
            break if (match = find_login_form_from_request( request ))
        end

        match || nil
    end

    #
    # Goes through all forms which contain password fields and tries to match
    # them to the given request.
    #
    # @param    [WEBrick::HTTPRequest]  request
    #
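    # @return   [Arachni::Element::Form, nil]
    #   The matching form, updated with the request's parameters; `nil` if
    #   the request has no body parameters or no form matches.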
    #
    # @see #forms_with_password
    #
    def find_login_form_from_request( request )
        # Every request gets recorded, including ones without a body, so
        # normalize a missing body to an empty String, same as
        # #request_handler does.
        params = parse_request_body( request.body.to_s )
        return if params.empty?

        form = session.find_login_form(
            pages:  @pages.to_a,
            action: normalize_url( request.request_uri.to_s ),
            inputs: params.keys
        )
        return if !form

        # Fill in the form with the values which were actually submitted.
        form.update( params )
    end

    #
    # Goes through the logged pages and returns all forms which contain password fields
    #
    # @return   [Array<Arachni::Element::Form>]
    #
    def forms_with_password
        # Accumulate the password forms of each page, in page order.
        @pages.inject( [] ) do |matches, page|
            matches + page.forms.select { |form| form.requires_password? }
        end
    end

    #
    # Called by the proxy for each response.
    #
    def response_handler( req, res )
        # Responses to CONNECT requests are passed through as they are.
        return res if res.request_method.to_s.downcase == 'connect'

        # Work on a copy of the headers, with the cookies added to it.
        headers = res.header ? {}.merge( res.header ) : {}
        headers['set-cookie'] = res.cookies unless res.cookies.empty?

        response = Typhoeus::Response.new(
            effective_url: res.request_uri.to_s,
            body:          res.body.dup,
            headers_hash:  headers,
            method:        res.request_method,
            code:          res.status.to_i,
            request:       Typhoeus::Request.new( req.request_uri.to_s )
        )

        page = page_from_response( response )

        # Requests with a body contribute a form built out of their parameters.
        page = update_forms( page, req, res ) if req.body

        print_info " *  #{page.forms.size} forms"
        print_info " *  #{page.links.size} links"
        print_info " *  #{page.cookies.size} cookies"

        @pages << page.dup

        inject_panel( res )
    end

    #
    # Injects the control panel right after the opening `<body>` tag of
    # HTML responses.
    #
    # Responses which aren't `text/html`, or which have no `<body>` tag,
    # are left untouched.
    #
    # @param    [#header, #body]    res
    #
    # @return   [Object]    `res`
    #
    def inject_panel( res )
        return res if !res.header['content-type'].to_s.start_with?( 'text/html' )

        # Only the first opening tag is touched; the block form keeps
        # backslashes in the inserted markup from being interpreted as
        # back-references. `sub!` returns nil when nothing matched.
        injected = res.body.sub!( /<\s*body\b[^>]*>/i ) do |body_tag|
            "#{body_tag}#{panel_iframe}"
        end
        return res if !injected

        # Content-Length counts bytes, not characters.
        res.header['content-length'] = res.body.bytesize.to_s
        res
    end

    #
    # @param    [Enumerable<#name, #value>] cookies
    #
    # @return   [Hash]
    #   Cookie values keyed by cookie name; later cookies win over earlier
    #   ones with the same name.
    #
    def cookies_to_hash( cookies )
        cookies.each_with_object( {} ) do |cookie, hash|
            hash[cookie.name] = cookie.value
        end
    end

    def panel_iframe
        <<-HTML
            <style type='text/css'>
                .panel {
                    left:   0px;
                    top:    0px;
                    margin: 0px;
                    width:  100%;
                    height: 50px;
                    border: 0px;
                    position:fixed;
                }
                body {
                    padding-top: 40px;
                }
            </style>
            <iframe class='panel' src='#{TemplateScope::PANEL_URL}'></iframe>
        HTML
    end

    # Renders a template via the shared {TemplateScope}; the arguments are
    # passed on as they are.
    def erb( *args )
        scope = TemplateScope.get
        scope.erb( *args )
    end

    #
    # Appends to the page a form which represents the given request: its
    # action is the request URL and its inputs are the parameters of the
    # request body.
    #
    # @return   [Object]    `page`
    #
    def update_forms( page, req, res )
        request_form = Form.new(
            res.request_uri.to_s,
            action: res.request_uri.to_s,
            method: req.request_method,
            inputs: form_parse_request_body( req.body )
        )

        page.forms << request_form
        page
    end

    # @return   [Boolean]
    #   `true` if the URL belongs to the proxy itself, i.e. lives under
    #   `BASE_URL`.
    def system_url?( url )
        url.start_with?( BASE_URL )
    end

    # @return   [Boolean]
    #   `true` if the URL (converted to a String) starts with the shutdown
    #   URL.
    def shutdown?( url )
        shutdown_url = url_for( :shutdown )
        url.to_s.start_with?( shutdown_url )
    end

    #
    # Sets the body of the response and updates its `content-length` and,
    # when the body looks like an HTML document, `content-type` headers.
    #
    # @param    [#body=, #header]   res
    # @param    [String]    body
    #
    # @return   [Object]    `res`
    #
    def set_response_body( res, body )
        res.body = body

        # Content-Length counts bytes, not characters.
        res.header['content-length'] = res.body.bytesize.to_s

        # Look for the <html> tag in a binary copy, so that bodies which
        # aren't valid text in their encoding (like images) can be matched
        # without raising an encoding error.
        markup = body.to_s.dup.force_encoding( Encoding::BINARY )
        res.header['content-type'] = 'text/html' if markup =~ /<(\s)*html(\s*)(.*?)>/i

        res
    end

    # @return   [Boolean]
    #   `true` if the URL lives under the panel's `css`, `js` or `img`
    #   directories.
    def asset?( url )
        panel_root = url_for( :panel )

        %w(css js img).any? do |directory|
            url.start_with?( "#{panel_root}/#{directory}/" )
        end
    end

    #
    # @param    [Symbol]    type
    #   One of `:shutdown`, `:panel`, `:inspect` or `:sign_in`.
    #
    # @return   [String, nil]
    #   URL of the given proxy resource, `nil` for unknown types.
    #
    def self.url_for( type )
        case type
            when :shutdown then "#{BASE_URL}shutdown"
            when :panel    then "#{BASE_URL}panel"
            when :inspect  then "#{BASE_URL}panel/inspect"
            when :sign_in  then "#{BASE_URL}sign_in"
        end
    end
    # Instance-level shortcut to the class-level `url_for`.
    def url_for( *args )
        plugin_class = self.class
        plugin_class.url_for( *args )
    end

    #
    # @return   [Hash]
    #   Plugin metadata: name, description, author, version and the
    #   supported options.
    #
    def self.info
        plugin_options = [
            Options::Port.new( 'port', [false, 'Port to bind to.', 8282] ),
            Options::Address.new(
                'bind_address',
                [false, 'IP address to bind to.', '0.0.0.0']
            ),
            Options::String.new(
                'session_token',
                [false, 'A session token to demand from users before allowing them to use the proxy.', '']
            ),
            Options::Int.new(
                'timeout',
                [false, 'How long to wait for a request to complete, in milliseconds.', 20000]
            )
        ]

        {
            name:        'Proxy',
            description: %q{
                * Gathers data based on user actions and exchanged HTTP
                    traffic and pushes that data to the framework's page-queue to be audited.
                * Updates the framework cookies with the cookies of the HTTP requests and
                    responses, thus it can also be used to login to a web application.
                * Supports SSL interception.
                * Authorization via a configurable session token.

                To skip crawling and only audit elements discovered by using the proxy
                set the link-count limit option to 0.

                NOTICE:
                    The 'session_token' will be looked for in a cookie named
                    'arachni.proxy.session_token', so if you choose to use a token to
                    restrict access to the proxy and need to pass traffic through the
                    proxy programmatically please configure your HTTP client with
                    a cookie named 'arachni.proxy.session_token' with the value of
                    the 'session_token' option.

                WARNING:
                    The 'session_token' option is not a way to secure usage of
                    this proxy but rather a way to restrict usage enough to avoid
                    users unwittingly interfering with each others' sessions.},
            author:      'Tasos "Zapotek" Laskos <tasos.laskos@gmail.com>',
            version:     '0.2.1',
            options:     plugin_options
        }
    end

end