# Author::    Sergio Fierens
# License::   MPL 1.1
# Project::   ai4r
# Url::       http://ai4r.rubyforge.org/
#
# You can redistribute it and/or modify it under the terms of
# the Mozilla Public License version 1.1 as published by the
# Mozilla Foundation at http://www.mozilla.org/MPL/MPL-1.1.txt

require File.dirname(__FILE__) + '/../data/parameterizable'

module Ai4r

  # Artificial Neural Networks are mathematical or computational models based on
  # biological neural networks.
  #
  # More about neural networks:
  #
  # * http://en.wikipedia.org/wiki/Artificial_neural_network
  #
  module NeuralNetwork

    # = Introduction
    #
    # This is an implementation of a multilayer perceptron network, using
    # the backpropagation algorithm for learning.
    #
    # Backpropagation is a supervised learning technique (described
    # by Paul Werbos in 1974, and further developed by David E.
    # Rumelhart, Geoffrey E. Hinton and Ronald J. Williams in 1986).
    #
    # = Features
    #
    # * Support for any network architecture (number of layers and neurons)
    # * Configurable propagation function
    # * Optional usage of bias
    # * Configurable momentum
    # * Configurable learning rate
    # * Configurable initial weight function
    # * 100% ruby code, no external dependency
    #
    # = Parameters
    #
    # Use the class method get_parameters_info to obtain details on the
    # algorithm parameters. Use set_parameters to set values for these
    # parameters, as in the example after this list.
    #
    # * :disable_bias => If true, the algorithm will not use bias nodes.
    #   False by default.
    # * :initial_weight_function => f(n, i, j) must return the initial
    #   weight for the connection between the node i in layer n, and node j in
    #   layer n+1. By default a random number in [-1, 1) range.
    # * :propagation_function => By default:
    #   lambda { |x| 1/(1+Math.exp(-1*(x))) }
    # * :derivative_propagation_function => Derivative of the propagation
    #   function, based on propagation function output.
    #   By default: lambda { |y| y*(1-y) }, where y=propagation_function(x)
    # * :learning_rate => By default 0.25
    # * :momentum => By default 0.1. Set this parameter to 0 to disable
    #   momentum.
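    #
    # For example (an illustrative sketch: the values shown and the tanh
    # propagation/derivative pair are assumptions, not the defaults):
    #
    #   net = Ai4r::NeuralNetwork::Backpropagation.new([4, 3, 2])
    #   net.set_parameters(
    #     :learning_rate => 0.5,
    #     :momentum => 0.15,
    #     :propagation_function => lambda { |x| Math.tanh(x) },
    #     :derivative_propagation_function => lambda { |y| 1.0 - y**2 })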
    #
    # = How to use it
    #
    #   # Create the network with 4 inputs, 1 hidden layer with 3 neurons,
    #   # and 2 outputs
    #   net = Ai4r::NeuralNetwork::Backpropagation.new([4, 3, 2])
    #
    #   # Train the network
    #   1000.times do |i|
    #     net.train(example[i], result[i])
    #   end
    #
    #   # Use it: Evaluate data with the trained network
    #   net.eval([12, 48, 12, 25])
    #     # => [0.86, 0.01]
    #
    # More about multilayer perceptron neural networks and backpropagation:
    #
    # * http://en.wikipedia.org/wiki/Backpropagation
    # * http://en.wikipedia.org/wiki/Multilayer_perceptron
    #
    # = About the project
    # Author::    Sergio Fierens
    # License::   MPL 1.1
    # Url::       http://ai4r.rubyforge.org
    class Backpropagation

      include Ai4r::Data::Parameterizable

      parameters_info :disable_bias => "If true, the algorithm will not use "+
            "bias nodes. False by default.",
          :initial_weight_function => "f(n, i, j) must return the initial "+
            "weight for the connection between the node i in layer n, and "+
            "node j in layer n+1. By default a random number in [-1, 1) range.",
          :propagation_function => "By default: " +
            "lambda { |x| 1/(1+Math.exp(-1*(x))) }",
          :derivative_propagation_function => "Derivative of the propagation "+
            "function, based on propagation function output. By default: " +
            "lambda { |y| y*(1-y) }, where y=propagation_function(x)",
          :learning_rate => "By default 0.25",
          :momentum => "By default 0.1. Set this parameter to 0 to disable "+
            "momentum."

      attr_accessor :structure, :weights, :activation_nodes, :last_changes

      # Creates a new network specifying its architecture.
      # E.g.
      #
      #   net = Backpropagation.new([4, 3, 2])    # 4 inputs
      #                                           # 1 hidden layer with 3 neurons,
      #                                           # 2 outputs
      #   net = Backpropagation.new([2, 3, 3, 4]) # 2 inputs
      #                                           # 2 hidden layers with 3 neurons each,
      #                                           # 4 outputs
      #   net = Backpropagation.new([2, 1])       # 2 inputs
      #                                           # No hidden layer
      #                                           # 1 output
      def initialize(network_structure)
        @structure = network_structure
        @initial_weight_function = lambda { |n, i, j| ((rand 2000)/1000.0) - 1}
        @propagation_function = lambda { |x| 1/(1+Math.exp(-1*(x))) } #lambda { |x| Math.tanh(x) }
        @derivative_propagation_function = lambda { |y| y*(1-y) } #lambda { |y| 1.0 - y**2 }
        @disable_bias = false
        @learning_rate = 0.25
        @momentum = 0.1
      end

      # Evaluates the input.
      # E.g.
      #   net = Backpropagation.new([4, 3, 2])
      #   net.eval([25, 32.3, 12.8, 1.5])
      #     # => [0.83, 0.03]
      def eval(input_values)
        check_input_dimension(input_values.length)
        init_network if !@weights
        feedforward(input_values)
        return @activation_nodes.last.clone
      end

      # This method trains the network using the backpropagation algorithm.
      #
      # input: Network input
      #
      # output: Expected output for the given input.
      #
      # This method returns the network error:
      # => 0.5 * sum( (expected_value[i] - output_value[i])**2 )
      def train(inputs, outputs)
        eval(inputs)
        backpropagate(outputs)
        calculate_error(outputs)
      end

      # Initialize (or reset) activation nodes and weights, with the
      # provided net structure and parameters.
      def init_network
        init_activation_nodes
        init_weights
        init_last_changes
        return self
      end

      protected

      # Custom serialization. It used to fail trying to serialize because
      # it uses lambda functions internally, and they cannot be serialized.
      # Now it does not fail, but if you customize the values of
      # * initial_weight_function
      # * propagation_function
      # * derivative_propagation_function
      # you must restore their values manually after loading the instance
      # (see the sketch after marshal_load below).
      def marshal_dump
        [@structure, @disable_bias, @learning_rate, @momentum,
          @weights, @last_changes, @activation_nodes]
      end

      def marshal_load(ary)
        @structure, @disable_bias, @learning_rate, @momentum,
          @weights, @last_changes, @activation_nodes = ary
        @initial_weight_function = lambda { |n, i, j| ((rand 2000)/1000.0) - 1}
        @propagation_function = lambda { |x| 1/(1+Math.exp(-1*(x))) } #lambda { |x| Math.tanh(x) }
        @derivative_propagation_function = lambda { |y| y*(1-y) } #lambda { |y| 1.0 - y**2 }
      end
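      # A minimal save/restore sketch for the caveat above. The file name
      # "net.dump" and the variable net (a trained instance) are illustrative,
      # and the tanh pair stands in for whatever custom lambdas you configured
      # before dumping; reassign them after loading:
      #
      #   File.open("net.dump", "wb") { |f| Marshal.dump(net, f) }
      #   net = File.open("net.dump", "rb") { |f| Marshal.load(f) }
      #   net.set_parameters(
      #     :propagation_function => lambda { |x| Math.tanh(x) },
      #     :derivative_propagation_function => lambda { |y| 1.0 - y**2 })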
      # Propagate error backwards
      def backpropagate(expected_output_values)
        check_output_dimension(expected_output_values.length)
        calculate_output_deltas(expected_output_values)
        calculate_internal_deltas
        update_weights
      end

      # Propagate values forward
      def feedforward(input_values)
        input_values.each_index do |input_index|
          @activation_nodes.first[input_index] = input_values[input_index]
        end
        @weights.each_index do |n|
          @structure[n+1].times do |j|
            sum = 0.0
            @activation_nodes[n].each_index do |i|
              sum += (@activation_nodes[n][i] * @weights[n][i][j])
            end
            @activation_nodes[n+1][j] = @propagation_function.call(sum)
          end
        end
      end

      # Initialize neurons structure.
      def init_activation_nodes
        @activation_nodes = Array.new(@structure.length) do |n|
          Array.new(@structure[n], 1.0)
        end
        if not disable_bias
          @activation_nodes[0...-1].each {|layer| layer << 1.0 }
        end
      end

      # Initialize the weight arrays using the function specified with the
      # initial_weight_function parameter
      def init_weights
        @weights = Array.new(@structure.length-1) do |i|
          nodes_origin = @activation_nodes[i].length
          nodes_target = @structure[i+1]
          Array.new(nodes_origin) do |j|
            Array.new(nodes_target) do |k|
              @initial_weight_function.call(i, j, k)
            end
          end
        end
      end

      # Momentum usage needs to know how much a weight changed in the
      # previous training. This method initializes the @last_changes
      # structure with 0 values.
      def init_last_changes
        @last_changes = Array.new(@weights.length) do |w|
          Array.new(@weights[w].length) do |i|
            Array.new(@weights[w][i].length, 0.0)
          end
        end
      end

      # Calculate deltas for the output layer
      def calculate_output_deltas(expected_values)
        output_values = @activation_nodes.last
        output_deltas = []
        output_values.each_index do |output_index|
          error = expected_values[output_index] - output_values[output_index]
          output_deltas << @derivative_propagation_function.call(
            output_values[output_index]) * error
        end
        @deltas = [output_deltas]
      end

      # Calculate deltas for hidden layers
      def calculate_internal_deltas
        prev_deltas = @deltas.last
        (@activation_nodes.length-2).downto(1) do |layer_index|
          layer_deltas = []
          @activation_nodes[layer_index].each_index do |j|
            error = 0.0
            @structure[layer_index+1].times do |k|
              error += prev_deltas[k] * @weights[layer_index][j][k]
            end
            layer_deltas[j] = (@derivative_propagation_function.call(
              @activation_nodes[layer_index][j]) * error)
          end
          prev_deltas = layer_deltas
          @deltas.unshift(layer_deltas)
        end
      end

      # Update weights after @deltas have been calculated.
      def update_weights
        (@weights.length-1).downto(0) do |n|
          @weights[n].each_index do |i|
            @weights[n][i].each_index do |j|
              change = @deltas[n][j]*@activation_nodes[n][i]
              @weights[n][i][j] += ( learning_rate * change +
                momentum * @last_changes[n][i][j])
              @last_changes[n][i][j] = change
            end
          end
        end
      end

      # Calculate quadratic error for an expected output value
      # Error = 0.5 * sum( (expected_value[i] - output_value[i])**2 )
      def calculate_error(expected_output)
        output_values = @activation_nodes.last
        error = 0.0
        expected_output.each_index do |output_index|
          error += 0.5*(output_values[output_index]-expected_output[output_index])**2
        end
        return error
      end

      def check_input_dimension(inputs)
        raise ArgumentError, "Wrong number of inputs. " +
          "Expected: #{@structure.first}, " +
          "received: #{inputs}." if inputs!=@structure.first
      end

      def check_output_dimension(outputs)
        raise ArgumentError, "Wrong number of outputs. " +
          "Expected: #{@structure.last}, " +
          "received: #{outputs}." if outputs!=@structure.last
      end

    end
  end
end