lib/ai4r/neural_network/backpropagation.rb in ai4r-1.3 vs lib/ai4r/neural_network/backpropagation.rb in ai4r-1.4
- old
+ new
@@ -1,270 +1,293 @@
# Author:: Sergio Fierens
# License:: MPL 1.1
# Project:: ai4r
# Url:: http://ai4r.rubyforge.org/
#
-# Specials thanks to John Miller, for several bugs fixes and comments in the
-# Backpropagation implementation
-#
# You can redistribute it and/or modify it under the terms of
# the Mozilla Public License version 1.1 as published by the
# Mozilla Foundation at http://www.mozilla.org/MPL/MPL-1.1.txt
-#
-module Ai4r
+require File.dirname(__FILE__) + '/../data/parameterizable'
- # The utility of artificial neural network
- # models lies in the fact that they can be used
- # to infer a function from observations.
- # This is particularly useful in applications
- # where the complexity of the data or task makes the
- # design of such a function by hand impractical.
- # Neural Networks are being used in many businesses and applications. Their
- # ability to learn by example makes them attractive in environments where
- # the business rules are either not well defined or are hard to enumerate and
- # define. Many people believe that Neural Networks can only solve toy problems.
- # Give them a try, and let you decide if they are good enough to solve your
- # needs.
+module Ai4r
+
+ # Artificial Neural Networks are mathematical or computational models based on
+ # biological neural networks.
#
- # In this module you will find an implementation of neural networks
- # using the Backpropagation is a supervised learning technique (described
- # by Paul Werbos in 1974, and further developed by David E.
- # Rumelhart, Geoffrey E. Hinton and Ronald J. Williams in 1986)
+ # More about neural networks:
#
- # More about neural networks and backpropagation:
- #
- # * http://en.wikipedia.org/wiki/Backpropagation
- # * http://en.wikipedia.org/wiki/Neural_networks
+ # * http://en.wikipedia.org/wiki/Artificial_neural_network
+ #
module NeuralNetwork
-
+
# = Introduction
#
- # This is an implementation of neural networks
- # using the Backpropagation is a supervised learning technique (described
+ # This is an implementation of a multilayer perceptron network, using
+ # the backpropagation algorithm for learning.
+ #
+ # Backpropagation is a supervised learning technique (described
# by Paul Werbos in 1974, and further developed by David E.
# Rumelhart, Geoffrey E. Hinton and Ronald J. Williams in 1986)
#
+ # = Features
+ #
+ # * Support for any network architecture (number of layers and neurons)
+ # * Configurable propagation function
+ # * Optional usage of bias
+ # * Configurable momentum
+ # * Configurable learning rate
+ # * Configurable initial weight function
+ # * 100% ruby code, no external dependency
+ #
+ # = Parameters
+ #
+  # Use the class method get_parameters_info to obtain details on the algorithm
+  # parameters. Use set_parameters to set values for these parameters (see the
+  # example after the parameter list below).
+ #
+  #   * :disable_bias => If true, the algorithm will not use bias nodes.
+ # False by default.
+ # * :initial_weight_function => f(n, i, j) must return the initial
+  #     weight for the connection between the node i in layer n, and node j in
+ # layer n+1. By default a random number in [-1, 1) range.
+ # * :propagation_function => By default:
+ # lambda { |x| 1/(1+Math.exp(-1*(x))) }
+ # * :derivative_propagation_function => Derivative of the propagation
+ # function, based on propagation function output.
+ # By default: lambda { |y| y*(1-y) }, where y=propagation_function(x)
+ # * :learning_rate => By default 0.25
+ # * :momentum => By default 0.1. Set this parameter to 0 to disable
+ # momentum
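+  #
+  # For example, a minimal sketch of tuning these parameters (this assumes the
+  # set_parameters method mentioned above accepts a hash of parameter names
+  # and values):
+  #
+  #   net = Ai4r::NeuralNetwork::Backpropagation.new([4, 3, 2])
+  #   net.set_parameters(:learning_rate => 0.5, :momentum => 0.0)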
+ #
# = How to use it
#
- # # Create the network
- # net = Ai4r::NeuralNetwork::Backpropagation.new([4, 3, 2]) # 4 inputs
- # # 1 hidden layer with 3 neurons,
- # # 2 outputs
+ # # Create the network with 4 inputs, 1 hidden layer with 3 neurons,
+ # # and 2 outputs
+ # net = Ai4r::NeuralNetwork::Backpropagation.new([4, 3, 2])
+ #
# # Train the network
- # 1..upto(100) do |i|
+ # 1000.times do |i|
# net.train(example[i], result[i])
# end
#
# # Use it: Evaluate data with the trained network
- # net.eval([12, 48, 12, 25]) # => [0.86, 0.01]
- #
- class Backpropagation
-
- DEFAULT_BETA = 0.5
- DEFAULT_LAMBDA = 0.25
- DEFAULT_THRESHOLD = 0.66
-
- # Creates a new network specifying the its architecture.
- # E.g.
- #
- # net = Backpropagation.new([4, 3, 2]) # 4 inputs
- # # 1 hidden layer with 3 neurons,
- # # 2 outputs
- # net = Backpropagation.new([2, 3, 3, 4]) # 2 inputs
- # # 2 hidden layer with 3 neurons each,
- # # 4 outputs
- # net = Backpropagation.new([2, 1]) # 2 inputs
- # # No hidden layer
- # # 1 output
- #
- # Optionally you can customize certain parameters:
+ # net.eval([12, 48, 12, 25])
+ # => [0.86, 0.01]
+ #
+ # More about multilayer perceptron neural networks and backpropagation:
#
- # threshold = A real number which we will call Threshold.
- # Experiments have shown that best values for q are between 0.25 and 1.
+ # * http://en.wikipedia.org/wiki/Backpropagation
+ # * http://en.wikipedia.org/wiki/Multilayer_perceptron
#
- # lambda = The Learning Rate: a real number, usually between 0.05 and 0.25.
- #
- # momentum = A momentum will avoid oscillations during learning, converging
- # to a solution in less iterations.
- def initialize(layer_sizes, threshold=DEFAULT_THRESHOLD, lambda=DEFAULT_LAMBDA, momentum=DEFAULT_BETA)
- @neurons = []
- layer_sizes.reverse.each do |layer_size|
- layer = []
- layer_size.times { layer << Neuron.new(@neurons.last, threshold, lambda, momentum) }
- @neurons << layer
- end
- @neurons.reverse!
+ # = About the project
+ # Author:: Sergio Fierens
+ # License:: MPL 1.1
+ # Url:: http://ai4r.rubyforge.org
+ class Backpropagation
+
+ include Ai4r::Data::Parameterizable
+
+      parameters_info :disable_bias => "If true, the algorithm will not use "+
+ "bias nodes. False by default.",
+ :initial_weight_function => "f(n, i, j) must return the initial "+
+          "weight for the connection between the node i in layer n, and "+
+ "node j in layer n+1. By default a random number in [-1, 1) range.",
+ :propagation_function => "By default: " +
+ "lambda { |x| 1/(1+Math.exp(-1*(x))) }",
+ :derivative_propagation_function => "Derivative of the propagation "+
+ "function, based on propagation function output. By default: " +
+ "lambda { |y| y*(1-y) }, where y=propagation_function(x)",
+ :learning_rate => "By default 0.25",
+ :momentum => "By default 0.1. Set this parameter to 0 to disable "+
+ "momentum."
+
+ attr_accessor :structure, :weights, :activation_nodes
+
+      # Creates a new network, specifying its architecture.
+ # E.g.
+ #
+ # net = Backpropagation.new([4, 3, 2]) # 4 inputs
+ # # 1 hidden layer with 3 neurons,
+ # # 2 outputs
+ # net = Backpropagation.new([2, 3, 3, 4]) # 2 inputs
+ # # 2 hidden layer with 3 neurons each,
+ # # 4 outputs
+ # net = Backpropagation.new([2, 1]) # 2 inputs
+ # # No hidden layer
+ # # 1 output
+ def initialize(network_structure)
+ @structure = network_structure
+ @initial_weight_function = lambda { |n, i, j| ((rand 2000)/1000.0) - 1}
+        @propagation_function = lambda { |x| 1/(1+Math.exp(-1*(x))) } # sigmoid; alternative: lambda { |x| Math.tanh(x) }
+        @derivative_propagation_function = lambda { |y| y*(1-y) } # sigmoid derivative; for tanh use: lambda { |y| 1.0 - y**2 }
+ @disable_bias = false
+ @learning_rate = 0.25
+ @momentum = 0.1
end
- # Evaluates the input.
- # E.g.
- # net = Backpropagation.new([4, 3, 2])
- # net.eval([25, 32.3, 12.8, 1.5])
- # # => [0.83, 0.03]
- def eval(input)
- #check input size
- if(input.length != @neurons.first.length)
- raise "Wrong input dimension. Expected: #{@neurons.first.length}, received: #{input.length}"
- end
- #Present input
- input.each_index do |input_index|
- @neurons.first[input_index].propagate(input[input_index])
- end
- #Propagate
- @neurons[1..-1].each do |layer|
- layer.each {|neuron| neuron.propagate}
- end
- output = []
- @neurons.last.each { |neuron| output << neuron.state }
- return output
+ # Evaluates the input.
+ # E.g.
+ # net = Backpropagation.new([4, 3, 2])
+ # net.eval([25, 32.3, 12.8, 1.5])
+ # # => [0.83, 0.03]
+ def eval(input_values)
+ check_input_dimension(input_values.length)
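+        # Lazily build weights and activation nodes the first time the
+        # network is evaluated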
+ init_network if !@weights
+ feedforward(input_values)
+ return @activation_nodes.last.clone
end
-
+
# This method trains the network using the backpropagation algorithm.
#
# input: Networks input
#
# output: Expected output for the given input.
#
- # This method returns the network error (not an absolut amount,
- # the difference between real output and the expected output)
- def train(input, output)
- #check output size
- if(output.length != @neurons.last.length)
- raise "Wrong output dimension. Expected: #{@neurons.last.length}, received: #{output.length}"
+ # This method returns the network error:
+ # => 0.5 * sum( (expected_value[i] - output_value[i])**2 )
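+      #
+      # An illustrative sketch (the returned error value is made up):
+      #
+      #   net = Ai4r::NeuralNetwork::Backpropagation.new([2, 2, 1])
+      #   error = net.train([1, 0], [1])   # => e.g. 0.12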
+ def train(inputs, outputs)
+ eval(inputs)
+ backpropagate(outputs)
+ calculate_error(outputs)
+ end
+
+ # Initialize (or reset) activation nodes and weights, with the
+ # provided net structure and parameters.
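+      # It is invoked automatically the first time eval (or train) is called;
+      # call it directly to discard previously learned weights.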
+ def init_network
+ init_activation_nodes
+ init_weights
+ init_last_changes
+ return self
+ end
+
+ protected
+
+ # Propagate error backwards
+ def backpropagate(expected_output_values)
+ check_output_dimension(expected_output_values.length)
+ calculate_output_deltas(expected_output_values)
+ calculate_internal_deltas
+ update_weights
+ end
+
+ # Propagate values forward
+ def feedforward(input_values)
+ input_values.each_index do |input_index|
+ @activation_nodes.first[input_index] = input_values[input_index]
end
- #Eval input
- eval(input)
- #Set expected output
- output.each_index do |output_index|
- @neurons.last[output_index].expected_output = output[output_index]
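+        # For every layer n, the activation of node j in layer n+1 is the
+        # propagation function applied to the weighted sum of layer n:
+        #   activation[n+1][j] = f( sum_i( activation[n][i] * weights[n][i][j] ) )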
+ @weights.each_index do |n|
+ @structure[n+1].times do |j|
+ sum = 0.0
+ @activation_nodes[n].each_index do |i|
+ sum += (@activation_nodes[n][i] * @weights[n][i][j])
+ end
+ @activation_nodes[n+1][j] = @propagation_function.call(sum)
+ end
+ end
+ end
+
+      # Initialize the activation nodes structure: one array per layer, with an
+      # extra bias node (fixed at 1.0) appended to every layer except the
+      # output layer, unless disable_bias is set.
+ def init_activation_nodes
+ @activation_nodes = Array.new(@structure.length) do |n|
+ Array.new(@structure[n], 1.0)
end
- #Calculate error
- @neurons.reverse.each do |layer|
- layer.each {|neuron| neuron.calc_error}
+ if not disable_bias
+ @activation_nodes[0...-1].each {|layer| layer << 1.0 }
end
- #Change weight
- @neurons.each do |layer|
- layer.each {|neuron| neuron.change_weights }
- end
- #return net error
- return @neurons.last.collect { |x| x.calc_error }
end
+
+      # Initialize the weight arrays using the function specified by the
+      # initial_weight_function parameter.
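+      # (@weights[n][i][j] is the weight of the connection from node i of
+      # layer n to node j of layer n+1.)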
+ def init_weights
+ @weights = Array.new(@structure.length-1) do |i|
+ nodes_origin = @activation_nodes[i].length
+ nodes_target = @structure[i+1]
+ Array.new(nodes_origin) do |j|
+ Array.new(nodes_target) do |k|
+ @initial_weight_function.call(i, j, k)
+ end
+ end
+ end
+ end
- private
- def print_weight
- @neurons.each_index do |layer_index|
- @neurons[layer_index].each_index do |neuron_index|
- puts "L #{layer_index} N #{neuron_index} W #{@neurons[layer_index][neuron_index].w.inspect}"
+      # Momentum usage needs to know how much each weight changed during the
+      # previous training step. This method initializes the @last_changes
+      # structure with 0 values.
+ def init_last_changes
+ @last_changes = Array.new(@weights.length) do |w|
+ Array.new(@weights[w].length) do |i|
+ Array.new(@weights[w][i].length, 0.0)
end
end
end
-
- end
-
-
- class Neuron
-
- attr_accessor :state
- attr_accessor :error
- attr_accessor :expected_output
- attr_accessor :w
- attr_accessor :x
-
- def initialize(childs, threshold, lambda, momentum)
- #instance state
- @w = nil
- @childs = childs
- @error = nil
- @state = 0
- @pushed = 0
- @last_delta = 0
- @x = 0
- #Parameters
- @lambda = lambda
- @momentum = momentum
- @threshold = threshold
- #init w
- if(childs)
- @w = []
- childs.each { @w << init_weight }
+
+ # Calculate deltas for output layer
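+      # For each output node: delta = derivative_propagation_function(output) *
+      # (expected - output)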
+ def calculate_output_deltas(expected_values)
+ output_values = @activation_nodes.last
+ output_deltas = []
+ output_values.each_index do |output_index|
+ error = expected_values[output_index] - output_values[output_index]
+ output_deltas << @derivative_propagation_function.call(
+ output_values[output_index]) * error
end
+ @deltas = [output_deltas]
end
-
- def push(x)
- @pushed += x
- end
-
- def propagate(input = nil)
- if(input)
- input = input.to_f
- @x = input
- @state = input
- @childs.each_index do |child_index|
- @childs[child_index].push(input * @w[child_index])
- end
- else
- @x = @pushed + @threshold
- @pushed = 0
- @state = Neuron.f(@x)
- if @childs
- @childs.each_index do |child_index|
- @childs[child_index].push(@state * @w[child_index])
+
+ # Calculate deltas for hidden layers
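+      # For each hidden node j: delta[j] = derivative_propagation_function(activation[j]) *
+      # sum_k( delta[k] * weight[j][k] ), where k iterates over the next layer.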
+ def calculate_internal_deltas
+ prev_deltas = @deltas.last
+ (@activation_nodes.length-2).downto(1) do |layer_index|
+ layer_deltas = []
+ @activation_nodes[layer_index].each_index do |j|
+ error = 0.0
+ @structure[layer_index+1].times do |k|
+ error += prev_deltas[k] * @weights[layer_index][j][k]
end
+ layer_deltas[j] = (@derivative_propagation_function.call(
+ @activation_nodes[layer_index][j]) * error)
end
+ prev_deltas = layer_deltas
+ @deltas.unshift(layer_deltas)
end
end
-
- def calc_error
- if(!@childs && @expected_output)
- @error = (@expected_output - @state)
- elsif(@childs)
- @error = 0
- @childs.each_index do |child_index|
- @error += (@childs[child_index].error * @w[child_index])
+
+ # Update weights after @deltas have been calculated.
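+      # Each weight is adjusted by learning_rate * delta * activation, plus
+      # momentum times the previous (delta * activation) value for that weight.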
+ def update_weights
+ (@weights.length-1).downto(0) do |n|
+ @weights[n].each_index do |i|
+ @weights[n][i].each_index do |j|
+ change = @deltas[n][j]*@activation_nodes[n][i]
+ @weights[n][i][j] += ( learning_rate * change +
+ momentum * @last_changes[n][i][j])
+ @last_changes[n][i][j] = change
+ end
end
end
end
-
- def change_weights
- return if !@childs
- @childs.each_index do |child_index |
- delta = @lambda * @childs[child_index].error * (@state) * Neuron.f_prime(@childs[child_index].x)
- @w[child_index] += (delta + @momentum * @last_delta)
- @last_delta = delta
+
+      # Calculate quadratic error for an expected output value
+ # Error = 0.5 * sum( (expected_value[i] - output_value[i])**2 )
+ def calculate_error(expected_output)
+ output_values = @activation_nodes.last
+ error = 0.0
+ expected_output.each_index do |output_index|
+ error +=
+ 0.5*(output_values[output_index]-expected_output[output_index])**2
end
+ return error
end
-
- # Propagation function.
- # By default:
- # f(x) = 1/(1 + e^(-x))
- # You can override it with any derivable function.
- # A usually usefull one is:
- # f(x) = x.
- # If you override this function, you will have to override
- # f_prime too.
- def self.f(x)
- return 1/(1+Math.exp(-1*(x)))
+
+ def check_input_dimension(inputs)
+ raise ArgumentError, "Wrong number of inputs. " +
+ "Expected: #{@structure.first}, " +
+ "received: #{inputs}." if inputs!=@structure.first
end
- # Derived function of the propagation function (self.f)
- # By default:
- # f_prime(x) = f(x)(1- f(x))
- # If you override f(x) with:
- # f(x) = x.
- # Then you must override f_prime as:
- # f_prime(x) = 1
- def self.f_prime(x)
- val = f(x)
- return val*(1-val)
+ def check_output_dimension(outputs)
+ raise ArgumentError, "Wrong number of outputs. " +
+ "Expected: #{@structure.last}, " +
+ "received: #{outputs}." if outputs!=@structure.last
end
-
- private
- def init_weight
- rand/4
- end
-
+
end
-
end
-
-end
\ No newline at end of file
+end