lib/ai4r/neural_network/backpropagation.rb in ai4r-1.3 vs lib/ai4r/neural_network/backpropagation.rb in ai4r-1.4
- old
+ new
@@ -1,270 +1,293 @@
# Author:: Sergio Fierens
# License:: MPL 1.1
# Project:: ai4r
# Url:: http://ai4r.rubyforge.org/
#
-# Specials thanks to John Miller, for several bugs fixes and comments in the
-# Backpropagation implementation
-#
# You can redistribute it and/or modify it under the terms of
# the Mozilla Public License version 1.1 as published by the
# Mozilla Foundation at http://www.mozilla.org/MPL/MPL-1.1.txt
-#
-module Ai4r
+require File.dirname(__FILE__) + '/../data/parameterizable'
- # The utility of artificial neural network
- # models lies in the fact that they can be used
- # to infer a function from observations.
- # This is particularly useful in applications
- # where the complexity of the data or task makes the
- # design of such a function by hand impractical.
- # Neural Networks are being used in many businesses and applications. Their
- # ability to learn by example makes them attractive in environments where
- # the business rules are either not well defined or are hard to enumerate and
- # define. Many people believe that Neural Networks can only solve toy problems.
- # Give them a try, and let you decide if they are good enough to solve your
- # needs.
+module Ai4r
+
+ # Artificial Neural Networks are mathematical or computational models based on
+ # biological neural networks.
#
- # In this module you will find an implementation of neural networks
- # using the Backpropagation is a supervised learning technique (described
- # by Paul Werbos in 1974, and further developed by David E.
- # Rumelhart, Geoffrey E. Hinton and Ronald J. Williams in 1986)
+ # More about neural networks:
#
- # More about neural networks and backpropagation:
- #
- # * http://en.wikipedia.org/wiki/Backpropagation
- # * http://en.wikipedia.org/wiki/Neural_networks
+ # * http://en.wikipedia.org/wiki/Artificial_neural_network
+ #
module NeuralNetwork
-
+
# = Introduction
#
- # This is an implementation of neural networks
- # using the Backpropagation is a supervised learning technique (described
+ # This is an implementation of a multilayer perceptron network, using
+ # the backpropagation algorithm for learning.
+ #
+ # Backpropagation is a supervised learning technique (described
# by Paul Werbos in 1974, and further developed by David E.
# Rumelhart, Geoffrey E. Hinton and Ronald J. Williams in 1986)
#
+ # = Features
+ #
+ # * Support for any network architecture (number of layers and neurons)
+ # * Configurable propagation function
+ # * Optional usage of bias
+ # * Configurable momentum
+ # * Configurable learning rate
+ # * Configurable initial weight function
+ # * 100% ruby code, no external dependency
+ #
+ # = Parameters
+ #
+  # Use the class method get_parameters_info to obtain details on the algorithm
+  # parameters. Use set_parameters to set values for these parameters (see the
+  # example after the parameter list below).
+ #
+  #   * :disable_bias => If true, the algorithm will not use bias nodes.
+ # False by default.
+ # * :initial_weight_function => f(n, i, j) must return the initial
+  #     weight for the connection between the node i in layer n, and node j in
+ # layer n+1. By default a random number in [-1, 1) range.
+ # * :propagation_function => By default:
+ # lambda { |x| 1/(1+Math.exp(-1*(x))) }
+ # * :derivative_propagation_function => Derivative of the propagation
+ # function, based on propagation function output.
+ # By default: lambda { |y| y*(1-y) }, where y=propagation_function(x)
+ # * :learning_rate => By default 0.25
+ # * :momentum => By default 0.1. Set this parameter to 0 to disable
+ # momentum
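+  #
+  # For example, a minimal sketch of tuning these parameters (this assumes the
+  # set_parameters method mentioned above accepts a hash of parameter names
+  # and values):
+  #
+  #   net = Ai4r::NeuralNetwork::Backpropagation.new([4, 3, 2])
+  #   net.set_parameters(:learning_rate => 0.5, :momentum => 0.0)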
+ #
# = How to use it
#
- # # Create the network
- # net = Ai4r::NeuralNetwork::Backpropagation.new([4, 3, 2]) # 4 inputs
- # # 1 hidden layer with 3 neurons,
- # # 2 outputs
+ # # Create the network with 4 inputs, 1 hidden layer with 3 neurons,
+ # # and 2 outputs
+ # net = Ai4r::NeuralNetwork::Backpropagation.new([4, 3, 2])
+ #
# # Train the network
- # 1..upto(100) do |i|
+ # 1000.times do |i|
# net.train(example[i], result[i])
# end
#
# # Use it: Evaluate data with the trained network
- # net.eval([12, 48, 12, 25]) # => [0.86, 0.01]
- #
- class Backpropagation
-
- DEFAULT_BETA = 0.5
- DEFAULT_LAMBDA = 0.25
- DEFAULT_THRESHOLD = 0.66
-
- # Creates a new network specifying the its architecture.
- # E.g.
- #
- # net = Backpropagation.new([4, 3, 2]) # 4 inputs
- # # 1 hidden layer with 3 neurons,
- # # 2 outputs
- # net = Backpropagation.new([2, 3, 3, 4]) # 2 inputs
- # # 2 hidden layer with 3 neurons each,
- # # 4 outputs
- # net = Backpropagation.new([2, 1]) # 2 inputs
- # # No hidden layer
- # # 1 output
- #
- # Optionally you can customize certain parameters:
+ # net.eval([12, 48, 12, 25])
+ # => [0.86, 0.01]
+ #
+ # More about multilayer perceptron neural networks and backpropagation:
#
- # threshold = A real number which we will call Threshold.
- # Experiments have shown that best values for q are between 0.25 and 1.
+ # * http://en.wikipedia.org/wiki/Backpropagation
+ # * http://en.wikipedia.org/wiki/Multilayer_perceptron
#
- # lambda = The Learning Rate: a real number, usually between 0.05 and 0.25.
- #
- # momentum = A momentum will avoid oscillations during learning, converging
- # to a solution in less iterations.
- def initialize(layer_sizes, threshold=DEFAULT_THRESHOLD, lambda=DEFAULT_LAMBDA, momentum=DEFAULT_BETA)
- @neurons = []
- layer_sizes.reverse.each do |layer_size|
- layer = []
- layer_size.times { layer << Neuron.new(@neurons.last, threshold, lambda, momentum) }
- @neurons << layer
- end
- @neurons.reverse!
+ # = About the project
+ # Author:: Sergio Fierens
+ # License:: MPL 1.1
+ # Url:: http://ai4r.rubyforge.org
+ class Backpropagation
+
+ include Ai4r::Data::Parameterizable
+
+      parameters_info :disable_bias => "If true, the algorithm will not use "+
+ "bias nodes. False by default.",
+ :initial_weight_function => "f(n, i, j) must return the initial "+
+          "weight for the connection between the node i in layer n, and "+
+ "node j in layer n+1. By default a random number in [-1, 1) range.",
+ :propagation_function => "By default: " +
+ "lambda { |x| 1/(1+Math.exp(-1*(x))) }",
+ :derivative_propagation_function => "Derivative of the propagation "+
+ "function, based on propagation function output. By default: " +
+ "lambda { |y| y*(1-y) }, where y=propagation_function(x)",
+ :learning_rate => "By default 0.25",
+ :momentum => "By default 0.1. Set this parameter to 0 to disable "+
+ "momentum."
+
+ attr_accessor :structure, :weights, :activation_nodes
+
+      # Creates a new network, specifying its architecture.
+ # E.g.
+ #
+ # net = Backpropagation.new([4, 3, 2]) # 4 inputs
+ # # 1 hidden layer with 3 neurons,
+ # # 2 outputs
+ # net = Backpropagation.new([2, 3, 3, 4]) # 2 inputs
+ # # 2 hidden layer with 3 neurons each,
+ # # 4 outputs
+ # net = Backpropagation.new([2, 1]) # 2 inputs
+ # # No hidden layer
+ # # 1 output
+ def initialize(network_structure)
+ @structure = network_structure
+ @initial_weight_function = lambda { |n, i, j| ((rand 2000)/1000.0) - 1}
+        @propagation_function = lambda { |x| 1/(1+Math.exp(-1*(x))) } # sigmoid; alternative: lambda { |x| Math.tanh(x) }
+        @derivative_propagation_function = lambda { |y| y*(1-y) } # sigmoid derivative; for tanh use: lambda { |y| 1.0 - y**2 }
+ @disable_bias = false
+ @learning_rate = 0.25
+ @momentum = 0.1
end
- # Evaluates the input.
- # E.g.
- # net = Backpropagation.new([4, 3, 2])
- # net.eval([25, 32.3, 12.8, 1.5])
- # # => [0.83, 0.03]
- def eval(input)
- #check input size
- if(input.length != @neurons.first.length)
- raise "Wrong input dimension. Expected: #{@neurons.first.length}, received: #{input.length}"
- end
- #Present input
- input.each_index do |input_index|
- @neurons.first[input_index].propagate(input[input_index])
- end
- #Propagate
- @neurons[1..-1].each do |layer|
- layer.each {|neuron| neuron.propagate}
- end
- output = []
- @neurons.last.each { |neuron| output << neuron.state }
- return output
+ # Evaluates the input.
+ # E.g.
+ # net = Backpropagation.new([4, 3, 2])
+ # net.eval([25, 32.3, 12.8, 1.5])
+ # # => [0.83, 0.03]
+ def eval(input_values)
+ check_input_dimension(input_values.length)
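+        # Lazily build weights and activation nodes the first time the
+        # network is evaluated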
+ init_network if !@weights
+ feedforward(input_values)
+ return @activation_nodes.last.clone
end
-
+
# This method trains the network using the backpropagation algorithm.
#
# input: Networks input
#
# output: Expected output for the given input.
#
- # This method returns the network error (not an absolut amount,
- # the difference between real output and the expected output)
- def train(input, output)
- #check output size
- if(output.length != @neurons.last.length)
- raise "Wrong output dimension. Expected: #{@neurons.last.length}, received: #{output.length}"
+ # This method returns the network error:
+ # => 0.5 * sum( (expected_value[i] - output_value[i])**2 )
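+      #
+      # An illustrative sketch (the returned error value is made up):
+      #
+      #   net = Ai4r::NeuralNetwork::Backpropagation.new([2, 2, 1])
+      #   error = net.train([1, 0], [1])   # => e.g. 0.12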
+ def train(inputs, outputs)
+ eval(inputs)
+ backpropagate(outputs)
+ calculate_error(outputs)
+ end
+
+ # Initialize (or reset) activation nodes and weights, with the
+ # provided net structure and parameters.
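+      # It is invoked automatically the first time eval (or train) is called;
+      # call it directly to discard previously learned weights.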
+ def init_network
+ init_activation_nodes
+ init_weights
+ init_last_changes
+ return self
+ end
+
+ protected
+
+ # Propagate error backwards
+ def backpropagate(expected_output_values)
+ check_output_dimension(expected_output_values.length)
+ calculate_output_deltas(expected_output_values)
+ calculate_internal_deltas
+ update_weights
+ end
+
+ # Propagate values forward
+ def feedforward(input_values)
+ input_values.each_index do |input_index|
+ @activation_nodes.first[input_index] = input_values[input_index]
end
- #Eval input
- eval(input)
- #Set expected output
- output.each_index do |output_index|
- @neurons.last[output_index].expected_output = output[output_index]
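+        # For every layer n, the activation of node j in layer n+1 is the
+        # propagation function applied to the weighted sum of layer n:
+        #   activation[n+1][j] = f( sum_i( activation[n][i] * weights[n][i][j] ) )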
+ @weights.each_index do |n|
+ @structure[n+1].times do |j|
+ sum = 0.0
+ @activation_nodes[n].each_index do |i|
+ sum += (@activation_nodes[n][i] * @weights[n][i][j])
+ end
+ @activation_nodes[n+1][j] = @propagation_function.call(sum)
+ end
+ end
+ end
+
+      # Initialize the activation nodes structure: one array per layer, with an
+      # extra bias node (fixed at 1.0) appended to every layer except the
+      # output layer, unless disable_bias is set.
+ def init_activation_nodes
+ @activation_nodes = Array.new(@structure.length) do |n|
+ Array.new(@structure[n], 1.0)
end
- #Calculate error
- @neurons.reverse.each do |layer|
- layer.each {|neuron| neuron.calc_error}
+ if not disable_bias
+ @activation_nodes[0...-1].each {|layer| layer << 1.0 }
end
- #Change weight
- @neurons.each do |layer|
- layer.each {|neuron| neuron.change_weights }
- end
- #return net error
- return @neurons.last.collect { |x| x.calc_error }
end
+
+      # Initialize the weight arrays using the function specified by the
+      # initial_weight_function parameter.
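+      # (@weights[n][i][j] is the weight of the connection from node i of
+      # layer n to node j of layer n+1.)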
+ def init_weights
+ @weights = Array.new(@structure.length-1) do |i|
+ nodes_origin = @activation_nodes[i].length
+ nodes_target = @structure[i+1]
+ Array.new(nodes_origin) do |j|
+ Array.new(nodes_target) do |k|
+ @initial_weight_function.call(i, j, k)
+ end
+ end
+ end
+ end
- private
- def print_weight
- @neurons.each_index do |layer_index|
- @neurons[layer_index].each_index do |neuron_index|
- puts "L #{layer_index} N #{neuron_index} W #{@neurons[layer_index][neuron_index].w.inspect}"
+      # Momentum usage needs to know how much each weight changed during the
+      # previous training step. This method initializes the @last_changes
+      # structure with 0 values.
+ def init_last_changes
+ @last_changes = Array.new(@weights.length) do |w|
+ Array.new(@weights[w].length) do |i|
+ Array.new(@weights[w][i].length, 0.0)
end
end
end
-
- end
-
-
- class Neuron
-
- attr_accessor :state
- attr_accessor :error
- attr_accessor :expected_output
- attr_accessor :w
- attr_accessor :x
-
- def initialize(childs, threshold, lambda, momentum)
- #instance state
- @w = nil
- @childs = childs
- @error = nil
- @state = 0
- @pushed = 0
- @last_delta = 0
- @x = 0
- #Parameters
- @lambda = lambda
- @momentum = momentum
- @threshold = threshold
- #init w
- if(childs)
- @w = []
- childs.each { @w << init_weight }
+
+ # Calculate deltas for output layer
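+      # For each output node: delta = derivative_propagation_function(output) *
+      # (expected - output)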
+ def calculate_output_deltas(expected_values)
+ output_values = @activation_nodes.last
+ output_deltas = []
+ output_values.each_index do |output_index|
+ error = expected_values[output_index] - output_values[output_index]
+ output_deltas << @derivative_propagation_function.call(
+ output_values[output_index]) * error
end
+ @deltas = [output_deltas]
end
-
- def push(x)
- @pushed += x
- end
-
- def propagate(input = nil)
- if(input)
- input = input.to_f
- @x = input
- @state = input
- @childs.each_index do |child_index|
- @childs[child_index].push(input * @w[child_index])
- end
- else
- @x = @pushed + @threshold
- @pushed = 0
- @state = Neuron.f(@x)
- if @childs
- @childs.each_index do |child_index|
- @childs[child_index].push(@state * @w[child_index])
+
+ # Calculate deltas for hidden layers
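+      # For each hidden node j: delta[j] = derivative_propagation_function(activation[j]) *
+      # sum_k( delta[k] * weight[j][k] ), where k iterates over the next layer.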
+ def calculate_internal_deltas
+ prev_deltas = @deltas.last
+ (@activation_nodes.length-2).downto(1) do |layer_index|
+ layer_deltas = []
+ @activation_nodes[layer_index].each_index do |j|
+ error = 0.0
+ @structure[layer_index+1].times do |k|
+ error += prev_deltas[k] * @weights[layer_index][j][k]
end
+ layer_deltas[j] = (@derivative_propagation_function.call(
+ @activation_nodes[layer_index][j]) * error)
end
+ prev_deltas = layer_deltas
+ @deltas.unshift(layer_deltas)
end
end
-
- def calc_error
- if(!@childs && @expected_output)
- @error = (@expected_output - @state)
- elsif(@childs)
- @error = 0
- @childs.each_index do |child_index|
- @error += (@childs[child_index].error * @w[child_index])
+
+ # Update weights after @deltas have been calculated.
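+      # Each weight is adjusted by learning_rate * delta * activation, plus
+      # momentum times the previous (delta * activation) value for that weight.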
+ def update_weights
+ (@weights.length-1).downto(0) do |n|
+ @weights[n].each_index do |i|
+ @weights[n][i].each_index do |j|
+ change = @deltas[n][j]*@activation_nodes[n][i]
+ @weights[n][i][j] += ( learning_rate * change +
+ momentum * @last_changes[n][i][j])
+ @last_changes[n][i][j] = change
+ end
end
end
end
-
- def change_weights
- return if !@childs
- @childs.each_index do |child_index |
- delta = @lambda * @childs[child_index].error * (@state) * Neuron.f_prime(@childs[child_index].x)
- @w[child_index] += (delta + @momentum * @last_delta)
- @last_delta = delta
+
+      # Calculate quadratic error for an expected output value
+ # Error = 0.5 * sum( (expected_value[i] - output_value[i])**2 )
+ def calculate_error(expected_output)
+ output_values = @activation_nodes.last
+ error = 0.0
+ expected_output.each_index do |output_index|
+ error +=
+ 0.5*(output_values[output_index]-expected_output[output_index])**2
end
+ return error
end
-
- # Propagation function.
- # By default:
- # f(x) = 1/(1 + e^(-x))
- # You can override it with any derivable function.
- # A usually usefull one is:
- # f(x) = x.
- # If you override this function, you will have to override
- # f_prime too.
- def self.f(x)
- return 1/(1+Math.exp(-1*(x)))
+
+ def check_input_dimension(inputs)
+ raise ArgumentError, "Wrong number of inputs. " +
+ "Expected: #{@structure.first}, " +
+ "received: #{inputs}." if inputs!=@structure.first
end
- # Derived function of the propagation function (self.f)
- # By default:
- # f_prime(x) = f(x)(1- f(x))
- # If you override f(x) with:
- # f(x) = x.
- # Then you must override f_prime as:
- # f_prime(x) = 1
- def self.f_prime(x)
- val = f(x)
- return val*(1-val)
+ def check_output_dimension(outputs)
+ raise ArgumentError, "Wrong number of outputs. " +
+ "Expected: #{@structure.last}, " +
+ "received: #{outputs}." if outputs!=@structure.last
end
-
- private
- def init_weight
- rand/4
- end
-
+
end
-
end
-
-end
\ No newline at end of file
+end