lib/ai4r/neural_network/backpropagation.rb in ai4r-1.3 vs lib/ai4r/neural_network/backpropagation.rb in ai4r-1.4

- old
+ new

@@ -1,270 +1,293 @@
 # Author::    Sergio Fierens
 # License::   MPL 1.1
 # Project::   ai4r
 # Url::       http://ai4r.rubyforge.org/
 #
-# Specials thanks to John Miller, for several bugs fixes and comments in the
-# Backpropagation implementation
-#
 # You can redistribute it and/or modify it under the terms of
 # the Mozilla Public License version 1.1 as published by the
 # Mozilla Foundation at http://www.mozilla.org/MPL/MPL-1.1.txt
-#
-module Ai4r
+require File.dirname(__FILE__) + '/../data/parameterizable'
-  # The utility of artificial neural network
-  # models lies in the fact that they can be used
-  # to infer a function from observations.
-  # This is particularly useful in applications
-  # where the complexity of the data or task makes the
-  # design of such a function by hand impractical.
-  # Neural Networks are being used in many businesses and applications. Their
-  # ability to learn by example makes them attractive in environments where
-  # the business rules are either not well defined or are hard to enumerate and
-  # define. Many people believe that Neural Networks can only solve toy problems.
-  # Give them a try, and let you decide if they are good enough to solve your
-  # needs.
+module Ai4r
+
+  # Artificial Neural Networks are mathematical or computational models based on
+  # biological neural networks.
   #
-  # In this module you will find an implementation of neural networks
-  # using the Backpropagation is a supervised learning technique (described
-  # by Paul Werbos in 1974, and further developed by David E.
-  # Rumelhart, Geoffrey E. Hinton and Ronald J. Williams in 1986)
+  # More about neural networks:
   #
-  # More about neural networks and backpropagation:
-  #
-  # * http://en.wikipedia.org/wiki/Backpropagation
-  # * http://en.wikipedia.org/wiki/Neural_networks
+  # * http://en.wikipedia.org/wiki/Artificial_neural_network
+  #
   module NeuralNetwork
-
+
     # = Introduction
     #
-    # This is an implementation of neural networks
-    # using the Backpropagation is a supervised learning technique (described
+    # This is an implementation of a multilayer perceptron network, using
+    # the backpropagation algorithm for learning.
+    #
+    # Backpropagation is a supervised learning technique (described
     # by Paul Werbos in 1974, and further developed by David E.
     # Rumelhart, Geoffrey E. Hinton and Ronald J. Williams in 1986)
     #
+    # = Features
+    #
+    # * Support for any network architecture (number of layers and neurons)
+    # * Configurable propagation function
+    # * Optional usage of bias
+    # * Configurable momentum
+    # * Configurable learning rate
+    # * Configurable initial weight function
+    # * 100% ruby code, no external dependency
+    #
+    # = Parameters
+    #
+    # Use class method get_parameters_info to obtain details on the algorithm
+    # parameters. Use set_parameters to set values for these parameters.
+    #
+    # * :disable_bias => If true, the algorithm will not use bias nodes.
+    #   False by default.
+    # * :initial_weight_function => f(n, i, j) must return the initial
+    #   weight for the connection between the node i in layer n, and node j in
+    #   layer n+1. By default a random number in [-1, 1) range.
+    # * :propagation_function => By default:
+    #   lambda { |x| 1/(1+Math.exp(-1*(x))) }
+    # * :derivative_propagation_function => Derivative of the propagation
+    #   function, based on propagation function output.
+    #   By default: lambda { |y| y*(1-y) }, where y=propagation_function(x)
+    # * :learning_rate => By default 0.25
+    # * :momentum => By default 0.1. Set this parameter to 0 to disable
+    #   momentum
+    #
     # = How to use it
     #
-    #   # Create the network
-    #   net = Ai4r::NeuralNetwork::Backpropagation.new([4, 3, 2])  # 4 inputs
-    #                                          # 1 hidden layer with 3 neurons,
-    #                                          # 2 outputs
+    #   # Create the network with 4 inputs, 1 hidden layer with 3 neurons,
+    #   # and 2 outputs
+    #   net = Ai4r::NeuralNetwork::Backpropagation.new([4, 3, 2])
+    #
     #   # Train the network
-    #   1..upto(100) do |i|
+    #   1000.times do |i|
     #     net.train(example[i], result[i])
     #   end
     #
     #   # Use it: Evaluate data with the trained network
-    #   net.eval([12, 48, 12, 25])  # =>  [0.86, 0.01]
-    #
-    class Backpropagation
-
-      DEFAULT_BETA = 0.5
-      DEFAULT_LAMBDA = 0.25
-      DEFAULT_THRESHOLD = 0.66
-
-      # Creates a new network specifying the its architecture.
-      # E.g.
-      #
-      #   net = Backpropagation.new([4, 3, 2])  # 4 inputs
-      #                                          # 1 hidden layer with 3 neurons,
-      #                                          # 2 outputs
-      #   net = Backpropagation.new([2, 3, 3, 4])  # 2 inputs
-      #                                          # 2 hidden layer with 3 neurons each,
-      #                                          # 4 outputs
-      #   net = Backpropagation.new([2, 1])  # 2 inputs
-      #                                      # No hidden layer
-      #                                      # 1 output
-      #
-      # Optionally you can customize certain parameters:
+    #   net.eval([12, 48, 12, 25])
+    #     =>  [0.86, 0.01]
+    #
+    # More about multilayer perceptron neural networks and backpropagation:
     #
-      # threshold = A real number which we will call Threshold.
-      # Experiments have shown that best values for q are between 0.25 and 1.
+    # * http://en.wikipedia.org/wiki/Backpropagation
+    # * http://en.wikipedia.org/wiki/Multilayer_perceptron
     #
-      # lambda = The Learning Rate: a real number, usually between 0.05 and 0.25.
-      #
-      # momentum = A momentum will avoid oscillations during learning, converging
-      # to a solution in less iterations.
-      def initialize(layer_sizes, threshold=DEFAULT_THRESHOLD, lambda=DEFAULT_LAMBDA, momentum=DEFAULT_BETA)
-        @neurons = []
-        layer_sizes.reverse.each do |layer_size|
-          layer = []
-          layer_size.times { layer << Neuron.new(@neurons.last, threshold, lambda, momentum) }
-          @neurons << layer
-        end
-        @neurons.reverse!
+    # = About the project
+    # Author::    Sergio Fierens
+    # License::   MPL 1.1
+    # Url::       http://ai4r.rubyforge.org
+    class Backpropagation
+
+      include Ai4r::Data::Parameterizable
+
+      parameters_info :disable_bias => "If true, the algorithm will not use "+
+          "bias nodes. False by default.",
+        :initial_weight_function => "f(n, i, j) must return the initial "+
+          "weight for the connection between the node i in layer n, and "+
+          "node j in layer n+1. By default a random number in [-1, 1) range.",
+        :propagation_function => "By default: " +
+          "lambda { |x| 1/(1+Math.exp(-1*(x))) }",
+        :derivative_propagation_function => "Derivative of the propagation "+
+          "function, based on propagation function output. By default: " +
+          "lambda { |y| y*(1-y) }, where y=propagation_function(x)",
+        :learning_rate => "By default 0.25",
+        :momentum => "By default 0.1. Set this parameter to 0 to disable "+
+          "momentum."
+
+      attr_accessor :structure, :weights, :activation_nodes
+
+      # Creates a new network specifying its architecture.
+      # E.g.
+      #
+      #   net = Backpropagation.new([4, 3, 2])  # 4 inputs
+      #                                          # 1 hidden layer with 3 neurons,
+      #                                          # 2 outputs
+      #   net = Backpropagation.new([2, 3, 3, 4])  # 2 inputs
+      #                                          # 2 hidden layers with 3 neurons each,
+      #                                          # 4 outputs
+      #   net = Backpropagation.new([2, 1])  # 2 inputs
+      #                                      # No hidden layer
+      #                                      # 1 output
+      def initialize(network_structure)
+        @structure = network_structure
+        @initial_weight_function = lambda { |n, i, j| ((rand 2000)/1000.0) - 1}
+        @propagation_function = lambda { |x| 1/(1+Math.exp(-1*(x))) } #lambda { |x| Math.tanh(x) }
+        @derivative_propagation_function = lambda { |y| y*(1-y) } #lambda { |y| 1.0 - y**2 }
+        @disable_bias = false
+        @learning_rate = 0.25
+        @momentum = 0.1
       end
-      # Evaluates the input.
-      # E.g.
-      #   net = Backpropagation.new([4, 3, 2])
-      #   net.eval([25, 32.3, 12.8, 1.5])
-      #     # =>  [0.83, 0.03]
-      def eval(input)
-        #check input size
-        if(input.length != @neurons.first.length)
-          raise "Wrong input dimension. Expected: #{@neurons.first.length}, received: #{input.length}"
-        end
-        #Present input
-        input.each_index do |input_index|
-          @neurons.first[input_index].propagate(input[input_index])
-        end
-        #Propagate
-        @neurons[1..-1].each do |layer|
-          layer.each {|neuron| neuron.propagate}
-        end
-        output = []
-        @neurons.last.each { |neuron| output << neuron.state }
-        return output
+      # Evaluates the input.
+      # E.g.
+      #   net = Backpropagation.new([4, 3, 2])
+      #   net.eval([25, 32.3, 12.8, 1.5])
+      #     # =>  [0.83, 0.03]
+      def eval(input_values)
+        check_input_dimension(input_values.length)
+        init_network if !@weights
+        feedforward(input_values)
+        return @activation_nodes.last.clone
       end
-
+
       # This method trains the network using the backpropagation algorithm.
       #
       # input: Networks input
       #
       # output: Expected output for the given input.
       #
-      # This method returns the network error (not an absolut amount,
-      # the difference between real output and the expected output)
-      def train(input, output)
-        #check output size
-        if(output.length != @neurons.last.length)
-          raise "Wrong output dimension. Expected: #{@neurons.last.length}, received: #{output.length}"
+      # This method returns the network error:
+      # => 0.5 * sum( (expected_value[i] - output_value[i])**2 )
+      def train(inputs, outputs)
+        eval(inputs)
+        backpropagate(outputs)
+        calculate_error(outputs)
+      end
+
+      # Initialize (or reset) activation nodes and weights, with the
+      # provided net structure and parameters.
+      def init_network
+        init_activation_nodes
+        init_weights
+        init_last_changes
+        return self
+      end
+
+      protected
+
+      # Propagate error backwards
+      def backpropagate(expected_output_values)
+        check_output_dimension(expected_output_values.length)
+        calculate_output_deltas(expected_output_values)
+        calculate_internal_deltas
+        update_weights
+      end
+
+      # Propagate values forward
+      def feedforward(input_values)
+        input_values.each_index do |input_index|
+          @activation_nodes.first[input_index] = input_values[input_index]
         end
-        #Eval input
-        eval(input)
-        #Set expected output
-        output.each_index do |output_index|
-          @neurons.last[output_index].expected_output = output[output_index]
+        @weights.each_index do |n|
+          @structure[n+1].times do |j|
+            sum = 0.0
+            @activation_nodes[n].each_index do |i|
+              sum += (@activation_nodes[n][i] * @weights[n][i][j])
+            end
+            @activation_nodes[n+1][j] = @propagation_function.call(sum)
+          end
+        end
+      end
+
+      # Initialize neurons structure.
+      def init_activation_nodes
+        @activation_nodes = Array.new(@structure.length) do |n|
+          Array.new(@structure[n], 1.0)
         end
-        #Calculate error
-        @neurons.reverse.each do |layer|
-          layer.each {|neuron| neuron.calc_error}
+        if not disable_bias
+          @activation_nodes[0...-1].each {|layer| layer << 1.0 }
         end
-        #Change weight
-        @neurons.each do |layer|
-          layer.each {|neuron| neuron.change_weights }
-        end
-        #return net error
-        return @neurons.last.collect { |x| x.calc_error }
       end
+
+      # Initialize the weight arrays using function specified with the
+      # initial_weight_function parameter
+      def init_weights
+        @weights = Array.new(@structure.length-1) do |i|
+          nodes_origin = @activation_nodes[i].length
+          nodes_target = @structure[i+1]
+          Array.new(nodes_origin) do |j|
+            Array.new(nodes_target) do |k|
+              @initial_weight_function.call(i, j, k)
+            end
+          end
+        end
+      end
-      private
-      def print_weight
-        @neurons.each_index do |layer_index|
-          @neurons[layer_index].each_index do |neuron_index|
-            puts "L #{layer_index} N #{neuron_index} W #{@neurons[layer_index][neuron_index].w.inspect}"
+      # Momentum usage needs to know how much a weight changed in the
+      # previous training. This method initializes the @last_changes
+      # structure with 0 values.
+      def init_last_changes
+        @last_changes = Array.new(@weights.length) do |w|
+          Array.new(@weights[w].length) do |i|
+            Array.new(@weights[w][i].length, 0.0)
           end
        end
      end
-
-    end
-
-
-    class Neuron
-
-      attr_accessor :state
-      attr_accessor :error
-      attr_accessor :expected_output
-      attr_accessor :w
-      attr_accessor :x
-
-      def initialize(childs, threshold, lambda, momentum)
-        #instance state
-        @w = nil
-        @childs = childs
-        @error = nil
-        @state = 0
-        @pushed = 0
-        @last_delta = 0
-        @x = 0
-        #Parameters
-        @lambda = lambda
-        @momentum = momentum
-        @threshold = threshold
-        #init w
-        if(childs)
-          @w = []
-          childs.each { @w << init_weight }
+
+      # Calculate deltas for output layer
+      def calculate_output_deltas(expected_values)
+        output_values = @activation_nodes.last
+        output_deltas = []
+        output_values.each_index do |output_index|
+          error = expected_values[output_index] - output_values[output_index]
+          output_deltas << @derivative_propagation_function.call(
+            output_values[output_index]) * error
        end
+        @deltas = [output_deltas]
      end
-
-      def push(x)
-        @pushed += x
-      end
-
-      def propagate(input = nil)
-        if(input)
-          input = input.to_f
-          @x = input
-          @state = input
-          @childs.each_index do |child_index|
-            @childs[child_index].push(input * @w[child_index])
-          end
-        else
-          @x = @pushed + @threshold
-          @pushed = 0
-          @state = Neuron.f(@x)
-          if @childs
-            @childs.each_index do |child_index|
-              @childs[child_index].push(@state * @w[child_index])
+
+      # Calculate deltas for hidden layers
+      def calculate_internal_deltas
+        prev_deltas = @deltas.last
+        (@activation_nodes.length-2).downto(1) do |layer_index|
+          layer_deltas = []
+          @activation_nodes[layer_index].each_index do |j|
+            error = 0.0
+            @structure[layer_index+1].times do |k|
+              error += prev_deltas[k] * @weights[layer_index][j][k]
            end
+            layer_deltas[j] = (@derivative_propagation_function.call(
+              @activation_nodes[layer_index][j]) * error)
          end
+          prev_deltas = layer_deltas
+          @deltas.unshift(layer_deltas)
        end
      end
-
-      def calc_error
-        if(!@childs && @expected_output)
-          @error = (@expected_output - @state)
-        elsif(@childs)
-          @error = 0
-          @childs.each_index do |child_index|
-            @error += (@childs[child_index].error * @w[child_index])
+
+      # Update weights after @deltas have been calculated.
+      def update_weights
+        (@weights.length-1).downto(0) do |n|
+          @weights[n].each_index do |i|
+            @weights[n][i].each_index do |j|
+              change = @deltas[n][j]*@activation_nodes[n][i]
+              @weights[n][i][j] += ( learning_rate * change +
+                momentum * @last_changes[n][i][j])
+              @last_changes[n][i][j] = change
+            end
          end
        end
      end
-
-      def change_weights
-        return if !@childs
-        @childs.each_index do |child_index |
-          delta = @lambda * @childs[child_index].error * (@state) * Neuron.f_prime(@childs[child_index].x)
-          @w[child_index] += (delta + @momentum * @last_delta)
-          @last_delta = delta
+
+      # Calculate quadratic error for an expected output value
+      # Error = 0.5 * sum( (expected_value[i] - output_value[i])**2 )
+      def calculate_error(expected_output)
+        output_values = @activation_nodes.last
+        error = 0.0
+        expected_output.each_index do |output_index|
+          error +=
+            0.5*(output_values[output_index]-expected_output[output_index])**2
        end
+        return error
      end
-
-      # Propagation function.
-      # By default:
-      #   f(x) = 1/(1 + e^(-x))
-      # You can override it with any derivable function.
-      # A usually usefull one is:
-      #   f(x) = x.
-      # If you override this function, you will have to override
-      # f_prime too.
-      def self.f(x)
-        return 1/(1+Math.exp(-1*(x)))
+
+      def check_input_dimension(inputs)
+        raise ArgumentError, "Wrong number of inputs. " +
+          "Expected: #{@structure.first}, " +
+          "received: #{inputs}." if inputs!=@structure.first
      end
-      # Derived function of the propagation function (self.f)
-      # By default:
-      #   f_prime(x) = f(x)(1- f(x))
-      # If you override f(x) with:
-      #   f(x) = x.
-      # Then you must override f_prime as:
-      #   f_prime(x) = 1
-      def self.f_prime(x)
-        val = f(x)
-        return val*(1-val)
+
+      def check_output_dimension(outputs)
+        raise ArgumentError, "Wrong number of outputs. " +
+          "Expected: #{@structure.last}, " +
+          "received: #{outputs}." if outputs!=@structure.last
      end
-
-      private
-      def init_weight
-        rand/4
-      end
-
+    end
-    end
-
-end
\ No newline at end of file
+end
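
To make the rewritten 1.4 interface concrete, here is a minimal usage sketch of the API shown above, training a small network on XOR. The XOR data set, iteration count, and require path are illustrative assumptions rather than part of the gem's documentation; set_parameters comes from the Parameterizable module included in 1.4, and exact outputs will vary because the initial weights are random.

  # Illustrative sketch (assumptions: ai4r gem on the load path, Ruby 1.8-era syntax)
  require 'rubygems'
  require 'ai4r/neural_network/backpropagation'

  # 2 inputs, one hidden layer with 3 neurons, 1 output
  net = Ai4r::NeuralNetwork::Backpropagation.new([2, 3, 1])

  # Optional: override the 1.4 defaults (learning_rate 0.25, momentum 0.1)
  net.set_parameters(:learning_rate => 0.25, :momentum => 0.1)

  # Hypothetical XOR training set
  examples = [[0, 0], [0, 1], [1, 0], [1, 1]]
  expected = [[0],    [1],    [1],    [0]]

  # train returns the quadratic error: 0.5 * sum( (expected[i] - output[i])**2 )
  2000.times do
    examples.each_index { |i| net.train(examples[i], expected[i]) }
  end

  # Outputs should approach [0], [1], [1], [0] after training
  examples.each { |e| puts "#{e.inspect} => #{net.eval(e).inspect}" }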