module BackProp class Value def self.wrap(other) other.is_a?(Value) ? other : Value.new(other) end attr_reader :children attr_accessor :value, :label, :gradient, :backstep, :op def initialize(float, label: '', op: nil, children: []) @value = float.to_f @gradient = 0 @children = children if @children.empty? raise "op #{op.inspect} has no children" unless op.nil? else raise "op is required" if op.nil? end @op = op @label = label @backstep = -> {} end def to_s @label.empty? ? ("%.3f" % @value) : format("%s=%.3f", @label, @value) end def display format("%s(%.3f gradient=%.3f", @label.empty? ? @op || 'Value' : @label, @value, @gradient) + (@op.nil? ? '' : format(" %s(%s)", @op, @children.join(', '))) + ')' end def inspect @children.empty? ? self.display : [self.display, @children.map(&:inspect).join("\n\t")].join("\n\t") end # # Primary operations; notice every Value.new(op:) also defines a backstep # The backstep closes over the environment of the method so it can # refer to values present when the method executes # def +(other) other = Value.wrap(other) val = Value.new(@value + other.value, children: [self, other], op: :+) val.backstep = -> { # gradients accumulate to handle a value used multiple times self.gradient += val.gradient other.gradient += val.gradient } val end def *(other) other = Value.wrap(other) val = Value.new(@value * other.value, children: [self, other], op: :*) val.backstep = -> { self.gradient += val.gradient * other.value other.gradient += val.gradient * self.value } val end # Mostly we are squaring(2) or dividing(-1) def **(other) raise("Value is not supported") if other.is_a? Value val = Value.new(@value ** other, children: [self], op: :**) val.backstep = -> { self.gradient += val.gradient * (other * self.value ** (other - 1)) } val end def exp val = Value.new(Math.exp(@value), children: [self], op: :exp) val.backstep = -> { self.gradient += val.gradient * val.value } val end # # Secondary operations defined in terms of primary # def -(other) self + (Value.wrap(other) * Value.new(-1)) end def /(other) self * (Value.wrap(other) ** -1) end # # Activation functions # def tanh val = Value.new(Math.tanh(@value), children: [self], op: :tanh) val.backstep = -> { self.gradient += val.gradient * (1 - val.value ** 2) } val end # 1 / 1 + e^-x def sigmoid ((self * -1).exp + 1) ** -1 end # rectified linear unit; not susceptible to vanishing gradient like above def relu neg = @value < 0 val = Value.new(neg ? 0 : @value, children: [self], op: :relu) val.backstep = -> { self.gradient += val.gradient * (neg ? 0 : 1) } val end # # Backward propagation # def backward self.reset_gradient @gradient = 1.0 self.backprop end def reset_gradient @gradient = 0.0 @children.each(&:reset_gradient) self end def backprop self.backstep.call @children.each(&:backprop) self end end end