module TensorStream
  module Train
    # High Level implementation of the Adam algorithm
    class AdamOptimizer < Optimizer
      include TensorStream::OpHelper

      attr_accessor :learning_rate

      def initialize(learning_rate = 0.001, beta1 = 0.9, beta2 = 0.999, epsilon = 1e-8,
                     use_locking: false, name: "Adam")
        @learning_rate = learning_rate
        @beta1 = beta1
        @beta2 = beta2
        @epsilon = epsilon

        # Tensor versions of the constructor arguments, created in _prepare().
        @lr_t = nil
        @beta1_t = nil
        @beta2_t = nil
        @epsilon_t = nil

        # Created in SparseApply if needed.
        @updated_lr = nil

        super(name: name, use_locking: use_locking)
      end

      protected

      # Returns the beta1^t and beta2^t accumulators used for bias correction.
      def get_beta_accumulators
        graph = TensorStream.get_default_graph
        [get_non_slot_variable("beta1_power", graph: graph),
         get_non_slot_variable("beta2_power", graph: graph)]
      end

      # Converts the (possibly callable) hyperparameters to tensors.
      def prepare
        lr = call_if_callable(@learning_rate)
        beta1 = call_if_callable(@beta1)
        beta2 = call_if_callable(@beta2)
        epsilon = call_if_callable(@epsilon)

        @lr_t = TensorStream.convert_to_tensor(lr, name: "learning_rate")
        @beta1_t = TensorStream.convert_to_tensor(beta1, name: "beta1")
        @beta2_t = TensorStream.convert_to_tensor(beta2, name: "beta2")
        @epsilon_t = TensorStream.convert_to_tensor(epsilon, name: "epsilon")
      end

      def create_slots(var_list)
        first_var = var_list.min_by(&:name)
        create_non_slot_variable(@beta1, "beta1_power", first_var)
        create_non_slot_variable(@beta2, "beta2_power", first_var)

        # Create slots for the first and second moments.
        var_list.each do |v|
          zeros_slot(v, "m", @name)
          zeros_slot(v, "v", @name)
        end
      end

      # Builds the Adam update op for a single dense variable, using its
      # first-moment (m) and second-moment (v) slots and the beta power
      # accumulators for bias correction.
      def apply_dense(grad, var)
        m = get_slot(var, "m")
        v = get_slot(var, "v")
        beta1_power, beta2_power = get_beta_accumulators
        _op(:apply_adam, var, m, v,
            TensorStream.cast(beta1_power, var.data_type),
            TensorStream.cast(beta2_power, var.data_type),
            TensorStream.cast(@lr_t, var.data_type),
            TensorStream.cast(@beta1_t, var.data_type),
            TensorStream.cast(@beta2_t, var.data_type),
            TensorStream.cast(@epsilon_t, var.data_type),
            grad, use_locking: @use_locking)
      end

      # After all per-variable updates, advances the beta power accumulators
      # (beta1^t, beta2^t) and groups everything into a single training op.
      def finish(update_ops, name_scope)
        TensorStream.control_dependencies(update_ops) do
          beta1_power, beta2_power = get_beta_accumulators
          update_beta1 = nil
          update_beta2 = nil
          TensorStream.colocate_with(beta1_power) do
            update_beta1 = beta1_power.assign(beta1_power * @beta1_t, use_locking: @use_locking)
            update_beta2 = beta2_power.assign(beta2_power * @beta2_t, use_locking: @use_locking)
          end
          TensorStream.group(update_ops + [update_beta1, update_beta2], name: name_scope)
        end
      end
    end
  end
end
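
# Usage sketch (illustrative only, not part of this class): minimizing a toy
# least-squares loss with AdamOptimizer#minimize from the Optimizer base class.
# The variable names, placeholder dtypes, and data below are assumptions made
# for the example, not guarantees about the surrounding codebase.
#
#   require "tensor_stream"
#
#   tf = TensorStream
#   x = tf.placeholder(:float32)
#   y = tf.placeholder(:float32)
#   w = tf.variable(rand, name: "w")
#   b = tf.variable(rand, name: "b")
#   loss = tf.reduce_mean(tf.square(w * x + b - y))
#   train_step = TensorStream::Train::AdamOptimizer.new(0.01).minimize(loss)
#
#   tf.session do |sess|
#     sess.run(tf.global_variables_initializer)
#     100.times do
#       sess.run(train_step, feed_dict: { x => [1.0, 2.0, 3.0], y => [2.0, 4.0, 6.0] })
#     end
#   end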