lib/torch/optim/adadelta.rb in torch-rb-0.4.1 vs lib/torch/optim/adadelta.rb in torch-rb-0.4.2
- old
+ new
@@ -37,10 +37,10 @@
rho, eps = group[:rho], group[:eps]
state[:step] += 1
if group[:weight_decay] != 0
- grad = grad.add(group[:weight_decay], p.data)
+ grad = grad.add(p.data, alpha: group[:weight_decay])
end
square_avg.mul!(rho).addcmul!(1 - rho, grad, grad)
std = square_avg.add(eps).sqrt!
delta = acc_delta.add(eps).sqrt!.div!(std).mul!(grad)