lib/anomaly/detector.rb in anomaly-0.2.0 vs lib/anomaly/detector.rb in anomaly-0.2.1
- old
+ new
@@ -56,11 +56,11 @@
# Default to Array, since built-in Matrix does not give us a big performance advantage.
cols = @n.times.map { |i| training_examples.map { |r| r[i] } }
@mean = cols.map { |c| alt_mean(c) }
@std = cols.each_with_index.map { |c, i| alt_std(c, @mean[i]) }
end
- @std.map! { |std| (std == 0 || std.nan?) ? Float::MIN : std }
+ @std.map! { |std| (std == 0 || std.nan?) ? 1e-10 : std }
if @eps == 0
# Find the best eps.
epss = (1..9).map { |i| [1, 3, 5, 7, 9].map { |j| (j * 10**(-i)).to_f } }.flatten
f1_scores = epss.map { |eps| [eps, compute_f1_score(test_examples, eps)] }
@@ -72,20 +72,39 @@
@m > 0
end
# Limit the probability of each feature to [0, 1]
# to keep probabilities at the same scale.
+ # Sum logarithms instead of multiplying to prevent floating-point underflow.
+ #
+ # @param x [Array, Array<Array>] one example of @n feature values, or an
+ #   array of such examples
+ # @return [Float, Array<Float>] the probability of a single example, or one
+ #   probability per example when given an array of examples
+ # @raise [RuntimeError] if the detector has not been trained
+ # @raise [ArgumentError] if an example does not have exactly @n elements
def probability(x)
raise "Train me first" unless trained?
- raise ArgumentError, "First argument must have #{@n} elements" if x.size != @n
- @n.times.map do |i|
- p = normal_pdf(x[i], @mean[i], @std[i])
- (p.nan? || p > 1) ? 1 : p
- end.reduce(1, :*)
+
+ # A nested array is a batch of examples; anything else is one example.
+ singular = !x.first.is_a?(Array)
+ examples = singular ? [x] : x
+
+ probabilities =
+ examples.map do |example|
+ # Validate each example rather than x itself: for a batch, x.size is the
+ # number of examples, not the number of features.
+ raise ArgumentError, "First argument must have #{@n} elements" if example.size != @n
+
+ # Add log-densities rather than multiplying densities, so a long run of
+ # small factors does not underflow before the final exp.
+ log_probability =
+ @n.times.reduce(0.0) do |sum, i|
+ density = normal_pdf(example[i], @mean[i], @std[i])
+ # A NaN or > 1 density counts as 1, which contributes log(1) = 0.
+ (density.nan? || density > 1) ? sum : sum + Math.log(density)
+ end
+ Math.exp(log_probability)
+ end
+
+ singular ? probabilities.first : probabilities
end
+ # Flags examples whose probability falls below a threshold.
+ #
+ # @param x a single example or an array of examples, passed straight to #probability
+ # @param eps threshold the probability is compared against; defaults to @eps
+ # @return [Boolean, Array<Boolean>] one flag when #probability returns a single
+ #   value, or one flag per element when it returns an Array
def anomaly?(x, eps = @eps)
- probability(x) < eps
+ result = probability(x)
+ return result < eps unless result.is_a?(Array)
+
+ result.map { |prob| prob < eps }
end
protected
SQRT2PI = Math.sqrt(2 * Math::PI)