lib/anomaly/detector.rb in anomaly-0.2.0 vs lib/anomaly/detector.rb in anomaly-0.2.1

- old
+ new

@@ -56,11 +56,11 @@
         # Default to Array, since built-in Matrix does not give us a big performance advantage.
         cols = @n.times.map { |i| training_examples.map { |r| r[i] } }
         @mean = cols.map { |c| alt_mean(c) }
         @std = cols.each_with_index.map { |c, i| alt_std(c, @mean[i]) }
       end
-      @std.map! { |std| (std == 0 || std.nan?) ? Float::MIN : std }
+      @std.map! { |std| (std == 0 || std.nan?) ? 1e-10 : std }
 
       if @eps == 0
         # Find the best eps.
         epss = (1..9).map { |i| [1, 3, 5, 7, 9].map { |j| (j * 10**(-i)).to_f } }.flatten
         f1_scores = epss.map { |eps| [eps, compute_f1_score(test_examples, eps)] }
@@ -72,20 +72,39 @@
       @m > 0
     end
 
     # Limit the probability of features to [0,1]
     # to keep probabilities at same scale.
+    # Use log to prevent underflow
     def probability(x)
       raise "Train me first" unless trained?
-      raise ArgumentError, "First argument must have #{@n} elements" if x.size != @n
-      @n.times.map do |i|
-        p = normal_pdf(x[i], @mean[i], @std[i])
-        (p.nan? || p > 1) ? 1 : p
-      end.reduce(1, :*)
+
+      singular = !x.first.is_a?(Array)
+      x = [x] if singular
+
+      y =
+        x.map do |xi|
+          prob = 0
+          @n.times.map do |i|
+            pi = normal_pdf(xi[i], @mean[i], @std[i])
+            prob += Math.log(pi > 1 ? 1 : pi)
+          end
+          Math.exp(prob)
+        end
+
+      singular ? y.first : y
     end
 
     def anomaly?(x, eps = @eps)
-      probability(x) < eps
+      y = probability(x)
+
+      if y.is_a?(Array)
+        y.map do |yi|
+          yi < eps
+        end
+      else
+        y < eps
+      end
     end
 
     protected
 
     SQRT2PI = Math.sqrt(2 * Math::PI)