detector.rb in anomaly-0.2.1

- old
+ new

@@ -56,11 +56,11 @@
         # Default to Array, since built-in Matrix does not give us a big performance advantage.
         cols = @n.times.map { |i| training_examples.map { |r| r[i] } }
         @mean = cols.map { |c| alt_mean(c) }
         @std = cols.each_with_index.map { |c, i| alt_std(c, @mean[i]) }
       end
-      @std.map! { |std| (std == 0 || std.nan?) ? Float::MIN : std }
+      @std.map! { |std| (std == 0 || std.nan?) ? 1e-10 : std }
 
       if @eps == 0
         # Find the best eps.
         epss = (1..9).map { |i| [1, 3, 5, 7, 9].map { |j| (j * 10**(-i)).to_f } }.flatten
         f1_scores = epss.map { |eps| [eps, compute_f1_score(test_examples, eps)] }
@@ -72,20 +72,39 @@
       @m > 0
     end
 
     # Limit the probability of features to [0,1]
     # to keep probabilities at same scale.
+    # Use log to prevent underflow
     #
     # In the added (+) version, x may be one example (its first element is not
     # an Array) or an Array of examples. A single example yields one value; an
     # Array of examples yields an Array with one value per example.
     # Raises RuntimeError ("Train me first") unless trained? is true.
     def probability(x)
       raise "Train me first" unless trained?
       # NOTE(review): the removed lines below raised ArgumentError when
       # x.size != @n and replaced a NaN density with 1; the added lines have
       # no counterpart for either - confirm that this is intended.
-      raise ArgumentError, "First argument must have #{@n} elements" if x.size != @n
-      @n.times.map do |i|
-        p = normal_pdf(x[i], @mean[i], @std[i])
-        (p.nan? || p > 1) ? 1 : p
-      end.reduce(1, :*)
+
       # Wrap a single example so both input shapes go through the same loop.
+      singular = !x.first.is_a?(Array)
+      x = [x] if singular
+
+      y =
+        x.map do |xi|
           # Running sum of the log-densities of this example's features.
+          prob = 0
           # The Array built by this map is discarded; the block runs only to
           # accumulate into prob.
+          @n.times.map do |i|
+            pi = normal_pdf(xi[i], @mean[i], @std[i])
             # Densities above 1 are capped at 1, contributing log(1) = 0.
             # NOTE(review): `pi > 1` is false for NaN, so a NaN density is
             # passed to Math.log unchanged here.
+            prob += Math.log(pi > 1 ? 1 : pi)
+          end
           # Convert the summed logs back to a probability.
+          Math.exp(prob)
+        end
+
       # Unwrap again when the caller passed a single example.
+      singular ? y.first : y
     end
 
     # Reports whether probability(x) falls below eps (defaults to @eps).
     # In the added (+) version, when probability(x) returns an Array the result
     # is an Array of booleans, one per element; otherwise it is a single boolean.
     def anomaly?(x, eps = @eps)
-      probability(x) < eps
+      y = probability(x)
+
       # Array result: compare each probability with eps individually.
+      if y.is_a?(Array)
+        y.map do |yi|
+          yi < eps
+        end
+      else
+        y < eps
+      end
     end
 
     protected
 
     # Square root of 2 * pi, computed once when the constant is defined.
     # NOTE(review): presumably the normalising term used by normal_pdf - its
     # use is outside this view; confirm.
     SQRT2PI = Math.sqrt(2 * Math::PI)