lib/anomaly/detector.rb in anomaly-0.0.2 vs lib/anomaly/detector.rb in anomaly-0.0.3

- old
+ new

@@ -1,55 +1,60 @@ module Anomaly class Detector def initialize(data = nil) - @trained = false + @m = 0 train(data) if data end def train(data) if defined?(NMatrix) d = NMatrix.to_na(data) + @n, @m = d.sizes # Convert these to an array for Marshal.dump @mean = d.mean(1).to_a @std = d.stddev(1).to_a else # Default to Array, since built-in Matrix does not give us a big performance advantage. d = data.to_a - cols = d.first.size.times.map{|i| d.map{|r| r[i]}} + @m = d.size + @n = d.first ? d.first.size : 0 + cols = @n.times.map{|i| d.map{|r| r[i]}} @mean = cols.map{|c| mean(c)} @std = cols.each_with_index.map{|c,i| std(c, @mean[i])} end - @std.map!{|std| (std == 0 or std.nan?) ? Float::MIN : std} - - # raise "Standard deviation cannot be zero" if @std.find_index{|i| i == 0 or i.nan?} - - @trained = true end def trained? - @trained + @m > 0 end + def samples + @m + end + + # Limit the probability of features to [0,1] + # to keep probabilities at same scale. def probability(x) raise "Train me first" unless trained? - raise ArgumentError, "x must have #{@mean.size} elements" if x.size != @mean.size - x.each_with_index.map{|a,i| normal_pdf(a, @mean[i], @std[i]) }.reduce(1, :*) + raise ArgumentError, "x must have #{@n} elements" if x.size != @n + @n.times.map do |i| + p = normal_pdf(x[i], @mean[i], @std[i]) + (p.nan? or p > 1) ? 1 : p + end.reduce(1, :*) end def anomaly?(x, epsilon) probability(x) < epsilon end protected SQRT2PI = Math.sqrt(2*Math::PI) - # Return 1 (exclude feature) if std ~ 0 def normal_pdf(x, mean = 0, std = 1) - p = 1.0/(SQRT2PI*std)*Math.exp(-((x - mean)**2/(2.0*(std**2)))) - p.nan? ? 1 : p + 1/(SQRT2PI*std)*Math.exp(-((x - mean)**2/(2.0*(std**2)))) end # Not used for NArray def mean(x)