lib/anomaly/detector.rb in anomaly-0.0.1 vs lib/anomaly/detector.rb in anomaly-0.0.2

- old
+ new

@@ -1,27 +1,40 @@ module Anomaly class Detector - def initialize(data) - # Use NMatrix if possible - if defined?(NMatrix) and (!defined?(Matrix) or !data.is_a?(Matrix)) - d = data.is_a?(NMatrix) ? data : NMatrix.to_na(data) + def initialize(data = nil) + @trained = false + train(data) if data + end + def train(data) + if defined?(NMatrix) + d = NMatrix.to_na(data) # Convert these to an array for Marshal.dump @mean = d.mean(1).to_a @std = d.stddev(1).to_a else - d = data.is_a?(Matrix) ? data : Matrix.rows(data) - cols = d.column_size.times.map{|i| d.column(i)} + # Default to Array, since built-in Matrix does not give us a big performance advantage. + d = data.to_a + cols = d.first.size.times.map{|i| d.map{|r| r[i]}} @mean = cols.map{|c| mean(c)} @std = cols.each_with_index.map{|c,i| std(c, @mean[i])} end - raise "Standard deviation cannot be zero" if @std.find_index{|i| i == 0 or i.nan?} + @std.map!{|std| (std == 0 or std.nan?) ? Float::MIN : std} + + # raise "Standard deviation cannot be zero" if @std.find_index{|i| i == 0 or i.nan?} + + @trained = true end + def trained? + @trained + end + def probability(x) + raise "Train me first" unless trained? raise ArgumentError, "x must have #{@mean.size} elements" if x.size != @mean.size x.each_with_index.map{|a,i| normal_pdf(a, @mean[i], @std[i]) }.reduce(1, :*) end def anomaly?(x, epsilon) @@ -30,11 +43,13 @@ protected SQRT2PI = Math.sqrt(2*Math::PI) + # Return 1 (exclude feature) if std ~ 0 def normal_pdf(x, mean = 0, std = 1) - 1/(SQRT2PI*std)*Math.exp(-((x - mean)**2/(2.0*(std**2)))) + p = 1.0/(SQRT2PI*std)*Math.exp(-((x - mean)**2/(2.0*(std**2)))) + p.nan? ? 1 : p end # Not used for NArray def mean(x)