lib/anomaly/detector.rb in anomaly-0.0.2 vs lib/anomaly/detector.rb in anomaly-0.0.3
- old
+ new
@@ -1,55 +1,60 @@
module Anomaly
class Detector
def initialize(data = nil)
- @trained = false
+ @m = 0
train(data) if data
end
def train(data)
if defined?(NMatrix)
d = NMatrix.to_na(data)
+ @n, @m = d.sizes
# Convert these to an array for Marshal.dump
@mean = d.mean(1).to_a
@std = d.stddev(1).to_a
else
# Default to Array, since built-in Matrix does not give us a big performance advantage.
d = data.to_a
- cols = d.first.size.times.map{|i| d.map{|r| r[i]}}
+ @m = d.size
+ @n = d.first ? d.first.size : 0
+ cols = @n.times.map{|i| d.map{|r| r[i]}}
@mean = cols.map{|c| mean(c)}
@std = cols.each_with_index.map{|c,i| std(c, @mean[i])}
end
-
@std.map!{|std| (std == 0 or std.nan?) ? Float::MIN : std}
-
- # raise "Standard deviation cannot be zero" if @std.find_index{|i| i == 0 or i.nan?}
-
- @trained = true
end
def trained?
- @trained
+ @m > 0
end
+ def samples
+ @m
+ end
+
+ # Limit the probability of features to [0,1]
+ # to keep probabilities at same scale.
def probability(x)
raise "Train me first" unless trained?
- raise ArgumentError, "x must have #{@mean.size} elements" if x.size != @mean.size
- x.each_with_index.map{|a,i| normal_pdf(a, @mean[i], @std[i]) }.reduce(1, :*)
+ raise ArgumentError, "x must have #{@n} elements" if x.size != @n
+ @n.times.map do |i|
+ p = normal_pdf(x[i], @mean[i], @std[i])
+ (p.nan? or p > 1) ? 1 : p
+ end.reduce(1, :*)
end
def anomaly?(x, epsilon)
probability(x) < epsilon
end
protected
SQRT2PI = Math.sqrt(2*Math::PI)
- # Return 1 (exclude feature) if std ~ 0
def normal_pdf(x, mean = 0, std = 1)
- p = 1.0/(SQRT2PI*std)*Math.exp(-((x - mean)**2/(2.0*(std**2))))
- p.nan? ? 1 : p
+ 1/(SQRT2PI*std)*Math.exp(-((x - mean)**2/(2.0*(std**2))))
end
# Not used for NArray
def mean(x)