lib/anomaly/detector.rb in anomaly-0.1.0 vs lib/anomaly/detector.rb in anomaly-0.2.0
- old
+ new
@@ -1,15 +1,20 @@
module Anomaly
class Detector
+ attr_reader :mean, :std
attr_accessor :eps
- def initialize(examples = nil, opts = {})
+ def initialize(examples = nil, **opts)
@m = 0
- train(examples, opts) if examples
+ train(examples, **opts) if examples
end
- def train(examples, opts = {})
+ def train(examples, eps: 0)
+ # for Numo::NArray
+ # TODO make more efficient when possible
+ examples = examples.to_a
+
raise "No examples" if examples.empty?
raise "Must have at least two columns" if examples.first.size < 2
# Divide into groups since we only want to train with non-anomalies.
anomalies = []
@@ -22,41 +27,46 @@
end
end
raise "Must have at least one non-anomaly" if non_anomalies.empty?
- @eps = (opts[:eps] || 0).to_f
+ @eps = eps
if @eps > 0
# Use all non-anomalies to train.
training_examples = non_anomalies
else
training_examples, test_examples = partition!(non_anomalies)
test_examples.concat(anomalies)
end
# Remove last column.
- training_examples = training_examples.map{|e| e[0..-2]}
+ training_examples = training_examples.map { |e| e[0..-2] }
@m = training_examples.size
@n = training_examples.first.size
- if defined?(NMatrix)
+ if defined?(Numo::SFloat)
+ training_examples = Numo::SFloat.cast(training_examples)
+ # Convert these to an Array for Marshal.dump
+ @mean = training_examples.mean(0).to_a
+ @std = training_examples.stddev(0).to_a
+ elsif defined?(NMatrix)
training_examples = NMatrix.to_na(training_examples)
# Convert these to an Array for Marshal.dump
@mean = training_examples.mean(1).to_a
@std = training_examples.stddev(1).to_a
else
# Default to Array, since built-in Matrix does not give us a big performance advantage.
- cols = @n.times.map{|i| training_examples.map{|r| r[i]}}
- @mean = cols.map{|c| mean(c)}
- @std = cols.each_with_index.map{|c,i| std(c, @mean[i])}
+ cols = @n.times.map { |i| training_examples.map { |r| r[i] } }
+ @mean = cols.map { |c| alt_mean(c) }
+ @std = cols.each_with_index.map { |c, i| alt_std(c, @mean[i]) }
end
- @std.map!{|std| (std == 0 or std.nan?) ? Float::MIN : std}
+ @std.map! { |std| (std == 0 || std.nan?) ? Float::MIN : std }
if @eps == 0
# Find the best eps.
- epss = (1..9).map{|i| [1,3,5,7,9].map{|j| (j*10**(-i)).to_f }}.flatten
- f1_scores = epss.map{|eps| [eps, compute_f1_score(test_examples, eps)] }
- @eps, best_f1 = f1_scores.max_by{|v| v[1]}
+ epss = (1..9).map { |i| [1, 3, 5, 7, 9].map { |j| (j * 10**(-i)).to_f } }.flatten
+ f1_scores = epss.map { |eps| [eps, compute_f1_score(test_examples, eps)] }
+ @eps, _ = f1_scores.max_by { |v| v[1] }
end
end
def trained?
@m > 0
@@ -67,24 +77,24 @@
def probability(x)
raise "Train me first" unless trained?
raise ArgumentError, "First argument must have #{@n} elements" if x.size != @n
@n.times.map do |i|
p = normal_pdf(x[i], @mean[i], @std[i])
- (p.nan? or p > 1) ? 1 : p
+ (p.nan? || p > 1) ? 1 : p
end.reduce(1, :*)
end
def anomaly?(x, eps = @eps)
probability(x) < eps
end
protected
- SQRT2PI = Math.sqrt(2*Math::PI)
+ SQRT2PI = Math.sqrt(2 * Math::PI)
def normal_pdf(x, mean = 0, std = 1)
- 1/(SQRT2PI*std)*Math.exp(-((x - mean)**2/(2.0*(std**2))))
+ 1 / (SQRT2PI * std) * Math.exp(-((x - mean)**2 / (2.0 * (std**2))))
end
# Find best eps.
def partition!(examples, p_last = 0.2)
@@ -97,12 +107,12 @@
tp = 0
fp = 0
fn = 0
examples.each do |example|
act = example.last != 0
- pred = self.anomaly?(example[0..-2], eps)
- if act and pred
+ pred = anomaly?(example[0..-2], eps)
+ if act && pred
tp += 1
elsif pred # and !act
fp += 1
elsif act # and !pred
fn += 1
@@ -118,15 +128,14 @@
score.nan? ? 0.0 : score
end
# Not used for NArray
- def mean(x)
- x.inject(0.0){|a, i| a + i}/x.size
+ def alt_mean(x)
+ x.sum / x.size
end
- def std(x, mean)
- Math.sqrt(x.inject(0.0){|a, i| a + (i - mean) ** 2}/(x.size - 1))
+ def alt_std(x, mean)
+ Math.sqrt(x.sum { |i| (i - mean)**2 }.to_f / (x.size - 1))
end
-
end
end