Sha256: 79633dd0c685666e9f81aba582d51f7d028ceef5414478a81b182b56416f19f2

Contents?: true

Size: 1.57 KB

Versions: 1

Compression:

Stored size: 1.57 KB

Contents

module Anomaly
  # Scores feature vectors against a per-feature normal distribution fitted to
  # training data: every column gets its own mean and standard deviation, and
  # the probability of a vector is the product of the per-feature densities.
  class Detector

    # @param data [#to_a, nil] optional training rows; when given (non-nil),
    #   the detector is trained immediately via {#train}.
    def initialize(data = nil)
      @trained = false
      train(data) if data
    end

    # Fits the per-feature mean and standard deviation from +data+.
    #
    # @param data [#to_a] rows of numeric features (one row per example).
    # @return [true]
    # @raise [ArgumentError] on the Array code path when +data+ has no rows
    #   (previously this surfaced as a NoMethodError on nil).
    def train(data)
      if defined?(NMatrix)
        d = NMatrix.to_na(data)
        # Convert these to an array for Marshal.dump
        # NOTE(review): presumably dimension 1 reduces over examples, yielding
        # one value per feature -- confirm against the NMatrix implementation.
        @mean = d.mean(1).to_a
        @std = d.stddev(1).to_a
      else
        # Default to Array, since built-in Matrix does not give us a big performance advantage.
        d = data.to_a
        # Fail fast and before touching any state, so a failed call leaves the
        # detector untrained.
        raise ArgumentError, "data must contain at least one row" if d.empty?

        # Turn rows into columns; the column count is taken from the first row.
        cols = d.first.size.times.map { |i| d.map { |r| r[i] } }
        @mean = cols.map { |c| mean(c) }
        @std = cols.each_with_index.map { |c, i| std(c, @mean[i]) }
      end

      # A zero or NaN deviation (constant feature, or a single example) is
      # clamped to the smallest positive Float instead of being rejected.
      @std.map! { |s| (s == 0 || s.nan?) ? Float::MIN : s }

      @trained = true
    end

    # @return [Boolean] whether {#train} has completed successfully.
    def trained?
      @trained
    end

    # Product of the per-feature normal densities of +x+.
    #
    # @param x [Array<Numeric>] one value per trained feature.
    # @return [Numeric] the product of the densities (1 when there are no features).
    # @raise [RuntimeError] if the detector has not been trained.
    # @raise [ArgumentError] if +x+ does not have one element per trained feature.
    def probability(x)
      raise "Train me first" unless trained?
      raise ArgumentError, "x must have #{@mean.size} elements" if x.size != @mean.size
      x.each_with_index.map { |a, i| normal_pdf(a, @mean[i], @std[i]) }.reduce(1, :*)
    end

    # @param x [Array<Numeric>] feature vector to test.
    # @param epsilon [Numeric] probability threshold.
    # @return [Boolean] true when the probability of +x+ is strictly below +epsilon+.
    def anomaly?(x, epsilon)
      probability(x) < epsilon
    end

    protected

    SQRT2PI = Math.sqrt(2*Math::PI)

    # Normal probability density of +x+ for the given mean and deviation.
    # A NaN result is replaced by 1 so that the feature drops out of the
    # product; this happens when the deviation was clamped to Float::MIN and
    # +x+ equals the mean (0.0/0.0 in the exponent).
    def normal_pdf(x, mean = 0, std = 1)
      p = 1.0/(SQRT2PI*std)*Math.exp(-((x - mean)**2/(2.0*(std**2))))
      p.nan? ? 1 : p
    end

    # The helpers below are only used on the Array path (NMatrix not defined).

    # Arithmetic mean of +x+, accumulated as a Float.
    def mean(x)
      x.inject(0.0) { |a, i| a + i }/x.size
    end

    # Sample standard deviation of +x+ (divides by size - 1) around +mean+.
    # A single-element +x+ yields NaN, which #train clamps afterwards.
    def std(x, mean)
      Math.sqrt(x.inject(0.0) { |a, i| a + (i - mean) ** 2 }/(x.size - 1))
    end

  end
end

Version data entries

1 entry across 1 version & 1 rubygem

Version Path
anomaly-0.0.2 lib/anomaly/detector.rb