Sha256: 51096a490b64fc1377a8df1c72c761f666ecd683bb10e8589a5ca26ada3ebf7e

Contents?: true

Size: 1.42 KB

Versions: 3

Compression:

Stored size: 1.42 KB

Contents

require 'forwardable'

require 'classifier'

module TWSS

  # Thin wrapper around a two-category Classifier::Bayes instance.
  #
  # The classifier state is loaded from a Marshal dump on disk when one is
  # available, and can be written back with #dump_classifier_to_file.
  # #train, #untrain and #classifications are forwarded straight to the
  # underlying classifier.
  class Engine

    extend Forwardable

    def_delegators :@classifier, :train, :untrain, :classifications

    # Default location of the marshalled classifier, relative to this file.
    DATA_FILE = File.join(File.dirname(__FILE__), '../../data/classifier')

    # Category names handed to the classifier.
    TRUE = '1'
    FALSE = '0'

    # Minimum margin by which the TRUE score must exceed the FALSE score for
    # #classify to answer true.
    attr_accessor :threshold

    # options - Hash of settings:
    #           :data_file - path of the marshalled classifier
    #                        (default: DATA_FILE)
    #           :threshold - score margin used by #classify (default: 5.0)
    #
    # Falls back to a fresh, untrained classifier when the data file is
    # missing or cannot be loaded.
    def initialize(options = {})
      @data_file = options[:data_file] || DATA_FILE
      @threshold = options[:threshold] || 5.0
      @classifier = load_classifier_from_file!(@data_file) || new_classifier
    end

    # Returns true when str has more than three words and the classifier
    # scores the TRUE category above the FALSE category by more than
    # +threshold+; returns false otherwise (including for nil input).
    def classify(str)
      return false unless basic_conditions_met?(str)

      scores = @classifier.classifications(str)
      scores[TRUE] - scores[FALSE] > threshold
    end

    # Dumps the current classifier state to the specified path
    # (defaults to the data file this engine was created with).
    # Returns nil.
    def dump_classifier_to_file(f = @data_file)
      # Binary mode: Marshal output is raw bytes, and the block form
      # guarantees the handle is closed even if dumping or writing raises.
      File.open(f, 'wb') { |io| io.write(Marshal.dump(@classifier)) }
      nil
    end

    # Clears out the current classifier instance and nukes the data file.
    # Returns the fresh classifier.
    def clear_state!
      # File.exists? was removed in Ruby 3.2; File.exist? works everywhere.
      File.delete(@data_file) if File.exist?(@data_file)
      @classifier = new_classifier
    end

    private

    # Builds an untrained classifier with the two categories.
    def new_classifier
      Classifier::Bayes.new(TRUE, FALSE)
    end

    # True when str contains more than 3 whitespace-separated words.
    # nil is treated as an empty string.
    def basic_conditions_met?(str)
      str.to_s.split(' ').length > 3
    end

    # Given a path to a classifier file, load the instance into memory.
    # Returns nil when the file does not exist or cannot be read/unmarshalled,
    # so the caller can fall back to a new classifier.
    #
    # NOTE(review): Marshal.load can instantiate arbitrary objects; only point
    # :data_file at files you trust.
    def load_classifier_from_file!(f)
      return nil unless File.exist?(f)

      Marshal.load(File.binread(f))
    rescue StandardError
      # Deliberate best-effort: a corrupt or unreadable dump is ignored.
      nil
    end

  end

end

Version data entries

3 entries across 3 versions & 1 rubygems

Version Path
twss-0.0.3 lib/twss/engine.rb
twss-0.0.2 lib/twss/engine.rb
twss-0.0.1 lib/twss/engine.rb