Module Ankusa::Classifier
In: lib/ankusa/classifier.rb

Methods

Attributes

classnames  [R] 

Public Class methods

[Source]

# File lib/ankusa/classifier.rb, line 6
    # Sets up a classifier backed by the given storage object.
    #
    # storage:: backend object; this method calls +init_tables+ and
    #           +classnames+ on it.
    #
    # The backend's +init_tables+ is invoked first, then whatever the backend
    # reports from +classnames+ is kept in <tt>@classnames</tt>.
    def initialize(storage)
      @storage = storage
      storage.init_tables
      @classnames = storage.classnames
    end

Public Instance methods

text can be either an array of strings or a single string; klass is a symbol.

[Source]

# File lib/ankusa/classifier.rb, line 14
    # Adds +text+ to the training data recorded for class +klass+.
    #
    # klass:: the class to train (documented as a symbol).
    # text::  a single string or an array of strings; an array counts as
    #         +text.length+ documents, anything else as one document.
    #
    # For every word/count pair of the TextHash built from +text+ the
    # storage word count is incremented; if a block is given it is then
    # yielded the word and its count. Afterwards the class's total word
    # count and document count are incremented, +klass+ is appended to
    # <tt>@classnames</tt> if absent, and the memoized totals are dropped.
    #
    # Returns the TextHash built from +text+.
    def train(klass, text)
      word_hash = TextHash.new(text)
      word_hash.each do |term, occurrences|
        @storage.incr_word_count(klass, term, occurrences)
        yield(term, occurrences) if block_given?
      end
      @storage.incr_total_word_count(klass, word_hash.word_count)

      documents = text.kind_of?(Array) ? text.length : 1
      @storage.incr_doc_count(klass, documents)

      @classnames << klass unless @classnames.include?(klass)

      # The stored counts changed, so the memoized aggregates are stale.
      @doc_count_totals = nil
      @vocab_sizes = nil
      word_hash
    end

text can be either an array of strings or a single string; klass is a symbol.

[Source]

# File lib/ankusa/classifier.rb, line 32
    # Removes +text+ from the training data recorded for class +klass+.
    #
    # klass:: the class to untrain (documented as a symbol).
    # text::  a single string or an array of strings; an array counts as
    #         +text.length+ documents, anything else as one document.
    #
    # For every word/count pair of the TextHash built from +text+ the
    # storage word count is incremented by the negated count; if a block is
    # given it is then yielded the word and its (positive) count. The
    # class's total word count and document count are decremented the same
    # way, and the memoized totals are dropped. <tt>@classnames</tt> is not
    # modified.
    #
    # Returns the TextHash built from +text+.
    def untrain(klass, text)
      word_hash = TextHash.new(text)
      word_hash.each do |term, occurrences|
        @storage.incr_word_count(klass, term, -occurrences)
        yield(term, occurrences) if block_given?
      end
      @storage.incr_total_word_count(klass, -word_hash.word_count)

      documents = text.kind_of?(Array) ? text.length : 1
      @storage.incr_doc_count(klass, -documents)

      # The stored counts changed, so the memoized aggregates are stale.
      @doc_count_totals = nil
      @vocab_sizes = nil
      word_hash
    end

Protected Instance methods

[Source]

# File lib/ankusa/classifier.rb, line 59
    # Memoized accessor for the storage backend's +doc_count_totals+.
    #
    # The backend is only queried while <tt>@doc_count_totals</tt> is nil or
    # false; otherwise the previously fetched value is returned as is.
    def doc_count_totals
      @doc_count_totals || (@doc_count_totals = @storage.doc_count_totals)
    end

[Source]

# File lib/ankusa/classifier.rb, line 48
    # Computes a smoothed per-class probability for +word+.
    #
    # word::       the word to look up in storage.
    # classnames:: the classes to compute a probability for.
    #
    # The stored counts of +word+ are fetched once and restricted to the
    # requested classes. For each class the result is
    # <tt>(count + 1) / (total word count of class + vocabulary size of class)</tt>,
    # i.e. add-one (Laplace) smoothing.
    #
    # A requested class that has no entry in +vocab_sizes+ gets probability
    # 0.0 instead of raising on a nil vocabulary size.
    # NOTE(review): assumes +vocab_sizes+ answers nil for classes it has
    # never seen -- confirm against the storage backends.
    #
    # Returns a Hash (default value 0) mapping class name to probability.
    def get_word_probs(word, classnames)
      probs = Hash.new 0
      @storage.get_word_counts(word).each { |k,v| probs[k] = v if classnames.include? k }
      vs = vocab_sizes
      classnames.each { |cn|
        vocab_size = vs[cn]
        if vocab_size.nil?
          # never-seen class: no vocabulary to smooth over
          probs[cn] = 0.0
          next
        end
        # use a laplacian smoother
        probs[cn] = (probs[cn] + 1).to_f / (@storage.get_total_word_count(cn) + vocab_size).to_f
      }
      probs
    end

[Source]

# File lib/ankusa/classifier.rb, line 63
    # Memoized accessor for the storage backend's +get_vocabulary_sizes+.
    #
    # The backend is only queried while <tt>@vocab_sizes</tt> is nil or
    # false; otherwise the previously fetched value is returned as is.
    def vocab_sizes
      @vocab_sizes || (@vocab_sizes = @storage.get_vocabulary_sizes)
    end

[Validate]