lib/omnicat/classifiers/strategy.rb in omnicat-0.2.2 vs lib/omnicat/classifiers/strategy.rb in omnicat-0.3.0
- old
+ new
@@ -7,116 +7,116 @@
# Copyright:: Copyright (c) 2013 Mustafa Turan
# License:: MIT
#
# The class supplies abstract methods for possible text classifiers
class Strategy < ::OmniCat::Base
# Read/write state exposed by every strategy. The constructor below starts
# +categories+ as an empty Hash and seeds each counter from its options hash.
- attr_accessor :categories # ::OmniCat::Hash - Hash of categories
+ attr_accessor :categories # Hash - Hash of categories
attr_accessor :category_count # Integer - Total category count
- attr_accessor :category_size_limit # Integer - Max allowed category
- attr_accessor :doc_count # Integer - Total token count
+ attr_accessor :category_size_limit # Integer - Max allowed category size
+ attr_accessor :doc_count # Integer - Total doc count
attr_accessor :token_count # Integer - Total token count
- attr_accessor :uniq_token_count # Integer - Total uniq token count
+ attr_accessor :unique_token_count # Integer - Total uniq token count
# Builds a strategy from an options hash.
#
# ==== Parameters
#
# * +strategy_hash+ - Hash of initial counter values (defaults to an empty Hash)
#
# +categories+ always starts as an empty Hash, whatever the argument holds.
# Each counter is read from +strategy_hash+ and coerced with +to_i+, so a
# missing key (nil) becomes 0.
def initialize(strategy_hash = {})
@categories = {}
@category_count = strategy_hash[:category_count].to_i
@category_size_limit = strategy_hash[:category_size_limit].to_i
@doc_count = strategy_hash[:doc_count].to_i
@token_count = strategy_hash[:token_count].to_i
- @uniq_token_count = strategy_hash[:uniq_token_count].to_i
+ @unique_token_count = strategy_hash[:unique_token_count].to_i
end
# Abstract method for adding new classification category
#
# ==== Parameters
#
- # * +name+ - Name for category
+ # * +category_name+ - Name for category
#
# The base implementation ignores its argument and only passes the name this
# method was invoked under (+__callee__+) to +not_implemented_error+.
- def add_category(name)
+ def add_category(category_name)
not_implemented_error(__callee__)
end
# Allows adding multiple classification categories
#
# ==== Parameters
#
- # * +names+ - Array of categories
+ # * +category_names+ - Array of categories
#
# Calls +add_category+ once for each element of the given collection, in
# iteration order. The return value is whatever +each+ returns for it.
- def add_categories(names)
- names.each { |name| add_category(name) }
+ def add_categories(category_names)
+ category_names.each { |category_name| add_category(category_name) }
end
# Abstract method for training the desired category with a document
#
# ==== Parameters
#
- # * +category+ - Name of the category from added categories list
- # * +doc+ - Document text
+ # * +category_name+ - Name of the category from added categories list
+ # * +doc_content+ - Document text
#
# The base implementation uses neither argument; it only passes the invoked
# method name (+__callee__+) to +not_implemented_error+.
- def train(category_name, doc)
+ def train(category_name, doc_content)
not_implemented_error(__callee__)
end
# Train the desired category with multiple documents
#
# ==== Parameters
#
- # * +category+ - Name of the category from added categories list
- # * +docs+ - Array of documents
+ # * +category_name+ - Name of the category from added categories list
+ # * +doc_contents+ - Array of documents
#
# Delegates to +train+ once per document, always with the same category
# argument. The return value is whatever +each+ returns for the collection.
- def train_batch(category, docs)
- docs.each { |doc| train(category, doc) }
+ def train_batch(category_name, doc_contents)
+ doc_contents.each { |doc_content| train(category_name, doc_content) }
end
# Abstract method for untraining the desired category with a document
#
# ==== Parameters
#
- # * +category+ - Name of the category from added categories list
- # * +doc+ - Document text
+ # * +category_name+ - Name of the category from added categories list
+ # * +doc_content+ - Document text
#
# The base implementation uses neither argument; it only passes the invoked
# method name (+__callee__+) to +not_implemented_error+.
- def untrain(category_name, doc)
+ def untrain(category_name, doc_content)
not_implemented_error(__callee__)
end
# Untrain the desired category with multiple documents
#
# ==== Parameters
#
- # * +category+ - Name of the category from added categories list
- # * +docs+ - Array of documents
+ # * +category_name+ - Name of the category from added categories list
+ # * +doc_contents+ - Array of documents
#
# Delegates to +untrain+ once per document, always with the same category
# argument. The return value is whatever +each+ returns for the collection.
- def untrain_batch(category, docs)
- docs.each { |doc| untrain(category, doc) }
+ def untrain_batch(category_name, doc_contents)
+ doc_contents.each { |doc_content| untrain(category_name, doc_content) }
end
# Abstract method for classifying the given document
#
# ==== Parameters
#
- # * +doc+ - The document for classification
+ # * +doc_content+ - The document for classification
#
# ==== Returns
#
# * +result+ - OmniCat::Result object
#
# The base implementation ignores the document and only passes the invoked
# method name (+__callee__+) to +not_implemented_error+.
- def classify(doc)
+ def classify(doc_content)
not_implemented_error(__callee__)
end
# Classify multiple documents at a time
#
# ==== Parameters
#
- # * +docs+ - Array of documents
+ # * +doc_contents+ - Array of documents
#
# ==== Returns
#
# * +result_set+ - Array of OmniCat::Result objects
#
# Maps each document through +classify+ with +collect+, so the returned
# collection holds one result per input document, in the same order.
- def classify_batch(docs)
- docs.collect { |doc| classify(doc) }
+ def classify_batch(doc_contents)
+ doc_contents.collect { |doc_content| classify(doc_content) }
end
private
# nodoc
def not_implemented_error(method_name)
@@ -155,20 +155,20 @@
# Checks the preconditions for classification: +category_count+ must be at
# least 2 and the document-availability predicate must not return false.
# Returns true when both hold; otherwise raises StandardError with a message
# naming the unmet precondition.
#
# NOTE(review): the bare +false+ following each +raise+ is unreachable, so
# this method never actually returns false -- it either returns true or raises.
def classifiable?
if category_count < 2
raise StandardError,
'At least 2 categories needed for classification process!'
false
- elsif doc_avability? == false
+ elsif doc_availability? == false
raise StandardError,
'Each category must trained with at least one document!'
false
else
true
end
end
# nodoc
# Returns false as soon as any value in +@categories+ reports a +doc_count+
# of 0, and true otherwise. An empty +@categories+ therefore yields true.
- def doc_avability?
+ def doc_availability?
@categories.each do |_, category|
# Early exit: one category with no documents fails the whole check.
return false if category.doc_count == 0
end
true
end
\ No newline at end of file