lib/omnicat/classifiers/strategy.rb in omnicat-0.2.2 vs lib/omnicat/classifiers/strategy.rb in omnicat-0.3.0

- old
+ new

@@ -7,116 +7,116 @@
 # Copyright:: Copyright (c) 2013 Mustafa Turan
 # License:: MIT
 #
 # The class supplies abstract methods for possible text classifiers
 class Strategy < ::OmniCat::Base
-  attr_accessor :categories # ::OmniCat::Hash - Hash of categories
+  attr_accessor :categories # Hash - Hash of categories
   attr_accessor :category_count # Integer - Total category count
-  attr_accessor :category_size_limit # Integer - Max allowed category
-  attr_accessor :doc_count # Integer - Total token count
+  attr_accessor :category_size_limit # Integer - Max allowed category size
+  attr_accessor :doc_count # Integer - Total doc count
   attr_accessor :token_count # Integer - Total token count
-  attr_accessor :uniq_token_count # Integer - Total uniq token count
+  attr_accessor :unique_token_count # Integer - Total uniq token count

   def initialize(strategy_hash = {})
     @categories = {}
     @category_count = strategy_hash[:category_count].to_i
     @category_size_limit = strategy_hash[:category_size_limit].to_i
     @doc_count = strategy_hash[:doc_count].to_i
     @token_count = strategy_hash[:token_count].to_i
-    @uniq_token_count = strategy_hash[:uniq_token_count].to_i
+    @unique_token_count = strategy_hash[:unique_token_count].to_i
   end

   # Abstract method for adding new classification category
   #
   # ==== Parameters
   #
-  # * +name+ - Name for category
+  # * +category_name+ - Name for category
   #
-  def add_category(name)
+  def add_category(category_name)
     not_implemented_error(__callee__)
   end

   # Allows adding multiple classification categories
   #
   # ==== Parameters
   #
-  # * +names+ - Array of categories
+  # * +category_names+ - Array of categories
   #
-  def add_categories(names)
-    names.each { |name| add_category(name) }
+  def add_categories(category_names)
+    category_names.each { |category_name| add_category(category_name) }
   end

   # Abstract method for training the desired category with a document
   #
   # ==== Parameters
   #
-  # * +category+ - Name of the category from added categories list
-  # * +doc+ - Document text
+  # * +category_name+ - Name of the category from added categories list
+  # * +doc_content+ - Document text
   #
-  def train(category_name, doc)
+  def train(category_name, doc_content)
     not_implemented_error(__callee__)
   end

   # Train the desired category with multiple documents
   #
   # ==== Parameters
   #
-  # * +category+ - Name of the category from added categories list
-  # * +docs+ - Array of documents
+  # * +category_name+ - Name of the category from added categories list
+  # * +doc_contents+ - Array of documents
   #
-  def train_batch(category, docs)
-    docs.each { |doc| train(category, doc) }
+  def train_batch(category_name, doc_contents)
+    doc_contents.each { |doc_content| train(category_name, doc_content) }
   end

   # Abstract method for untraining the desired category with a document
   #
   # ==== Parameters
   #
-  # * +category+ - Name of the category from added categories list
-  # * +doc+ - Document text
+  # * +category_name+ - Name of the category from added categories list
+  # * +doc_content+ - Document text
   #
-  def untrain(category_name, doc)
+  def untrain(category_name, doc_content)
     not_implemented_error(__callee__)
   end

   # Untrain the desired category with multiple documents
   #
   # ==== Parameters
   #
-  # * +category+ - Name of the category from added categories list
-  # * +docs+ - Array of documents
+  # * +category_name+ - Name of the category from added categories list
+  # * +doc_contents+ - Array of documents
   #
-  def untrain_batch(category, docs)
-    docs.each { |doc| untrain(category, doc) }
+  def untrain_batch(category_name, doc_contents)
+    doc_contents.each { |doc_content| untrain(category_name, doc_content) }
   end

   # Abstract method for classifying the given document
   #
   # ==== Parameters
   #
-  # * +doc+ - The document for classification
+  # * +doc_content+ - The document for classification
   #
   # ==== Returns
   #
   # * +result+ - OmniCat::Result object
   #
-  def classify(doc)
+  def classify(doc_content)
     not_implemented_error(__callee__)
   end

   # Classify the multiple documents at a time
   #
   # ==== Parameters
   #
-  # * +docs+ - Array of documents
+  # * +doc_contents+ - Array of documents
   #
   # ==== Returns
   #
   # * +result_set+ - Array of OmniCat::Result objects
   #
-  def classify_batch(docs)
-    docs.collect { |doc| classify(doc) }
+  def classify_batch(doc_contents)
+    doc_contents.collect { |doc_content| classify(doc_content) }
   end

   private
   # nodoc
   def not_implemented_error(method_name)
@@ -155,20 +155,20 @@
   def classifiable?
     if category_count < 2
       raise StandardError,
         'At least 2 categories needed for classification process!'
       false
-    elsif doc_avability? == false
+    elsif doc_availability? == false
       raise StandardError,
         'Each category must trained with at least one document!'
       false
     else
       true
     end
   end
   # nodoc
-  def doc_avability?
+  def doc_availability?
     @categories.each do |_, category|
       return false if category.doc_count == 0
     end
     true
   end
\ No newline at end of file