require 'omnicat'

module OmniCat
  module Classifiers
    #
    # Author::    Mustafa Turan (mailto:mustafaturan.net@gmail.com)
    # Copyright:: Copyright (c) 2013 Mustafa Turan
    # License::   MIT
    #
    # The class supplies abstract methods for possible text classifiers
    class Strategy < ::OmniCat::Base
      attr_accessor :categories # Hash - Hash of categories
      attr_accessor :category_count # Integer - Total category count
      attr_accessor :category_size_limit # Integer - Max allowed category size
      attr_accessor :doc_count # Integer - Total doc count
      attr_accessor :token_count # Integer - Total token count
      attr_accessor :unique_token_count # Integer - Total uniq token count

      # Builds a strategy with an empty category list and integer counters.
      #
      # ==== Parameters
      #
      # * +strategy_hash+ - Optional Hash of initial counter values, read
      #   from the keys :category_count, :category_size_limit, :doc_count,
      #   :token_count and :unique_token_count. Every value goes through
      #   +to_i+, so a nil lookup result becomes 0.
      #
      def initialize(strategy_hash = {})
        @categories = {}
        counters = [:category_count, :category_size_limit, :doc_count,
                    :token_count, :unique_token_count]
        counters.each do |counter|
          # Each counter is stored in the instance variable of the same name.
          instance_variable_set("@#{counter}", strategy_hash[counter].to_i)
        end
      end

      # Abstract hook for registering a new classification category.
      # The base implementation only reports itself as unimplemented;
      # concrete classifiers are expected to override it.
      #
      # ==== Parameters
      #
      # * +category_name+ - Name of the category to add
      #
      # ==== Raises
      #
      # * +NotImplementedError+ - always, unless overridden
      #
      def add_category(category_name)
        abstract_method = __callee__
        not_implemented_error(abstract_method)
      end

      # Registers several classification categories in one call by passing
      # each name, in order, to +add_category+.
      #
      # ==== Parameters
      #
      # * +category_names+ - Array of category names
      #
      # ==== Returns
      #
      # * The result of +each+ on +category_names+ (for an Array, the
      #   array itself)
      #
      def add_categories(category_names)
        category_names.each do |name|
          add_category(name)
        end
      end

      # Abstract hook for training a category with a single document.
      # The base implementation only reports itself as unimplemented;
      # concrete classifiers are expected to override it.
      #
      # ==== Parameters
      #
      # * +category_name+ - Name of a category from the added categories list
      # * +doc_content+ - Document text
      #
      # ==== Raises
      #
      # * +NotImplementedError+ - always, unless overridden
      #
      def train(category_name, doc_content)
        abstract_method = __callee__
        not_implemented_error(abstract_method)
      end

      # Trains one category with several documents by passing each
      # document, in order, to +train+.
      #
      # ==== Parameters
      #
      # * +category_name+ - Name of a category from the added categories list
      # * +doc_contents+ - Array of documents
      #
      # ==== Returns
      #
      # * The result of +each+ on +doc_contents+ (for an Array, the
      #   array itself)
      #
      def train_batch(category_name, doc_contents)
        doc_contents.each do |document|
          train(category_name, document)
        end
      end

      # Abstract hook for untraining a category with a single document.
      # The base implementation only reports itself as unimplemented;
      # concrete classifiers are expected to override it.
      #
      # ==== Parameters
      #
      # * +category_name+ - Name of a category from the added categories list
      # * +doc_content+ - Document text
      #
      # ==== Raises
      #
      # * +NotImplementedError+ - always, unless overridden
      #
      def untrain(category_name, doc_content)
        abstract_method = __callee__
        not_implemented_error(abstract_method)
      end

      # Untrains one category with several documents by passing each
      # document, in order, to +untrain+.
      #
      # ==== Parameters
      #
      # * +category_name+ - Name of a category from the added categories list
      # * +doc_contents+ - Array of documents
      #
      # ==== Returns
      #
      # * The result of +each+ on +doc_contents+ (for an Array, the
      #   array itself)
      #
      def untrain_batch(category_name, doc_contents)
        doc_contents.each do |document|
          untrain(category_name, document)
        end
      end

      # Abstract hook for classifying a single document.
      # The base implementation only reports itself as unimplemented;
      # concrete classifiers are expected to override it.
      #
      # ==== Parameters
      #
      # * +doc_content+ - The document to classify
      #
      # ==== Returns
      #
      # * +result+ - OmniCat::Result object (expected from overriding
      #   implementations; this base version never returns)
      #
      # ==== Raises
      #
      # * +NotImplementedError+ - always, unless overridden
      #
      def classify(doc_content)
        abstract_method = __callee__
        not_implemented_error(abstract_method)
      end

      # Classifies several documents in one call by passing each
      # document, in order, to +classify+.
      #
      # ==== Parameters
      #
      # * +doc_contents+ - Array of documents
      #
      # ==== Returns
      #
      # * +result_set+ - Array holding the value +classify+ returned for
      #   each document, in input order
      #
      def classify_batch(doc_contents)
        doc_contents.map do |document|
          classify(document)
        end
      end

      private
        # Raises NotImplementedError for an abstract method that was not
        # overridden. The message names the receiver's class and the
        # given method.
        #
        # * +method_name+ - Name of the method to mention in the message
        def not_implemented_error(method_name)
          message = "#{self.class.name}##{method_name} method is not implemented!"
          raise NotImplementedError, message
        end

      protected
        # Tells whether +categories+ already holds an entry stored under
        # the given name.
        #
        # * +category_name+ - Key to look up in +categories+
        def category_exists?(category_name)
          categories.key?(category_name)
        end

        # Adds one to the total category counter and returns the new total.
        def increment_category_count
          @category_count = @category_count + 1
        end

        # Subtracts one from the total category counter and returns the
        # new total. No lower bound is enforced here.
        def decrement_category_count
          @category_count = @category_count - 1
        end

        # Adds one to the strategy-wide document counter, then to the
        # document counter of the named category. Returns the category's
        # new document count.
        #
        # * +category_name+ - Key of the category in @categories; assumed
        #   to be present already
        def increment_doc_counts(category_name)
          @doc_count = @doc_count + 1
          category = @categories[category_name]
          category.doc_count = category.doc_count + 1
        end

        # Subtracts one from the strategy-wide document counter, then from
        # the document counter of the named category. Returns the
        # category's new document count. No lower bound is enforced here.
        #
        # * +category_name+ - Key of the category in @categories; assumed
        #   to be present already
        def decrement_doc_counts(category_name)
          @doc_count = @doc_count - 1
          category = @categories[category_name]
          category.doc_count = category.doc_count - 1
        end

        # Checks the preconditions for running a classification.
        #
        # ==== Returns
        #
        # * +true+ when there are at least 2 categories and
        #   +doc_availability?+ is truthy. The method never returns false;
        #   an unmet precondition is reported by raising.
        #
        # ==== Raises
        #
        # * +StandardError+ - when +category_count+ is below 2
        # * +StandardError+ - when +doc_availability?+ is falsy
        #
        def classifiable?
          if category_count < 2
            raise StandardError,
                  'At least 2 categories needed for classification process!'
          end

          unless doc_availability?
            raise StandardError,
                  'Each category must trained with at least one document!'
          end

          true
        end

        # Tells whether every entry in @categories has a non-zero
        # document count. Returns true when there are no categories at all.
        def doc_availability?
          @categories.none? { |_, category| category.doc_count == 0 }
        end
    end
  end
end