Sha256: bd571ef68c47a0c1d57026df98d47cc57e870e10d943a8e2ae9d54d883ea1b4b

Contents?: true

Size: 1.24 KB

Versions: 1

Compression:

Stored size: 1.24 KB

Contents

# encoding: UTF-8
require 'digest'
require File.dirname(__FILE__) + '/base'

module OmniCat
  # A single document: its raw text, an MD5 digest of that text, an
  # occurrence counter and a Hash of tokens extracted from the text.
  class Doc < ::OmniCat::Base
    attr_reader :content, :content_md5, :count, :tokens

    # Builds a document from a Hash of attributes.
    #
    # Recognised keys (all optional):
    #   :auto_classified - stored as given, defaults to false
    #   :content         - the raw document text
    #   :content_md5     - digest of the content; computed from :content
    #                      when absent
    #   :count           - occurrence count, coerced with to_i, defaults to 1
    #   :tokens          - previously computed token Hash; when it is a Hash
    #                      it is reused, otherwise the content is tokenized
    def initialize(doc_hash = {})
      @auto_classified = doc_hash[:auto_classified] || false
      @content = doc_hash[:content]
      # Interpolation turns nil content into "" so the digest never raises.
      @content_md5 = doc_hash[:content_md5] || Digest::MD5.hexdigest("#{@content}")
      @count = (doc_hash[:count] || 1).to_i
      # Pick up supplied tokens first; without this assignment the Hash check
      # below could never succeed and tokens were always recomputed.
      @tokens = doc_hash[:tokens]
      @tokens = tokenize_with_counts unless @tokens.is_a?(Hash)
    end

    # Adds one to the occurrence count.
    def increment_count
      @count += 1
    end

    # Subtracts one from the occurrence count, never going below zero.
    def decrement_count
      @count -= 1 if @count > 0
    end

    private
      # Returns the content with every match of the configured :minus
      # patterns replaced by a single space.
      #
      # Uses the non-mutating gsub so that @content (and the caller's
      # original string) stays untouched and keeps matching @content_md5.
      # nil content is treated as an empty string, mirroring the digest
      # computation in #initialize.
      def minus_tokens
        OmniCat.config.token_patterns[:minus].inject(@content.to_s) do |body, pattern|
          body.gsub(pattern, ' ')
        end
      end

      # Collects, in pattern order, everything in +body+ matched by the
      # configured :plus patterns.
      def plus_tokens(body)
        OmniCat.config.token_patterns[:plus].flat_map { |pattern| body.scan(pattern) }
      end

      # Tokens that must be dropped from the result, as configured.
      def exclude_tokens
        OmniCat.config.exclude_tokens
      end

      # Tokenizes the content and converts the token list with
      # hashify_with_counts (defined outside this file; presumably yields a
      # token => occurrences Hash -- confirm against the Array extension).
      def tokenize_with_counts
        tokenize.hashify_with_counts
      end

      # Strips :minus matches, extracts :plus matches, then removes the
      # excluded tokens.
      def tokenize
        plus_tokens(minus_tokens) - exclude_tokens
      end
  end
end

Version data entries

1 entry across 1 version & 1 rubygem

Version Path
omnicat-0.3.0 lib/omnicat/doc.rb