Parent

Files

Ankusa::TextHash

Attributes

word_count[R]

Public Class Methods

atomize(text) click to toggle source
# File lib/ankusa/hasher.rb, line 15
# Breaks +text+ into an array of lowercase tokens.
#
# The text is lowercased and passed through String#to_ascii (an extension
# supplied outside this file; presumably it transliterates to ASCII). Dashes
# and every other character that is neither a word character nor whitespace
# are then turned into spaces, and the result is split on whitespace.
def self.atomize(text)
  normalized = text.downcase.to_ascii
  spaced = normalized.tr('-', ' ').gsub(/[^\w\s]/, " ")
  spaced.split
end
new(text=nil) click to toggle source
# File lib/ankusa/hasher.rb, line 9
# Builds an empty instance and optionally seeds it with +text+.
#
# 0 is forwarded to the superclass initializer (the parent class is not
# visible here; presumably Hash, which would make 0 the default value for
# missing keys -- TODO confirm). @word_count starts at zero, and add_text is
# called only when +text+ is not nil.
def initialize(text=nil)
  super(0)
  @word_count = 0
  return if text.nil?

  add_text(text)
end
valid_word?(word) click to toggle source

The word is expected to contain only alphanumeric characters at this point.

# File lib/ankusa/hasher.rb, line 20
# Decides whether +word+ should be counted.
#
# A word is rejected when it is shorter than three characters, is listed in
# Ankusa::STOPWORDS, or answers truthy to String#numeric? (an extension
# defined outside this file). Always returns true or false.
def self.valid_word?(word)
  # The parentheses on include? are essential: written as
  # `include? word || ...` the entire || chain becomes the argument to
  # include?, and the length and numeric checks are never evaluated.
  rejected = word.length < 3 ||
             Ankusa::STOPWORDS.include?(word) ||
             word.numeric?
  !rejected
end

Public Instance Methods

add_text(text) click to toggle source
# File lib/ankusa/hasher.rb, line 24
# Adds the words found in +text+ to this instance and returns self.
#
# When +text+ is exactly an Array, each element is fed back through add_text.
# Otherwise the text is tokenized with TextHash.atomize and every token that
# passes TextHash.valid_word? is handed to add_word.
def add_text(text)
  if text.instance_of?(Array)
    text.each { |entry| add_text(entry) }
    return self
  end

  TextHash.atomize(text).each do |token|
    add_word(token) if TextHash.valid_word?(token)
  end
  self
end

Protected Instance Methods

add_word(word) click to toggle source
# File lib/ankusa/hasher.rb, line 38
# Records one occurrence of +word+.
#
# Increments @word_count, then bumps the count stored under the symbolized
# result of word.stem (String#stem is supplied outside this file). Returns
# the new count stored for that key.
def add_word(word)
  @word_count += 1
  stem_key = word.stem.to_sym
  occurrences = fetch(stem_key, 0) + 1
  store(stem_key, occurrences)
end

[Validate]

Generated with the Darkfish Rdoc Generator 2.