Sha256: 1be2225c0e88ceecc33e97bcf8d8f2f16cff81d2c32e591cd10766e3369fc799
Contents?: true
Size: 1.35 KB
Versions: 3
Compression:
Stored size: 1.35 KB
Contents
require 'set'

module BowTfidf
  # Accumulates labeled words into two in-memory indexes:
  #
  # * +words+      - word => { categories: { category_id => { entries: Integer } } }
  # * +categories+ - key  => { id: Integer, key: key, words: Set of words }
  #
  # After every batch of labeled data is added, the instance hands itself to
  # Computation (defined elsewhere in the gem; presumably it derives TF-IDF
  # weights from these indexes - confirm against that class).
  class BagOfWords
    attr_reader :words, :categories

    def initialize
      @words = {}
      @categories = {}
    end

    # Adds a batch of labeled words and then triggers the computation step.
    #
    # The whole batch is validated before anything is stored, so invalid input
    # leaves +words+ and +categories+ untouched.
    #
    # @param data [Hash] category key => enumerable of String words
    # @return [Object] whatever Computation#call returns
    # @raise [ArgumentError] if +data+ is not a Hash, a value is not
    #   Enumerable, or a value contains a non-String element
    def add_labeled_data!(data)
      validate_labeled_data(data)

      data.each do |category_key, category_words|
        category = category_by_key(category_key)
        category_words.each { |word| add_word(word, category) }
      end

      compute_tfidf
    end

    private

    # Raises ArgumentError unless +data+ is a Hash whose values are all
    # Enumerables containing only Strings.
    def validate_labeled_data(data)
      raise(ArgumentError, 'Hash with arrays expected') unless data.is_a?(Hash)

      data.each_value do |array|
        raise(ArgumentError, 'Hash with arrays expected') unless array.is_a?(Enumerable)
        raise(ArgumentError, 'Hash with arrays of strings expected') unless array.all? { |value| value.is_a?(String) }
      end
    end

    # Records one occurrence of +word+ in +category+: bumps the per-category
    # entry counter and adds the word to the category's word set.
    def add_word(word, category)
      word_entry = (words[word] ||= { categories: {} })
      stats = (word_entry[:categories][category[:id]] ||= { entries: 0 })
      stats[:entries] += 1

      # +category+ is the very hash stored in +categories+, so this updates
      # the index in place.
      category[:words] << word
    end

    # Returns the category record for +key+, creating it on first use.
    # A new category's id is the number of categories registered before it.
    def category_by_key(key)
      categories[key] ||= { id: categories.length, key: key, words: Set[] }
    end

    # Delegates to Computation, passing this bag of words.
    def compute_tfidf
      Computation.new(self).call
    end
  end
end
Version data entries
3 entries across 3 versions & 1 rubygems
Version | Path |
---|---|
bow_tfidf-0.1.2 | lib/bow_tfidf/bag_of_words.rb |
bow_tfidf-0.1.1 | lib/bow_tfidf/bag_of_words.rb |
bow_tfidf-0.1.0 | lib/bow_tfidf/bag_of_words.rb |