lib/groupie.rb in groupie-0.2.2 vs lib/groupie.rb in groupie-0.3.0

- old
+ new

@@ -10,35 +10,46 @@ def [](group) @groups[group] ||= Group.new(group) end + def unique_words + @unique_words ||= ( + total_count = @groups.values.map {|group| group.word_counts}.inject{|total, counts| total.merge(counts){|key,o,n| o+n}} + median_index = [total_count.values.size * 3 / 4 - 1, 1].max + median_frequency = total_count.values.sort[median_index] + total_count.select{|word, count| count <= median_frequency}.map(&:first) + ) + end + def classify(entry, strategy=:sum) results = {} total_count = @groups.inject(0) do |sum, name_group| group = name_group.last count = group.count(entry) - if strategy==:sum + case strategy + when :sum sum += count - elsif strategy==:sqrt + when :sqrt, :unique sum += Math::sqrt(count) - elsif strategy==:log + when :log sum += Math::log10(count) if count > 0 else raise "Invalid strategy: #{strategy}" end next sum end return results if 0 == total_count @groups.each do |name, group| count = group.count(entry) - if strategy==:sum + case strategy + when :sum # keep count - elsif strategy==:sqrt + when :sqrt, :unique count = Math::sqrt(count) - elsif strategy==:log + when :log count = Math::log10(count) if count > 0 else raise "Invalid strategy: #{strategy}" end results[name] = count > 0 ? count.to_f / total_count : 0.0 @@ -47,9 +58,12 @@ end # Classify a text by taking the average of all word classifications. def classify_text(words, strategy=:sum) hits = 0 + if strategy==:unique + words = words & unique_words + end group_score_sums = words.inject({}) do |results, word| word_results = classify(word, strategy) next results if word_results.empty? hits += 1 results.merge(word_results) do |key, old, new| \ No newline at end of file