lib/groupie.rb in groupie-0.2.2 vs lib/groupie.rb in groupie-0.3.0
- old
+ new
@@ -10,35 +10,46 @@
# Fetch the Group stored under +group+, building and remembering a new
# Group for that key the first time it is requested.
def [](group)
  existing = @groups[group]
  return existing if existing

  @groups[group] = Group.new(group)
end
# Words whose combined count across all groups is at or below a cutoff
# frequency. classify_text intersects its input with this list when the
# :unique strategy is selected, which filters out the most common words.
#
# The cutoff is the count found about three quarters of the way through the
# ascending list of per-word totals (index size * 3 / 4 - 1, never below 1
# and never past the last element).
#
# The result is memoized in @unique_words; later calls reuse it.
#
# Returns an Array of words; empty when there are no groups or no words.
def unique_words
  @unique_words ||= begin
    # Sum word counts over every group. Seeding with {} keeps this working
    # when there are no groups (inject without a seed would return nil).
    totals = @groups.values.inject({}) do |sum, group|
      sum.merge(group.word_counts) { |_word, left, right| left + right }
    end

    if totals.empty?
      []
    else
      frequencies = totals.values.sort
      # Clamp to the last valid index: with a single distinct word the
      # unclamped index (1) points past the end and would yield nil.
      cutoff_index = [[frequencies.size * 3 / 4 - 1, 1].max, frequencies.size - 1].min
      cutoff = frequencies[cutoff_index]
      totals.keys.select { |word| totals[word] <= cutoff }
    end
  end
end
+
def classify(entry, strategy=:sum)
results = {}
total_count = @groups.inject(0) do |sum, name_group|
group = name_group.last
count = group.count(entry)
- if strategy==:sum
+ case strategy
+ when :sum
sum += count
- elsif strategy==:sqrt
+ when :sqrt, :unique
sum += Math::sqrt(count)
- elsif strategy==:log
+ when :log
sum += Math::log10(count) if count > 0
else
raise "Invalid strategy: #{strategy}"
end
next sum
end
return results if 0 == total_count
@groups.each do |name, group|
count = group.count(entry)
- if strategy==:sum
+ case strategy
+ when :sum
# keep count
- elsif strategy==:sqrt
+ when :sqrt, :unique
count = Math::sqrt(count)
- elsif strategy==:log
+ when :log
count = Math::log10(count) if count > 0
else
raise "Invalid strategy: #{strategy}"
end
results[name] = count > 0 ? count.to_f / total_count : 0.0
@@ -47,9 +58,12 @@
end
# Classify a text by taking the average of all word classifications.
def classify_text(words, strategy=:sum)
hits = 0
+ if strategy==:unique
+ words = words & unique_words
+ end
group_score_sums = words.inject({}) do |results, word|
word_results = classify(word, strategy)
next results if word_results.empty?
hits += 1
results.merge(word_results) do |key, old, new|
\ No newline at end of file