lib/groupie.rb in groupie-0.5.0 vs lib/groupie.rb in groupie-0.6.0

- old
+ new

@@ -1,9 +1,10 @@
 # frozen_string_literal: true

 require_relative 'groupie/version'
 require_relative 'groupie/group'
+require_relative 'groupie/tokenizer'
 require 'set'

 # Groupie is a text grouper and classifier, using naive Bayesian filtering.
 class Groupie
   # Wrap all errors we raise in this so our own errors are recognizable.
@@ -22,17 +23,11 @@
   # This attempts to remove most common punctuation marks and types of whitespace.
   #
   # @param [String, #to_s] object
   # @return [Array<String>]
   def self.tokenize(object)
-    object
-      .to_s
-      .downcase
-      .gsub(/\s/, ' ')
-      .gsub(/[$']/, '')
-      .gsub(/<[^>]+?>|[^\w -.,]/, '')
-      .split.map { |str| str.gsub(/\A['"]+|[!,."']+\Z/, '') }
+    Tokenizer.new(object).to_tokens
   end

   # Access an existing Group or create a new one.
   #
   # @param [Object] group The name of the group to access.
@@ -50,10 +45,10 @@
   def classify_text(words, strategy = :sum)
     words &= unique_words if strategy == :unique
     group_score_sums, hits = calculate_group_scores(words, strategy)
     group_score_sums.each.with_object({}) do |(group, sum), averages|
-      averages[group] = hits.positive? ? sum / hits : 0
+      averages[group] = sum / hits
     end
   end

   # Classify a single word against all groups, returning the probability distribution.
   #