module Eco
  module API
    module Organization
      # Class to find out duplicates in the People Manager.
      #
      # It extends the `People` collection with fuzzy-matching helpers (mixed in
      # from `Eco::Data::FuzzyMatch`) and a few reporting utilities.
      class PeopleSimilarity < Eco::API::Organization::People
        include Eco::Data::FuzzyMatch

        # @!group Config

        # Sets the target attribute to be read when comparing people.
        # @param value [String, Proc, nil] the target attribute to be read.
        attr_writer :attribute

        # @return [String, Proc] the target attribute to be read (defaults to `"name"`).
        def attribute
          @attribute ||= "name"
        end

        # Defines the order of relevance of the per-user matches.
        # @param values [Array<Symbol>] the algorithms' results it should be ordered by
        #   * Possible values: `:dice`, `:levenshtein`, `:jaro_winkler`, `:ngrams`, `:words_ngrams`, `:chars_position`
        attr_writer :order

        # @return [Array<Symbol>] the algorithms the results are ordered by
        #   (defaults to `[:words_ngrams, :dice]`).
        def order
          @order ||= [:words_ngrams, :dice]
        end

        # Defines the threshold of the per-user matches.
        # @param value [Float] the threshold that all of the algorithms should comply with
        attr_writer :threshold

        # @return [Float] the threshold that all of the algorithms should comply with
        #   (defaults to `0.15`).
        def threshold
          @threshold ||= 0.15
        end

        # @!endgroup

        # @!group Searchers

        # It gathers those that have the same `email`.
        # @return [Hash] where `keys` are `email`s and `values` an `Array`
        def repeated_emails
          # `init_caches` and `@by_email` come from the parent class (not visible here).
          init_caches
          @by_email.select { |_email, people| people.count > 1 }
        end

        # @!endgroup

        # @!group Analysers

        # Analyses People based on `options`.
        # @note `:read` defaults to `#attribute` unless it is given in `options`.
        # @return [Hash] where the _keys_ are the people `id`s and the _values_ the `Eco::Data::FuzzyMatch::Results`
        def analyse(**options)
          options = { read: attribute }.merge(options)
          each_with_object({}) do |person, results|
            results[person.id] = find_all_with_score(person, **options)
          end
        end

        # Launches a reanalysis on `analysed` based on `options`.
        # @param analysed [Hash] where the _keys_ are the people `id`s and the _values_ the `Eco::Data::FuzzyMatch::Results`
        # @return [Hash] where the _keys_ are the people `id`s and the _values_ the
        #   outcome of `relevant_results(**options)` on each of the original results.
        def re_analyse(analysed, **options)
          analysed.each_with_object({}) do |(id, results), out|
            out[id] = results.relevant_results(**options)
          end
        end

        # @!endgroup

        # @!group Helpers

        # Builds a textual report of `analysed`.
        # @param analysed [Hash] where the _keys_ are the people `id`s and the _values_ the `Eco::Data::FuzzyMatch::Results`
        # @param format [Symbol] the output format; only `:txt` is supported.
        # @return [String, nil] well structured text (`nil` when `format` is not supported).
        def analysis(analysed, format: :txt)
          case format
          when :txt
            # `String.new` gives a mutable buffer regardless of frozen-literal settings.
            analysed.each_with_object(String.new) do |(id, results), out|
              msg = results.results.map(&:print).join("\n ")
              # Append to the buffer; a bare string expression here would be discarded.
              out << "'#{self[id].identify}':\n " << msg << "\n"
            end
          end
        end

        # Prints the report of an analysis to the standard output.
        # @note
        #   1. Unless `:analysed` is provided, it launches an analysis through
        #     `results_with_false_positives` (defined outside of this file; presumably
        #     cutting with Jaro Winkler min 0.5 -- to be confirmed there).
        #   2. The report is printed once; nothing is printed when there are no results.
        # @return [Hash] where the _keys_ are the people `id`s and the _values_ the `Eco::Data::FuzzyMatch::Results`
        def print_analysis(**options)
          analysed = options[:analysed] || results_with_false_positives.analyse(**options)
          puts analysis(analysed) unless analysed.empty?
          analysed
        end

        # @!endgroup

        protected

        # Drops the memoized fuzzy matcher before delegating to the parent hook.
        def on_change
          # `remove_instance_variable` expects the variable *name*; it raises if the
          # variable has not been set yet, hence the guard.
          remove_instance_variable(:@fuzzy_match) if instance_variable_defined?(:@fuzzy_match)
          super
        end
      end
    end
  end
end