# Use case "analyse-people" (type :export).
#
# Depending on the flags found under `options[:usecase][:analyse_people]`, it either
# looks for possible duplicated people (`:identify_duplicates`) or for people without
# a name (`:identify_unnamed`). The people identified can optionally be exported to
# CSV (`:csv_file`) and/or saved as a JSON backup (`:backup_people`).
#
# NOTE(review): the private methods call `session` and `options` without receiving
# them as arguments; they are presumably exposed by the parent loader class. Confirm.
class Eco::API::UseCases::Default::People::Analyse < Eco::API::Common::Loaders::UseCase
  name "analyse-people"
  type :export

  attr_reader :people

  # Entry point of the use case.
  #
  # @param people the people to analyse; must respond to `similarity`.
  # @param session the session; used here for logging.
  # @param options the launch options; `:end_get` is set to `false` on it.
  # @param usecase not used by this method.
  # @return the people identified by the selected analysis, or `nil` when no
  #   analysis was requested.
  def main(people, session, options, usecase)
    options[:end_get] = false
    @people = people

    people_involved =
      if case_options[:identify_duplicates]
        identify_duplicates
      elsif case_options[:identify_unnamed]
        identify_unnamed
      else
        session.logger.info("No analysis operation was specified")
        # The logger's return value is not an analysis result: without this `nil`
        # a truthy value (e.g. `true`) would be exported / backed up below.
        nil
      end
    return unless people_involved

    to_csv(people_involved) if to_csv?
    create_people_backup(people_involved) if results_people_backup?
    people_involved
  end

  private

  # Finds the people with no name, logging when there are none.
  #
  # @return the result of `similarity_analytics.unnamed`.
  def identify_unnamed
    similarity_analytics.unnamed.tap do |unnamed|
      session.logger.info("There were no people with no name!!") if unnamed.empty?
    end
  end

  # Runs the similarity screening (optionally fine tuned by ignoring matching
  # words) and, when there are results, saves a text report via `save!`.
  #
  # @return the result of `similarity_analytics.newSimilarity` for the analysed data.
  def identify_duplicates
    analysed = similarity_screening
    if case_options[:ignore_matching_words]
      puts "Fine tune results by ignoring matching words..."
      analysed = strict_similarity(analysed)
    end

    similarity_analytics.newSimilarity(analysed).tap do |related_people|
      if related_people.empty?
        session.logger.info("There were no possible duplicates identified!!")
      else
        save!(similarity_analytics.report(analysed, format: :txt))
      end
    end
  end

  # Re-filters `analysed` with `ignore_matching_words`, using a 0.5 threshold and
  # the `:ngrams` order.
  def strict_similarity(analysed)
    similarity_analytics.ignore_matching_words(analysed, threshold: 0.5, order: [:ngrams])
  end

  # Basic screening (threshold 0.4) followed, unless `:only_screening` is set,
  # by a rearrangement of the results (threshold 0.5).
  #
  # @return the analysed data, as returned by `analyse` or `rearrange`.
  def similarity_screening
    similarity_analytics.attribute = field_similarity

    # Local names differ from `options` on purpose: that name is the launch
    # options helper used by the rest of this class.
    screening_opts = {threshold: 0.4, order: [:average, :dice]}
    screening_opts[:needle_read] = facet_field_proc if facet_field?
    screening_opts[:unique_words] = true if unique_words?

    analysed = similarity_analytics.analyse(**screening_opts)
    puts "Got #{analysed.count} results after basic screening with #{screening_opts}"
    return analysed if case_options[:only_screening]

    rearrange_opts = {threshold: 0.5, order: [:average]}
    puts "Going to rearrange results... with #{rearrange_opts}"
    similarity_analytics.rearrange(analysed, **rearrange_opts).tap do |rearranged|
      puts "... got #{rearranged.count} results after rearranging"
    end
  end

  # Memoized `people.similarity` object that every analysis goes through.
  def similarity_analytics
    @analytics ||= people.similarity
  end

  # Saves `cut` as JSON into `file` through the session's file manager.
  def create_people_backup(cut = people, file = results_people_backup)
    session.file_manager.save_json(cut, file)
  end

  # Exports `data` by delegating to the "to-csv" case.
  #
  # NOTE(review): the final `merge` is shallow, so an `:export` entry present in the
  # launch options replaces the whole hash built here. Confirm this is intended.
  def to_csv(data = people, file = csv_file)
    opts = {
      export: {
        file:    {name: file, format: :csv},
        options: {nice_header: true, internal_names: true}
        # split_schemas: true is currently left disabled
      }
    }
    session.process_case("to-csv", type: :export, people: data, options: opts.merge(options.slice(:export)))
  end

  def unique_words?
    case_options[:unique_words]
  end

  # What the similarity is computed on: `:name` by default, or a proc built from
  # the `:use_field` expression.
  def field_similarity
    use_field? ? use_field_proc : :name
  end

  def use_field_proc
    proc_value_access(use_field)
  end

  def facet_field_proc
    proc_value_access(facet_field)
  end

  def use_field
    case_options[:use_field]
  end

  def use_field?
    !!use_field
  end

  def facet_field
    case_options[:facet_field]
  end

  def facet_field?
    !!facet_field
  end

  def csv_file
    case_options[:csv_file]
  end

  def to_csv?
    !!csv_file
  end

  def results_people_backup
    case_options[:backup_people]
  end

  def results_people_backup?
    !!results_people_backup
  end

  # Options specific to this use case; an empty Hash when none were given.
  def case_options
    options.dig(:usecase, :analyse_people) || {}
  end

  # Target file of the report; defaults to "analytics.txt".
  def output_file
    @output_file ||= options.dig(:output, :file) || "analytics.txt"
  end

  # Writes `data` to `output_file`. Only the `txt` extension is written;
  # `html` and `json` abort the process with exit status 1.
  #
  # The extension is checked before the file is opened, so that an unsupported
  # target does not create (or truncate) the file right before exiting.
  #
  # NOTE(review): any other extension still produces an empty file.
  def save!(data)
    ext = File.extname(output_file).downcase.delete(".")

    case ext
    when "html", "json"
      puts "#{ext} is still not supported"
      exit(1)
    end

    session.logger.info("Generating file '#{output_file}'")
    File.open(output_file, "w") do |fd|
      fd << data if ext == "txt"
    end
  end

  # Builds a proc that reads a value out of a person, from an expression given
  # on the command line. Sub-expressions are separated by " AND "; their
  # non-nil values are joined with a single space.
  # => i.e. details[first-name] AND details[surname]
  def proc_value_access(expression)
    subexpressions = expression.split(" AND ")
    proc do |person|
      subexpressions.map { |exp| attribute_access(person, exp) }.compact.join(" ")
    end
  end

  # Resolves one dot-separated expression against `person`.
  # => i.e. person.details[first-name]
  #
  # @return [String, nil] the value found (a falsey value is returned as is).
  # @raise [RuntimeError] when the value found is truthy but not a String.
  def attribute_access(person, expression)
    parts = expression.split(".")
    value = parts_to_value(person, parts)
    if value && !value.is_a?(String)
      raise "Something is wrong with #{expression} to parts #{parts}. Expecting String, obtained: #{value.class}"
    end

    value
  end

  # Walks `parts` from `obj`, feeding each intermediate result to `get_attr`.
  def parts_to_value(obj, parts)
    parts.reduce(obj) { |object, part| get_attr(object, part) }
  end

  # Resolves a single `part` of an expression against `obj`.
  #
  # Recognised parts, in order: a method `obj` responds to (a leading ":" is
  # accepted), `details[<field>]`, `account` and `person` (the object itself).
  #
  # @return the value found; `nil` when `obj` is falsey or lacks the target.
  # @raise [RuntimeError] when `part` is malformed or not recognised.
  def get_attr(obj, part)
    return nil unless obj

    if part.is_a?(Symbol) || obj.respond_to?(part.to_sym)
      # `part` comes from the command line: restrict the call to public methods.
      obj.public_send(part.to_sym)
    elsif part.start_with?(":")
      get_attr(obj, part[1..-1])
    elsif part.start_with?("details[")
      details = obj.details if obj.respond_to?(:details)
      return nil unless details

      match = part.match(/details\[(?<field>.*)\]/)
      unless match
        raise "Review your -use-field expression. It should read: person.details[target-alt_id]"
      end

      details[match[:field]]
    elsif part.start_with?("account")
      obj.account if obj.respond_to?(:account)
    elsif part.start_with?("person")
      obj
    else
      raise "Review your expression. Cannot recognize '#{part}' as part of '#{obj.class}'"
    end
  end
end