# Export use case ("analyse-people") that runs similarity analytics over the
# people collection. Depending on the case options found under
# `options[:usecase][:analyse_people]` it either:
#   * identifies possible duplicates (`:identify_duplicates`), or
#   * identifies people with no name (`:identify_unnamed`).
# The people involved in the outcome can optionally be exported to CSV
# (`:csv_file`) and/or saved as a JSON backup (`:backup_people`).
#
# NOTE(review): `people`, `options`, `session` and `log` are used as methods but
# are not defined in this class; presumably they are provided by the loader
# base class -- confirm against Eco::API::Common::Loaders::UseCase.
class Eco::API::UseCases::Default::People::Analyse < Eco::API::Common::Loaders::UseCase
  name "analyse-people"
  type :export

  attr_reader :people

  # Entry point of the use case.
  #
  # @param _people [Object] unused here; the `people` reader is used instead.
  # @param _session [Object] unused here; the `session` method is used instead.
  # @param options [Hash] run options; `:end_get` is forced to `false`.
  # @param _usecase [Object] unused.
  # @return [Object, nil] the people involved in the analysis, or `nil` when no
  #   analysis operation was requested.
  def main(_people, _session, options, _usecase)
    options[:end_get] = false

    people_involved =
      if case_options[:identify_duplicates]
        identify_duplicates
      elsif case_options[:identify_unnamed]
        identify_unnamed
      else
        log(:info) { "No analysis operation was specified" }
        # The return value of `log` is not visible from here; yield nil explicitly
        # so it can never be mistaken for a people collection below.
        nil
      end

    return unless people_involved

    to_csv(people_involved) if to_csv?
    create_people_backup(people_involved) if results_people_backup?
    people_involved
  end

  private

  # Retrieves the people that have no name, logging when there are none.
  #
  # @return [Object] whatever `similarity_analytics.unnamed` returns.
  def identify_unnamed
    similarity_analytics.unnamed.tap do |unnamed|
      next unless unnamed.empty?

      log(:info) { "There were no people with no name!!" }
    end
  end

  # Screens for possible duplicates and, when any is found, saves a text
  # report to `output_file`.
  #
  # @return [Object] whatever `similarity_analytics.newSimilarity` returns.
  def identify_duplicates
    analysed = similarity_screening

    if case_options[:ignore_matching_words]
      log(:info) { "Fine tune results by ignoring matching words..." }
      analysed = strict_similarity(analysed)
    end

    similarity_analytics.newSimilarity(analysed).tap do |related_people|
      if related_people.empty?
        log(:info) { "There were no possible duplicates identified!!" }
      else
        report = similarity_analytics.report(analysed, format: :txt)
        save!(report)
      end
    end
  end

  # Refines previously analysed results by ignoring matching words.
  #
  # @param analysed [Object] results of a previous screening.
  # @return [Object] the refined results.
  def strict_similarity(analysed)
    similarity_analytics.ignore_matching_words(
      analysed,
      threshold: 0.5,
      order:     [:ngrams]
    )
  end

  # Runs the basic similarity screening and, unless `:only_screening` is set,
  # rearranges the results afterwards.
  #
  # @return [Object] the analysed (and possibly rearranged) results.
  def similarity_screening
    similarity_analytics.attribute = field_similarity

    options = {
      threshold: 0.4,
      order:     %i[average dice]
    }.tap do |opts|
      opts.merge!(needle_read: facet_field_proc) if facet_field?
      opts.merge!(unique_words: true)            if unique_words?
    end

    analysed = similarity_analytics.analyse(**options)
    log(:info) { "Got #{analysed.count} results after basic screening with #{options}" }
    return analysed if case_options[:only_screening]

    options = {threshold: 0.5, order: [:average]}
    log(:info) { "Going to rearrange results... with #{options}" }

    similarity_analytics.rearrange(analysed, **options).tap do |results|
      log(:info) { "... got #{results.count} results after rearranging" }
    end
  end

  # Memoized access to the similarity helper of the people collection.
  def similarity_analytics
    @similarity_analytics ||= people.similarity
  end

  # Saves `cut` as JSON via the session's file manager.
  #
  # @param cut [Object] the people to back up (defaults to `people`).
  # @param file [String] target file (defaults to the `:backup_people` option).
  def create_people_backup(cut = people, file = results_people_backup)
    session.file_manager.save_json(cut, file)
  end

  # Delegates a CSV export of `data` to the "to-csv" export case.
  #
  # @param data [Object] the people to export (defaults to `people`).
  # @param file [String] target CSV file (defaults to the `:csv_file` option).
  def to_csv(data = people, file = csv_file)
    opts = {}
    opts.deep_merge!(export: {file: {name: file, format: :csv}})
    opts.deep_merge!(export: {options: {nice_header: true}})
    opts.deep_merge!(export: {options: {internal_names: true}})
    # opts.deep_merge!(export: {options: {split_schemas: true}})
    # NOTE(review): `merge` is shallow, so an `:export` key in the run options
    # replaces the whole `:export` hash built above -- confirm this is intended.
    session.process_case("to-csv", type: :export, people: data, options: opts.merge(options.slice(:export)))
  end

  def unique_words?
    case_options[:unique_words]
  end

  # @return [Symbol, Proc] `:name` unless a `:use_field` expression was given,
  #   in which case a proc that evaluates that expression on a person.
  def field_similarity
    return :name unless use_field?

    use_field_proc
  end

  def use_field_proc
    proc_value_access(use_field)
  end

  def facet_field_proc
    proc_value_access(facet_field)
  end

  def use_field
    case_options[:use_field]
  end

  def use_field?
    !!use_field
  end

  def facet_field
    case_options[:facet_field]
  end

  def facet_field?
    !!facet_field
  end

  def csv_file
    case_options[:csv_file]
  end

  def to_csv?
    !!csv_file
  end

  def results_people_backup
    case_options[:backup_people]
  end

  def results_people_backup?
    !!results_people_backup
  end

  # @return [Hash] the options specific to this use case (empty when absent).
  def case_options
    options.dig(:usecase, :analyse_people) || {}
  end

  # @return [String] the report file; defaults to "analytics.txt".
  def output_file
    @output_file ||= options.dig(:output, :file) || "analytics.txt"
  end

  # Writes the report to `output_file`. Only the "txt" extension produces
  # content; "html" and "json" are rejected (message + `exit(1)`).
  #
  # @param data [String] the report contents.
  def save!(data)
    ext = File.extname(output_file).downcase.delete(".")

    # Reject unsupported formats BEFORE opening the file: opening with "w"
    # would create/truncate the target even though nothing gets written.
    if %w[html json].include?(ext)
      puts "#{ext} is still not supported"
      exit(1)
    end

    session.logger.info("Generating file '#{output_file}'")
    File.open(output_file, "w") do |fd|
      fd << data if ext == "txt"
    end
  end

  # Builds a proc that, given a person, evaluates each sub-expression of
  # `expression` (separated by " AND ") and joins the non-nil values with a
  # single space. It allows a field to be specified from the command line.
  #   => i.e. details[first-name] AND details[surname]
  #
  # @param expression [String] the access expression.
  # @return [Proc] a proc receiving a person and returning a String.
  def proc_value_access(expression)
    #return expression.to_sym if expression.start_with?(":")
    subexpressions = expression.split(" AND ")

    proc do |person|
      values = subexpressions.map { |exp| attribute_access(person, exp) }
      values.compact.join(" ")
    end
  end

  # Resolves a dotted access expression against a person.
  #   => i.e. person.details[first-name]
  #
  # @param person [Object] the object the expression is evaluated on.
  # @param expression [String] dot-separated parts.
  # @return [String, nil, false] the resolved value.
  # @raise [RuntimeError] when the resolved value is truthy but not a String.
  def attribute_access(person, expression)
    parts = expression.split(".")

    parts_to_value(person, parts).tap do |value|
      next if !value || value.is_a?(String)

      raise "Something is wrong with #{expression} to parts #{parts}. Expecting String, obtained: #{value.class}"
    end
  end

  # Walks `parts` from `obj`, feeding each intermediate result to `get_attr`.
  def parts_to_value(obj, parts)
    parts.reduce(obj) do |object, part|
      get_attr(object, part)
    end
  end

  # Resolves a single part of an access expression on `obj`.
  #
  # Resolution order: a method `obj` responds to; a ":"-prefixed name (prefix
  # stripped and retried); `details[<field>]`; `account`; `person` (identity).
  #
  # @param obj [Object, nil] the current object (nil short-circuits to nil).
  # @param part [String, Symbol] the part to resolve.
  # @return [Object, nil] the resolved value.
  # @raise [RuntimeError] when the part is malformed or cannot be recognized.
  def get_attr(obj, part)
    return unless obj

    is_method = part.is_a?(Symbol) || obj.respond_to?(part.to_sym)
    # NOTE(review): `send` is driven by a command-line expression; consider
    # `public_send` if private methods must never be reachable.
    return obj.send(part.to_sym) if is_method
    return get_attr(obj, part[1..]) if part.start_with?(":")

    if part.start_with?("details[")
      via_details = obj.respond_to?(:details) && (details = obj.details)
      return unless via_details

      # Named capture `field` is what `match[:field]` reads below.
      match = part.match(/details\[(?<field>.*)\]/)
      return details[match[:field]] if match

      raise "Review your -use-field expression. It should read: person.details[target-alt_id]"
    end

    if part.start_with?("account")
      return obj.respond_to?(:account) ? obj.account : nil
    end

    return obj if part.start_with?("person")

    raise "Review your expression. Cannot recognize '#{part}' as part of '#{obj.class}'"
  end
end