lib/eco/api/usecases/default/people/analyse_people_case.rb in eco-helpers-2.6.4 vs lib/eco/api/usecases/default/people/analyse_people_case.rb in eco-helpers-2.7.0
- old
+ new
@@ -2,85 +2,86 @@
name "analyse-people"
type :export
attr_reader :people
- def main(people, session, options, usecase)
+ def main(_people, _session, options, _usecase)
options[:end_get] = false
- @people = people
- case
- when case_options[:identify_duplicates]
+ if case_options[:identify_duplicates]
identify_duplicates
- when case_options[:identify_unnamed]
+ elsif case_options[:identify_unnamed]
identify_unnamed
else
- session.logger.info("No analysis operation was specified")
+ log(:info) { "No analysis operation was specified" }
end.tap do |people_involved|
- if people_involved
- to_csv(people_involved) if to_csv?
- create_people_backup(people_involved) if results_people_backup?
- end
+ next unless people_involved
+
+ to_csv(people_involved) if to_csv?
+ create_people_backup(people_involved) if results_people_backup?
end
end
private
def identify_unnamed
similarity_analytics.unnamed.tap do |unnamed|
- if unnamed.empty?
- session.logger.info("There were no people with no name!!")
- end
+ next unless unnamed.empty?
+ log(:info) { "There were no people with no name!!" }
end
end
def identify_duplicates
analysed = similarity_screening
if case_options[:ignore_matching_words]
- puts "Fine tune results by ignoring matching words..."
+ log(:info) { "Fine tune results by ignoring matching words..." }
analysed = strict_similarity(analysed)
end
similarity_analytics.newSimilarity(analysed).tap do |related_people|
if related_people.empty?
- session.logger.info("There were no possible duplicates identified!!")
+ log(:info) { "There were no possible duplicates identified!!" }
else
report = similarity_analytics.report(analysed, format: :txt)
save!(report)
end
end
end
def strict_similarity(analysed)
- similarity_analytics.ignore_matching_words(analysed, **{
+ similarity_analytics.ignore_matching_words(
+ analysed,
threshold: 0.5,
- order: [:ngrams]
- })
+ order: [:ngrams]
+ )
end
def similarity_screening
similarity_analytics.attribute = field_similarity
options = {
threshold: 0.4,
- order: [:average, :dice]
+ order: %i[average dice]
}.tap do |opts|
opts.merge!(needle_read: facet_field_proc) if facet_field?
- opts.merge!(unique_words: true) if unique_words?
+ opts.merge!(unique_words: true) if unique_words?
end
analysed = similarity_analytics.analyse(**options)
- puts "Got #{analysed.count} results after basic screening with #{options}"
+ log(:info) {
+ "Got #{analysed.count} results after basic screening with #{options}"
+ }
return analysed if case_options[:only_screening]
options = {threshold: 0.5, order: [:average]}
- puts "Going to rearrange results... with #{options}"
- similarity_analytics.rearrange(analysed, **options).tap do |analysed|
- puts "... got #{analysed.count} results after rearranging"
+ log(:info) { "Going to rearrange results... with #{options}" }
+
+ similarity_analytics.rearrange(analysed, **options).tap do |results|
+ log(:info) { "... got #{results.count} results after rearranging" }
end
end
def similarity_analytics
- @analytics ||= people.similarity
+ @similarity_analytics ||= people.similarity
end
def create_people_backup(cut = people, file = results_people_backup)
session.file_manager.save_json(cut, file)
end
@@ -88,11 +89,12 @@
def to_csv(data = people, file = csv_file)
opts = {}
opts.deep_merge!(export: {file: {name: file, format: :csv}})
opts.deep_merge!(export: {options: {nice_header: true}})
opts.deep_merge!(export: {options: {internal_names: true}})
- #opts.deep_merge!(export: {options: {split_schemas: true}})
+ # opts.deep_merge!(export: {options: {split_schemas: true}})
+
session.process_case("to-csv", type: :export, people: data, options: opts.merge(options.slice(:export)))
end
def unique_words?
case_options[:unique_words]
@@ -110,35 +112,35 @@
def facet_field_proc
proc_value_access(facet_field)
end
def use_field
- case_options.dig(:use_field)
+ case_options[:use_field]
end
def use_field?
!!use_field
end
def facet_field
- case_options.dig(:facet_field)
+ case_options[:facet_field]
end
def facet_field?
!!facet_field
end
def csv_file
- case_options.dig(:csv_file)
+ case_options[:csv_file]
end
def to_csv?
!!csv_file
end
def results_people_backup
- case_options.dig(:backup_people)
+ case_options[:backup_people]
end
def results_people_backup?
!!results_people_backup
end
@@ -150,19 +152,20 @@
def output_file
@output_file ||= options.dig(:output, :file) || "analytics.txt"
end
def save!(data)
- ext = File.extname(output_file).downcase.delete(".")
+ ext = File.extname(output_file).downcase.delete(".")
session.logger.info("Generating file '#{output_file}'")
File.open(output_file, "w") do |fd|
- if ext == "txt"
+ case ext
+ when "txt"
fd << data
- elsif ext == "html"
+ when "html"
puts "html is still not supported"
exit(1)
- elsif ext == "json"
+ when "json"
puts "json is still not supported"
exit(1)
end
end
end
@@ -170,11 +173,11 @@
# A way to use command line to specify part
# => i.e. details[first-name] AND details[surname]
def proc_value_access(expression)
#return expression.to_sym if expression.start_with?(":")
subexpressions = expression.split(" AND ")
- Proc.new do |person|
+ proc do |person|
values = subexpressions.map {|exp| attribute_access(person, exp)}
values.compact.join(" ")
end
end
@@ -194,29 +197,30 @@
get_attr(object, part)
end
end
def get_attr(obj, part)
- case
- when !obj
- nil
- when part.is_a?(Symbol) || obj.respond_to?(part.to_sym)
- obj.send(part.to_sym)
- when part.start_with?(":")
- get_attr(obj, part[1..-1])
- when part.start_with?("details[")
- if (obj.respond_to?(:details)) && details = obj.details
- if match = part.match(/details\[(?<field>.*)\]/)
- details[match[:field]]
- else
- raise "Review your -use-field expression. It should read: person.details[target-alt_id]"
- end
- end
- when part.start_with?("account")
- obj.account if obj.respond_to?(:account)
- when part.start_with?("person")
- obj
- else
- raise "Review your expression. Cannot recognize '#{part}' as part of '#{obj.class}'"
+ return unless obj
+
+ is_method = part.is_a?(Symbol) || obj.respond_to?(part.to_sym)
+ return obj.send(part.to_sym) if is_method
+ return get_attr(obj, part[1..]) if part.start_with?(":")
+
+ if part.start_with?("details[")
+ via_details = obj.respond_to?(:details) && (details = obj.details)
+ return unless via_details
+
+ match = part.match(/details\[(?<field>.*)\]/)
+ return details[match[:field]] if match
+
+ raise "Review your -use-field expression. It should read: person.details[target-alt_id]"
end
+
+ if part.start_with?("account")
+ return obj.respond_to?(:account) ? obj.account : nil
+ end
+
+ return obj if part.start_with?("person")
+
+ raise "Review your expression. Cannot recognize '#{part}' as part of '#{obj.class}'"
end
end