# Provide service for for sorting an array of hash based on the values at a specified key in the hash. module Qa module LinkedData class DeepSortService # @param [Array>>] the array of hashes to sort # @param [sort_key] the key in the hash on whose value the array will be sorted # @param [Symbol] preferred language to appear first in the list; defaults to no preference # @return instance of this class # @example the_array parameter # [ # {:uri=>[#], # :id=>[#], # :label=>[#], # :altlabel=>[], # :sort=>[#]}, # {:uri=>[#], # :id=>[#], # :label=>[#], # :altlabel=>[], # :sort=>[#]} # ] def initialize(the_array, sort_key, preferred_language = nil) @sortable_elements = the_array.map { |element| DeepSortElement.new(element, sort_key, preferred_language) } end # Sort an array of hash on the specified sort key. The value in the hash at sort key is expected to be an array # with one or more values that are RDF::Literals that translate to a number (e.g. 2), a string number (e.g. "3"), # a string (e.g. "hello"), or a language qualified string (e.g. "hello"@en). # The sort occurs in the following precedence. # * preference for numeric sort (if only one value each and both are integers or a string that can be converted to an integer) # * single value sort (if only one value each and at least one is not an integer) # * multiple values sort (if either has multiple values) # @return the sorted array # @example returned sorted array # [ # {:uri=>[#], # :id=>[#], # :label=>[#], # :altlabel=>[], # :sort=>[#]}, # {:uri=>[#], # :id=>[#], # :label=>[#], # :altlabel=>[], # :sort=>[#]} # ] def sort @sortable_elements.sort.map(&:element) end class DeepSortElement attr_reader :element, :literals, :preferred_language private :preferred_language delegate :size, to: :@literals def initialize(element, sort_key, preferred_language) element[sort_key] = Qa::LinkedData::LanguageSortService.new(element[sort_key], preferred_language).sort @element = element @literals = element[sort_key] @preferred_language = preferred_language @includes_preferred_language = includes_preferred_language? @all_same_language = all_same_language? end def <=>(other) return numeric_comparator(other) if integer? && other.integer? return single_value_comparator(other) if single? && other.single? multiple_value_comparator(other) end # @return true if there is a single literal that is an integer or a string that can be converted to an integer; otherwise, false def integer? return false unless single? (/\A[-+]?\d+\z/ === literal.to_s) # rubocop:disable Style/CaseEquality end def integer(idx = 0) Integer(literal(idx).to_s) end # @return true if there is only one value; otherwise, false def single? @single ||= literals.size == 1 end def literal(idx = 0) literals[idx] end def downcase_string(idx = 0) to_downcase(literal(idx)) end def language(lit = literals.first) return nil unless Qa::LinkedData::LanguageService.literal_has_language_marker? lit lit.language end def includes_preferred_language? return @includes_preferred_language if @includes_preferred_language.present? # literals are sorted by language with preferred language first in the list @includes_preferred_language = (language == preferred_language) end def all_same_language? return @all_same_language if @all_same_language.present? # literals are sorted by language, so if first = last, then all are the same @all_same_language = (language(literals.first) == language(literals.last)) end def languages filtered_literals_by_language.keys end def filtered_literals(filter_language) filtered_literals_by_language.fetch(filter_language, []) end private # If both test values are single value and both are integers, do a numeric sort def numeric_comparator(other) integer <=> other.integer end # If both test values are single value and at least one is not numeric, do a string sort taking language into consideration # * sort values if neither has a language marker or they both have the same language marker # * otherwise, sort language markers def single_value_comparator(other) return downcase_string <=> other.downcase_string if same_language?(literal, other.literal) compare_languages(language, other.language) end def compare_languages(lang, other_lang) return -1 if preferred_language? lang return 1 if preferred_language? other_lang return -1 if other_lang.blank? return 1 if lang.blank? lang <=> other_lang end # If at least one of the test values has multiple values, sort the multiple values taking language into consideration # * if both lists have all the same language or no language markers at all, just sort the lists and compare each element # * if either list has the preferred language, try to sort the two lists by element after filtering for the preferred language # * otherwise, sort by language until there is a difference def multiple_value_comparator(other) return single_language_list_comparator(other) if all_same_language? && other.all_same_language? return specified_language_list_comparator(other, preferred_language) if includes_preferred_language? && other.includes_preferred_language? multi_language_list_comparator(other) end def single_language_list_comparator(other) list_comparator(literals, other.literals) end def specified_language_list_comparator(other, lang) filtered = filtered_literals(lang) other_filtered = other.filtered_literals(lang) return -1 if !filtered.empty? && other_filtered.empty? return 1 if filtered.empty? && !other_filtered.empty? list_comparator(filtered, other_filtered) end # Walk through language sorted lists # * for each language, determine how closely the list of terms matches # * prioritize the list that gets the most low values def multi_language_list_comparator(other) combined_languages = languages.concat(other.languages).uniq by_language_comparisons = {} combined_languages.each do |lang| cmp = list_comparator(filtered_literals(lang), other.filtered_literals(lang)) by_language_comparisons[lang] = cmp end cmp_sum = by_language_comparisons.values.sum return 1 if cmp_sum.positive? return -1 if cmp_sum.negative? 0 end def list_comparator(list, other_list) # if an element doesn't have any terms in a language, the other element sorts lower return -1 if other_list.empty? return 1 if list.empty? shorter_list_size = [list.size, other_list.size].min cmp = 0 0.upto(shorter_list_size - 1) do |idx| cmp = to_downcase(list[idx]) <=> to_downcase(other_list[idx]) return cmp unless cmp.zero? end return cmp if list.size == other_list.size other_list.size < list.size ? 1 : -1 # didn't find any diffs, shorter list is considered lower end def same_language?(lit, other_lit) return false if only_one_has_language_marker?(lit, other_lit) return true if neither_have_language_markers?(lit, other_lit) lit.language == other_lit.language end def neither_have_language_markers?(lit, other_lit) !language?(lit) && !language?(other_lit) end def only_one_has_language_marker?(lit, other_lit) (!language?(lit) && language?(other_lit)) || (language?(lit) && !language?(other_lit)) end def language?(lit) Qa::LinkedData::LanguageService.literal_has_language_marker? lit end def preferred_language?(lang) preferred_language.present? ? lang == preferred_language : false end def to_downcase(lit) lit.to_s.downcase end def filtered_literals_by_language @filtered_literals_by_language ||= create_all_filters end def create_all_filters bins = {} 0.upto(size - 1) do |idx| lang = language(literals[idx]) filter = bins.fetch(lang, []) filter << literal(idx) bins[lang] = filter end bins end end private_constant :DeepSortElement end end end