module Eco
  module Data
    module FuzzyMatch
      # String utilities used for fuzzy matching: normalization, word extraction
      # and generation of n-grams / combinations / permutations.
      #
      # The methods `ngrams`, `combinations` and `permutations` are not defined in
      # this module; they are expected to be available in the including context.
      module StringHelpers
        # Downcases and trims.
        # @param value [String, Symbol, Array] the value(s) to normalize.
        #   An `Array` is normalized element by element; a `Symbol` is converted
        #   to `String` first.
        # @return [String, Array, nil] the normalized value(s); `nil` when `value`
        #   is none of the supported types.
        def normalize_string(value)
          case value
          when Array
            value.map { |val| normalize_string(val) }
          when Symbol
            # `to_s` (not `to_sym`): `Symbol#to_sym` returns the receiver itself,
            # which made this branch recurse forever.
            normalize_string(value.to_s)
          when String
            value.downcase.strip
          end
        end

        # Extracts the words of `str`, where a word is a run of ASCII letters,
        # apostrophes and hyphens.
        # @param str [String, nil] the input string.
        # @param normalized [Boolean] `true` when `str` is already normalized
        #   (it skips the call to `normalize_string`).
        # @return [Array<String>] the words found; empty when `str` is `nil` or `false`.
        def get_words(str, normalized: false)
          return [] unless str

          str = normalize_string(str) unless normalized
          str.scan(/[a-zA-Z'-]+/)
        end

        # Keeps the start order of the `words` and consecutive `words` together/consecutive.
        # @param str [String] the input string with the words.
        # @param range [Integer, Range] determine the length of the generated values.
        # @param normalized [Boolean] `true` when `str` is already normalized.
        # @return [Array] combinations of `range` length of `words`
        #   (the result of `ngrams`, returned as is).
        def string_ngrams(str, range = 2..3, normalized: false)
          ngrams(get_words(str, normalized: normalized), range)
        end

        # Keeps the start order of the `words` of the input `Array` `words`.
        # It does **not** keep consecutive `words` together (it can jump/skip items).
        # @param str [String] the input string with the words.
        # @param range [Integer, Range] determine the length of the generated values.
        # @param normalized [Boolean] `true` when `str` is already normalized.
        # @return [Array] combinations of `range` length of `words`,
        #   each one joined with a single space.
        def string_combinations(str, range = 2..3, normalized: false)
          combinations(get_words(str, normalized: normalized), range)
            .map { |comb| comb.join(' ') }
        end

        # It includes `combinations` that break the initial order of the `Array`.
        # It does **not** keep consecutive `words` together (it can jump/skip items).
        # @param str [String] the input string with the words.
        # @param range [Integer, Range] determine the length of the generated values.
        # @param normalized [Boolean] `true` when `str` is already normalized.
        # @return [Array] permutations of `range` length of `words`,
        #   each one joined with a single space.
        def string_permutations(str, range = 2..3, normalized: false)
          permutations(get_words(str, normalized: normalized), range)
            .map { |comb| comb.join(' ') }
        end

        # Keeps the start order of the `chars` and consecutive `chars` together/consecutive.
        # @param str [String] the input `word` string.
        # @param range [Integer, Range] determine the length of the generated values.
        # @param normalized [Boolean] `true` when `str` is already normalized.
        # @return [Array] combinations of `range` length of `chars`, each one
        #   passed through `no_blanks`.
        def word_ngrams(str, range = 2..3, normalized: false)
          str = normalize_string(str) unless normalized
          # NOTE(review): `no_blanks` returns `nil` for anything that is not a `String`,
          # so this presumably relies on `ngrams` yielding Strings -- confirm against `ngrams`.
          ngrams(str.to_s.chars, range)
            .map { |comb| no_blanks(comb) }
        end

        # Removes all the space characters of `str`.
        # @param str [String] the input string.
        # @return [String, nil] `str` without spaces; `nil` when `str` is not a `String`.
        def no_blanks(str)
          return nil unless str.is_a?(String)

          str.tr(' ', '')
        end
      end
    end
  end
end