Sha256: 10e780d57603887bd642b9ee6f59f2f36f08546d3d99c33ca9e75415e13e7233

Contents?: true

Size: 1.52 KB

Versions: 2

Compression:

Stored size: 1.52 KB

Contents

module KeywordMatcher
  # Decides whether a keyword group matches a list of words.
  #
  # `group` must respond to `or` and `not`. Each returns either nil or a
  # three-level nesting: a list of groups, each group a list of term lists,
  # each term list a list of term strings. A group matches when every one of
  # its term lists has at least one term that matches at least one word.
  class Process
    attr_reader :group, :words

    # Largest DamerauLevenshtein distance still accepted as a fuzzy match.
    FUZZINESS = 1
    # Unquoted terms shorter than this are compared exactly, never fuzzily.
    MIN_WORD_LENGTH_FOR_FUZZY = 4

    # Pattern => replacement pairs used to derive one alternative spelling of
    # a term. Built once instead of on every lookup.
    #
    # NOTE(review): inside the `[,|.]` character class the `|` is a literal
    # pipe, not an alternation, so "1|5" is rewritten as well. Left untouched
    # to keep matching results unchanged - confirm whether that is intended.
    SYNONYMS = {
      %r{([0-9]+)гр} => '\1г',
      %r{([0-9])([,|.])(.*)} => '\1-\3'
    }.freeze

    # Matches a single- or double-quoted run (with backslash escapes).
    #
    # NOTE(review): the pattern is not anchored, so a quoted run anywhere in
    # the term marks the whole term as quoted, while `matched?` strips only
    # the first and last character. Kept as is to preserve behavior.
    QUOTED_TERM = /(["'])(?:(?=(\\?))\2.)*?\1/

    private_constant :SYNONYMS, :QUOTED_TERM

    # @param group [#or, #not] holder of the positive (`or`) and negative
    #   (`not`) term groups
    # @param words [Enumerable<String>] words to test the groups against
    def initialize(group, words)
      @group = group
      @words = words
    end

    # @return [Boolean] true when at least one `or` group matches and no
    #   `not` group matches
    def found?
      in_any?(group.or) && !negation_found?
    end

    private

    # @return [Boolean] true when any `not` group matches; false when there
    #   are no `not` groups at all
    def negation_found?
      in_any?(group.not)
    end

    # @param groups [Enumerable, nil] list of groups (see class comment)
    # @return [Boolean] true when at least one group has all of its term
    #   lists satisfied by the words; false for nil or empty input
    def in_any?(groups)
      # A missing collection means "nothing to match", mirroring how an
      # absent `not` collection is treated.
      return false unless groups

      groups.any? do |values|
        values.all? do |terms|
          terms.any? { |term| term_in_words?(term) }
        end
      end
    end

    # True when the term, or its synonym, matches at least one word. The
    # synonym depends only on the term, so it is resolved once per term
    # rather than once per word.
    def term_in_words?(term)
      candidates = variants(term)
      words.any? do |word|
        candidates.any? { |candidate| matched?(candidate, word) }
      end
    end

    # @return [Boolean] true when `word` matches `term`: exactly for quoted
    #   or short terms, otherwise within FUZZINESS edits as reported by
    #   DamerauLevenshtein
    def matched?(term, word)
      # Quoted terms are compared without their first and last character.
      return word == term[1..-2] if quoted?(term)
      return word == term if precise?(term)

      ::DamerauLevenshtein.distance(term, word) <= FUZZINESS
    end

    # @return [Boolean] true when `word` matches `term` or the term's synonym
    def condition(term, word)
      variants(term).any? { |candidate| matched?(candidate, word) }
    end

    # @return [Array<String>] the term itself followed by its synonym, if any
    def variants(term)
      [term, find_synonym(term)].compact
    end

    # @return [String, nil] the term rewritten by the first synonym pattern
    #   that matches it, or nil when no pattern matches
    def find_synonym(term)
      pattern, replacement = synonyms_h.find { |candidate, _| term.match?(candidate) }
      term.gsub(pattern, replacement) if pattern
    end

    # @return [Hash{Regexp => String}] synonym patterns and their replacements
    def synonyms_h
      SYNONYMS
    end

    # @return [Boolean] true when the term must be compared exactly: it is
    #   quoted, or shorter than MIN_WORD_LENGTH_FOR_FUZZY
    def precise?(term)
      quoted?(term) || term.length < MIN_WORD_LENGTH_FOR_FUZZY
    end

    # @return [Boolean] true when the term contains a quoted run
    def quoted?(term)
      term.match?(QUOTED_TERM)
    end
  end
end

Version data entries

2 entries across 2 versions & 1 rubygems

Version Path
keyword_matcher-0.6.1 lib/keyword_matcher/process.rb
keyword_matcher-0.6.0 lib/keyword_matcher/process.rb