Sha256: aea7d77abe471ee50648e8569621efebb31d737b6c802d2e01d8a3e0aa828741

Contents?: true

Size: 1.84 KB

Versions: 311

Compression:

Stored size: 1.84 KB

Contents

# frozen_string_literal: true

module Bundler
  # Suggests words from a fixed corpus that are "close" to a given word,
  # using a weighted Levenshtein (edit) distance.
  class SimilarityDetector
    # Pairs a corpus entry with its computed distance from the queried word.
    SimilarityScore = Struct.new(:string, :distance)

    # initialize with an array of words to be matched against
    def initialize(corpus)
      @corpus = corpus
    end

    # return an array of words similar to 'word' from the corpus, closest
    # first. Only entries whose weighted distance is <= limit are returned.
    # Entries for which no distance can be computed (a nil word or a nil
    # corpus entry) are skipped rather than raising.
    def similar_words(word, limit = 3)
      words_by_similarity = @corpus.map {|w| SimilarityScore.new(w, levenshtein_distance(word, w)) }
      comparable = words_by_similarity.select {|s| s.distance && s.distance <= limit }
      comparable.sort_by(&:distance).map(&:string)
    end

    # return the result of 'similar_words', concatenated into a list
    # (eg "a", "a or b", "a, b or c"). Returns nil when nothing is similar.
    def similar_word_list(word, limit = 3)
      words = similar_words(word, limit)
      if words.length == 1
        words[0]
      elsif words.length > 1
        [words[0..-2].join(", "), words[-1]].join(" or ")
      end
    end

    protected

    # Weighted Levenshtein distance between 'this' and 'that'.
    # https://www.informit.com/articles/article.aspx?p=683059&seqNum=36
    #
    # ins, del, sub are the weighted costs of an insertion, a deletion and a
    # substitution. Returns nil if either string is nil. Empty strings are
    # supported: the distance is then the other string's length times the
    # relevant cost.
    def levenshtein_distance(this, that, ins = 2, del = 2, sub = 1)
      return nil if this.nil? || that.nil?

      # Only two rows of the distance matrix are ever needed at once, so keep
      # the previous row and build the current one from it.
      # Row 0: cost of building each prefix of 'this' from the empty string.
      previous = (0..this.length).map {|j| j * ins }

      (1..that.length).each do |i|
        # Column 0: cost of reducing the first i characters of 'that' to nothing.
        current = [i * del]

        (1..this.length).each do |j|
          # critical comparison
          current[j] = [
            previous[j - 1] + (this[j - 1] == that[i - 1] ? 0 : sub),
            current[j - 1] + ins,
            previous[j] + del,
          ].min
        end

        previous = current
      end

      # The last value of the final row is the distance between the strings
      previous.last
    end
  end
end

Version data entries

311 entries across 311 versions & 8 rubygems

Version Path
rubygems-update-3.5.9 bundler/lib/bundler/similarity_detector.rb
bundler-2.5.9 lib/bundler/similarity_detector.rb
rubygems-update-3.5.8 bundler/lib/bundler/similarity_detector.rb
bundler-2.5.8 lib/bundler/similarity_detector.rb
rubygems-update-3.5.7 bundler/lib/bundler/similarity_detector.rb
bundler-2.5.7 lib/bundler/similarity_detector.rb
direct7-0.0.13 vendor/bundle/ruby/2.7.0/gems/bundler-2.4.21/lib/bundler/similarity_detector.rb
rubygems-update-3.5.6 bundler/lib/bundler/similarity_detector.rb
bundler-2.5.6 lib/bundler/similarity_detector.rb
direct7-0.0.12 vendor/bundle/ruby/2.7.0/gems/bundler-2.4.21/lib/bundler/similarity_detector.rb
rubygems-update-3.5.5 bundler/lib/bundler/similarity_detector.rb
bundler-2.5.5 lib/bundler/similarity_detector.rb
harbr-0.2.10 vendor/bundle/ruby/3.2.0/gems/bundler-2.4.21/lib/bundler/similarity_detector.rb
harbr-0.2.9 vendor/bundle/ruby/3.2.0/gems/bundler-2.4.21/lib/bundler/similarity_detector.rb
harbr-0.2.8 vendor/bundle/ruby/3.2.0/gems/bundler-2.4.21/lib/bundler/similarity_detector.rb
harbr-0.2.7 vendor/bundle/ruby/3.2.0/gems/bundler-2.4.21/lib/bundler/similarity_detector.rb
harbr-0.2.6 vendor/bundle/ruby/3.2.0/gems/bundler-2.4.21/lib/bundler/similarity_detector.rb
harbr-0.2.5 vendor/bundle/ruby/3.2.0/gems/bundler-2.4.21/lib/bundler/similarity_detector.rb
harbr-0.2.4 vendor/bundle/ruby/3.2.0/gems/bundler-2.4.21/lib/bundler/similarity_detector.rb
harbr-0.2.3 vendor/bundle/ruby/3.2.0/gems/bundler-2.4.21/lib/bundler/similarity_detector.rb