Sha256: f24007f27309371906b40da7df9c63202189be63b350b41cda15173cb5be04a2

Contents?: true

Size: 1.68 KB

Versions: 1

Compression:

Stored size: 1.68 KB

Contents

class String
  # Generate n-grams (contiguous substrings) for a string.
  #
  # The n-grams are returned grouped by start position (left to right) and,
  # within each start position, ordered from shortest to longest.
  #
  # options are:
  #  :min_length    : minimum length of the n-grams (defaults to 1)
  #  :max_length    : maximum length of the n-grams (defaults to self.length,
  #                   i.e. the length *before* any padding is applied)
  #  :pad           : pad with '_' before generating the n-grams: one '_' is
  #                   prepended and (length - 1) '_' are appended
  #                   (defaults to false)
  #  :preserve_case : keep the original case; when falsy the string is
  #                   downcased first (defaults to false)
  #
  # Returns an Array of Strings; empty when no substring fits the bounds.
  def n_grams(options = {})
    min_length = options[:min_length] || 1
    max_length = options[:max_length] || length
    value = options[:preserve_case] ? self : downcase
    if options[:pad]
      # Clamp at zero: for an empty string (length - 1) is negative and
      # String#* raises ArgumentError on a negative count.
      trailing = [value.length - 1, 0].max
      value = "_#{value}#{'_' * trailing}"
    end

    # A substring always has at least one character, so a smaller
    # :min_length behaves exactly like 1.
    shortest = [min_length, 1].max
    res = []
    0.upto(value.length - 1) do |index|
      # Never ask for more characters than remain after this start position.
      longest = [max_length, value.length - index].min
      shortest.upto(longest) do |size|
        res << value[index, size]
      end
    end
    res
  end

  # Ask Babel about the language of this text.
  # Convenience method, just calls Babel.guess().
  # See Babel.guess for description.
  def language(options = {})
    Babel.guess(self, options)
  end

  # Ask Babel about the languages this text could be.
  # Takes the first element of every entry returned by Babel.distances().
  # It will return all the registered languages with the most probable
  # language first. You might want to restrict this before presenting to
  # the user.
  def languages(options = {})
    Babel.distances(self, options).map(&:first)
  end

  # Tell Babel that this text is in a given language.
  # Convenience method, just calls Babel.learn().
  # See Babel.learn for description.
  #
  # Note: assignment syntax (text.language = lang) can only pass +lang+;
  # to supply +options+ the method has to be invoked via send/public_send.
  def language=(lang, options = {})
    Babel.learn(lang, self, options)
  end
end

Version data entries

1 entries across 1 versions & 1 rubygems

Version Path
simplificator-babel-0.1.0 lib/babel/string_extensions.rb