lib/babel/string_extensions.rb in simplificator-babel-0.0.4 vs lib/babel/string_extensions.rb in simplificator-babel-0.1.0

- old
+ new

@@ -1,42 +1,47 @@ class String - - # TODO: recursive? - def ngrams(options = {}) + # Generate n-grams for a string. + # options are: + # :min_length : minimum length of the n-grams (defaults to 1) + # :max_length : maximum length of the n-grams (defaults to self.length) + # :pad : pad with '_' to generate all possible n-grams (defaults to false) + def n_grams(options = {}) + # TODO: recursive? + # TODO: use min/max length for loop index instead of looping + # all and then use if test to decide if to add or not min_length = options[:min_length] || 1 max_length = options[:max_length] || self.length pad = options[:pad] || false value = options[:preserve_case] ? self : self.downcase value = "_#{value}#{'_' * (value.length - 1)}" if pad res = [] - # TODO: use min/max length for loop index instead of looping - # all and then use if test to decide if to add or not 0.upto(value.length - 1) do |index| index.upto(value.length - 1) do |len| if value[index..len].length >= min_length && value[index..len].length <= max_length res << value[index..len] end end end res end - # def byte_grams(options = {}) - # min_length = options[:min_length] || 1 - # max_length = options[:max_length] || self.length - # value = options[:preserve_case] ? self : self.downcase - # res = [] - # - # end - - - # Ask Babel about the language of this text - # Can return nil if no language found + # Ask Babel about the language of this text. + # Convenience method, just calls Babel.guess(). + # See Babel.guess for description. def language(options = {}) Babel.guess(self, options) end + # Ask Babel about the languages this text could be. + # It will return all the registered languages with the most probable + # Language first. You might want to restrict this before presenting to + # the user. + def languages(options = {}) + Babel.distances(self, options).map() {|item| item.first} + end - # Tell Babel that this text is in a given language + # Tell Babel that this text is in a given language. 
+ # Convenience method, just calls Babel.learn(). + # See Babel.learn for description def language=(lang, options = {}) Babel.learn(lang, self, options) end end