lib/picky/tokenizer.rb in picky-4.6.3 vs lib/picky/tokenizer.rb in picky-4.6.4

- old
+ new

@@ -4,27 +4,45 @@

   # Defines tokenizing processes used both in indexing and querying.
   #
   class Tokenizer

-    extend API::Tokenizer
-
+    extend Picky::Helpers::Identification
     include API::Tokenizer::CharacterSubstituter

     def self.default_indexing_with options = {}
-      @indexing = extract_tokenizer options
+      @indexing = from options
     end
     def self.indexing
       @indexing ||= new
     end

     def self.default_searching_with options = {}
-      @searching = extract_tokenizer options
+      @searching = from options
     end
     def self.searching
       @searching ||= new
     end
+
+    def self.from thing, index_name = nil, category_name = nil
+      return unless thing
+
+      if thing.respond_to? :tokenize
+        thing
+      else
+        if thing.respond_to? :[]
+          Picky::Tokenizer.new thing
+        else
+          raise <<-ERROR
+indexing options #{identifier_for(index_name, category_name)}should be either
+* a Hash
+or
+* an object that responds to #tokenize(text) => [[token1, token2, ...], [original1, original2, ...]]
+ERROR
+        end
+      end
+    end

     def to_s
       reject_condition_location = @reject_condition.to_s[/:(\d+) \(lambda\)/, 1]
       <<-TOKENIZER
 Removes characters: #{@removes_characters_regexp ? "/#{@removes_characters_regexp.source}/" : '-'}
@@ -118,11 +136,11 @@
       substituter?? substituter.substitute(text) : text
     end

     # Reject tokens after tokenizing based on the given criteria.
     #
-    def rejects_token_if &condition
+    def rejects_token_if condition
       @reject_condition = condition
     end
     def reject tokens
       tokens.reject! &@reject_condition
     end
@@ -159,17 +177,21 @@
     attr_reader :substituter
     alias substituter? substituter

     def initialize options = {}
-      substitutes_characters_with options[:substitutes_characters_with] if options[:substitutes_characters_with]
-      removes_characters options[:removes_characters] if options[:removes_characters]
-      stopwords options[:stopwords] if options[:stopwords]
-      splits_text_on options[:splits_text_on] || /\s/
-      normalizes_words options[:normalizes_words] if options[:normalizes_words]
-      max_words options[:max_words]
-      rejects_token_if &(options[:rejects_token_if] || :blank?)
-      case_sensitive options[:case_sensitive] unless options[:case_sensitive].nil?
+      options = default_options.merge options
+      options.each do |method_name, value|
+        send method_name, value unless value.nil?
+      end
+    rescue NoMethodError => e
+      raise %Q{The option "#{e.name}" is not a valid option for a Picky tokenizer.\nPlease see https://github.com/floere/picky/wiki/Indexing-configuration for valid options.}
+    end
+    def default_options
+      {
+        splits_text_on: /\s/,
+        rejects_token_if: :blank?.to_proc
+      }
     end

     # Returns a number of tokens, generated from the given text,
     # based on the parameters given.
     #
\ No newline at end of file