lib/picky/internals/tokenizers/query.rb in picky-2.0.0 vs lib/picky/internals/tokenizers/query.rb in picky-2.1.0
- old
+ new
@@ -1,79 +1,59 @@
# encoding: utf-8
#
module Internals
module Tokenizers
-
+
# There are a few class methods that you can use to configure how a query works.
#
# removes_characters regexp
# illegal_after_normalizing regexp
# stopwords regexp
# contracts_expressions regexp, to_string
# splits_text_on regexp
# normalizes_words [[/regexp1/, 'replacement1'], [/regexp2/, 'replacement2']]
#
class Query < Base
-
+
def self.default= new_default
@default = new_default
end
def self.default
@default ||= new
end
-
+
attr_reader :maximum_tokens
-
+
def initialize options = {}
super options
@maximum_tokens = options[:maximum_tokens] || 5
end
-
- def preprocess text
- remove_illegals text # Remove illegal characters
- remove_non_single_stopwords text # remove stop words
- text
- end
-
- # Split the text and put some back together.
- #
- # TODO Make the same as in indexing?
- #
- def pretokenize text
- split text
- end
-
+
# Let each token process itself.
# Reject, limit, and partialize tokens.
#
+ # In querying we work with real tokens (in indexing it's just symbols).
+ #
def process tokens
- tokens.tokenize_with self
- tokens.reject # Reject any tokens that don't meet criteria
- tokens.cap maximum_tokens # Cut off superfluous tokens
- tokens.partialize_last # Set certain tokens as partial
+ tokens.reject # Reject any tokens that don't meet criteria.
+ tokens.cap maximum_tokens # Cut off superfluous tokens.
+ tokens.partialize_last # Set certain tokens as partial.
tokens
end
-
- # Called by the token.
+
+ # Converts words into real tokens.
#
- # TODO Perhaps move to Normalizer?
- #
- def normalize text
- text = substitute_characters text # Substitute special characters
- text.downcase! # Downcase all text
- normalize_with_patterns text # normalize
- text.to_sym # symbolize
+ def tokens_for words
+ Internals::Query::Tokens.processed words, downcase?
end
-
- # Returns a token for a word.
- # The basic query tokenizer uses new tokens.
+ # Returns a tokens object.
#
- def token_for word
- Internals::Query::Token.processed word
+ def empty_tokens
+ Internals::Query::Tokens.new
end
-
+
end
-
+
end
-
+
end
\ No newline at end of file
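
A minimal usage sketch of the 2.1.0 side of this diff, for orientation only. It is not taken from the gem's own call sites: only maximum_tokens, tokens_for, empty_tokens and process appear in the diff above, and the assumption that tokens_for expects already preprocessed, split words (and that the per-token normalizing now lives in Internals::Query::Tokens.processed) is inferred from the removed preprocess/normalize methods, not confirmed by it.

    # Hypothetical wiring, assuming picky-2.1.0 internals as shown above.
    tokenizer = Internals::Tokenizers::Query.new maximum_tokens: 3

    words  = %w(search engine rub)      # assumed: already preprocessed and split
    tokens = tokenizer.tokens_for words # builds an Internals::Query::Tokens collection
    tokens = tokenizer.process tokens   # rejects, caps at 3, partializes the last token

In 2.0.0 the same class instead produced single tokens via token_for and let the tokens drive normalization through tokens.tokenize_with self in process; after this change the Query tokenizer only builds and trims the Tokens collection, while the per-token work happens inside the Tokens class.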