module Ultrasphinx
  class Search
    module Internals

      # These methods are kept stateless to ease debugging

      private

      def build_request_with_options opts

        request = Sphinx::Client.new

        request.SetServer(
          Ultrasphinx::CLIENT_SETTINGS['server_host'],
          Ultrasphinx::CLIENT_SETTINGS['server_port']
        )

        # Force extended query mode
        request.SetMatchMode(Sphinx::Client::SPH_MATCH_EXTENDED)

        offset, limit = opts['per_page'] * (opts['page'] - 1), opts['per_page']

        request.SetLimits offset, limit, [offset + limit, MAX_MATCHES].min
        request.SetSortMode SPHINX_CLIENT_PARAMS['sort_mode'][opts['sort_mode']], opts['sort_by'].to_s

        if weights = opts['weights']
          # Order the weights hash according to the field order for Sphinx, and set the missing fields to 1.0
          request.SetWeights(Fields.instance.types.select{|n,t| t == 'text'}.map(&:first).sort.inject([]) do |array, field|
            array << (weights[field] || 1.0)
          end)
        end

        unless opts['class_names'].compact.empty?
          request.SetFilter 'class_id', opts['class_names'].map{|m| MODELS_TO_IDS[m.to_s]}
        end

        # Extract ranged raw filters
        # Some of this mangling might not be necessary
        opts['filters'].each do |field, value|
          begin
            case value
              when Fixnum, Float, BigDecimal, NilClass, Array
                request.SetFilter field, Array(value)
              when Range
                min, max = [value.begin, value.end].map do |x|
                  x._to_numeric
                end
                raise NoMethodError unless min <=> max and max <=> min
                min, max = max, min if min > max
                request.SetFilterRange field, min, max
              when String
                opts['parsed_query'] << " @#{field} #{value}"
              else
                raise NoMethodError
            end
          rescue NoMethodError => e
            raise Sphinx::SphinxArgumentError, "filter: #{field.inspect}:#{value.inspect} is invalid"
          end
        end

        request
      end

      def get_subtotals(original_request, query)
        request = original_request._deep_dup
        request.instance_eval { @filters.delete_if {|f| f['attr'] == 'class_id'} }

        facets = get_facets(request, query, 'class_id')

        # Not using the standard facet caching here
        Hash[*(MODELS_TO_IDS.map do |klass, id|
          [klass, facets[id] || 0]
        end.flatten)]
      end

      def get_facets(original_request, query, original_facet)
        request, facet = original_request._deep_dup, original_facet

        facet += "_facet" if Fields.instance.types[original_facet] == 'text'
        raise UsageError, "Field #{original_facet} does not exist or was not configured for faceting" unless Fields.instance.types[facet]

        # Set the facet query parameter and modify the per-page setting so we snag all the facets
        request.SetGroupBy(facet, Sphinx::Client::SPH_GROUPBY_ATTR, '@count desc')
        limit = self.class.client_options['max_facets']
        request.SetLimits 0, limit, [limit, MAX_MATCHES].min

        # Run the query
        matches = request.Query(query)['matches']

        # Map the facets back to something sane
        facets = {}
        matches.each do |match|
          match = match.last['attrs'] # :(
          raise ResponseError if facets[match['@groupby']]
          facets[match['@groupby']] = match['@count']
        end

        # Invert CRCs, if we have them
        reverse_map_facets(facets, original_facet)
      end

      def reverse_map_facets(facets, facet)
        facets = facets.dup

        if Fields.instance.types[facet] == 'text'
          # Apply the map, rebuilding if the cache is missing or out-of-date
          facets = Hash[*(facets.map do |crc, value|
            rebuild_facet_cache(facet) unless FACET_CACHE[facet] and FACET_CACHE[facet].has_key?(crc)
            [FACET_CACHE[facet][crc], value]
          end.flatten)]
        end

        facets
      end

      def rebuild_facet_cache(facet)
        # Cache the reverse CRC map for the textual facet if it hasn't been done yet
        # XXX Not necessarily optimal, since it requires a direct DB hit once per mongrel
        Ultrasphinx.say "caching CRC reverse map for text facet #{facet}"
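        # For each model that contributes this facet, find the underlying column and query the
        # database directly to build a CRC32 => original-value map.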
        Fields.instance.classes[facet].each do |klass|
          # A facet can only come from the model's own table right now; no includes allowed
          field = (MODEL_CONFIGURATION[klass.name]['fields'].detect do |field_hash|
            field_hash['as'] == facet
          end)['field']

          klass.connection.execute("SELECT #{field} AS value, CRC32(#{field}) AS crc FROM #{klass.table_name} GROUP BY #{field}").each_hash do |hash|
            (FACET_CACHE[facet] ||= {})[hash['crc'].to_i] = hash['value']
          end
        end

        FACET_CACHE[facet]
      end

      def reify_results(sphinx_ids)

        # Order by position and then toss the rest of the data
        sphinx_ids = sphinx_ids.sort_by do |key, value|
          value['index'] or raise ConfigurationError, "Your Sphinx client is not properly patched."
        end.map(&:first)

        # Inverse-modulus map the Sphinx ids to the table-specific ids
        ids = Hash.new([])
        sphinx_ids.each do |id|
          ids[MODELS_TO_IDS.invert[id % MODELS_TO_IDS.size]] += [id / MODELS_TO_IDS.size] # yay math
        end
        raise Sphinx::SphinxResponseError, "impossible document id in query result" unless ids.values.flatten.size == sphinx_ids.size

        # Fetch them for real
        results = []
        ids.each do |model, id_set|
          klass = model.constantize

          finder = self.class.client_options['finder_methods'].detect do |method_name|
            klass.respond_to? method_name
          end

          # Ultrasphinx.say "using #{klass.name}.#{finder} as finder method"

          begin
            # XXX Does not use Memcached's multiget
            results += case instances = id_set.map { |id| klass.send(finder, id) }
              when Hash
                instances.values
              when Array
                instances
              else
                Array(instances)
            end
          rescue ActiveRecord::ActiveRecordError => e
            raise Sphinx::SphinxResponseError, e.inspect
          end
        end

        # Put them back in order (assign the result so the sort actually takes effect)
        results = results.sort_by do |r|
          raise Sphinx::SphinxResponseError, "Bogus ActiveRecord id for #{r.class}:#{r.id}" unless r.id
          model_index = MODELS_TO_IDS[r.class.base_class.name]
          raise UsageError, "#{r.class.base_class} is not an indexed class. Maybe you indexed an STI child class instead of the base class?" unless model_index
          index = (sphinx_ids.index(sphinx_id = r.id * MODELS_TO_IDS.size + model_index))
          raise Sphinx::SphinxResponseError, "Bogus reverse id for #{r.class}:#{r.id} (Sphinx:#{sphinx_id})" unless index
          index / sphinx_ids.size.to_f
        end

        # Add an accessor for absolute search rank for each record
        results.each_with_index do |r, index|
          i = per_page * (current_page - 1) + index
          r._metaclass.send('define_method', 'result_index') { i }
        end

        results
      end

      def strip_bogus_characters(s)
        # Used to remove some garbage before highlighting
        s.gsub(/<.*?>|\.\.\.|\342\200\246|\n|\r/, " ").gsub(/http.*?( |$)/, ' ') if s
      end

      def strip_query_commands(s)
        # XXX Hack for query commands, since Sphinx doesn't intelligently parse the query in excerpt mode
        # Also removes apostrophes in the middle of words so that they don't get split in two.
        s.gsub(/(^|\s)(AND|OR|NOT|\@\w+)(\s|$)/i, "").gsub(/(\w)\'(\w)/, '\1\2')
      end

    end
  end
end