module Ultrasphinx
  class Search
    module Internals

      # These methods are kept stateless to ease debugging
      
      private
      
      # Builds a Riddle::Client configured from the search options hash +opts+.
      #
      # Reads the 'per_page', 'page', 'sort_by', 'sort_mode', 'weights',
      # 'class_names' and 'filters' keys. String filter values are not turned into
      # attribute filters; they are appended to opts['parsed_query'] in place.
      #
      # Returns the configured client. Raises UsageError when a 'sort_by' field is
      # combined with the 'relevance' sort mode, when the sort mode is unknown,
      # when a class name has no entry in MODELS_TO_IDS, when a filter field is
      # unknown, or when a filter value cannot be used.
      def build_request_with_options(opts)
        request = Riddle::Client.new
        request.instance_eval do
          @server = Ultrasphinx::CLIENT_SETTINGS['server_host']
          @port = Ultrasphinx::CLIENT_SETTINGS['server_port']
          @match_mode = :extended # Force extended query mode
          @offset = opts['per_page'] * (opts['page'] - 1)
          @limit = opts['per_page']
          # Never ask for more matches than the configured ceiling
          @max_matches = [@offset + @limit, MAX_MATCHES].min
        end

        # Sorting
        sort_by = opts['sort_by']
        unless sort_by.blank?
          if opts['sort_mode'].to_s == 'relevance'
            # If you're sorting by a field you don't want 'relevance' order
            raise UsageError, "Sort mode 'relevance' is not valid with a sort_by field"
          end
          request.sort_by = sort_by.to_s
        end

        if sort_mode = SPHINX_CLIENT_PARAMS['sort_mode'][opts['sort_mode']]
          request.sort_mode = sort_mode
        else
          raise UsageError, "Sort mode #{opts['sort_mode'].inspect} is invalid"
        end

        # Weighting (a nil or empty weights option leaves the client untouched)
        weights = opts['weights']
        unless weights.blank?
          # Order according to the field order for Sphinx, and set the missing fields to 1.0
          text_fields = Fields.instance.types.select { |name, type| type == 'text' }.map(&:first).sort
          request.weights = text_fields.map { |field| weights[field] || 1.0 }
        end

        # Class names; coerced with Array() so a single name works like a list of one
        class_names = Array(opts['class_names'])
        unless class_names.empty?
          class_ids = class_names.map do |model|
            MODELS_TO_IDS[model.to_s] or
              MODELS_TO_IDS[model.to_s.constantize.base_class.to_s] or
              raise UsageError, "Invalid class name #{model.inspect}"
          end
          request.filters << Riddle::Client::Filter.new('class_id', class_ids, false)
        end

        # Extract raw filters
        # XXX We should coerce based on the Field values, not on the class
        Array(opts['filters']).each do |field, value|
          field = field.to_s
          unless Fields.instance.types[field]
            raise UsageError, "field #{field.inspect} is invalid"
          end
          begin
            case value
              when Integer, Float, BigDecimal, NilClass, Array
                # Just bomb the filter in there
                request.filters << Riddle::Client::Filter.new(field, Array(value), false)
              when Range
                # Make sure ranges point in the right direction
                min, max = [value.begin, value.end].map { |x| x._to_numeric }
                # <=> returns nil for incomparable endpoints
                raise NoMethodError unless min <=> max and max <=> min
                min, max = max, min if min > max
                request.filters << Riddle::Client::Filter.new(field, min..max, false)
              when String
                # XXX Hack to move text filters into the query
                opts['parsed_query'] << " @#{field} #{value}"
              else
                raise NoMethodError
            end
          rescue NoMethodError
            # Any NoMethodError raised above is reported as a bad filter value
            raise UsageError, "filter value #{value.inspect} for field #{field.inspect} is invalid"
          end
        end

        request
      end
      
      # Returns a hash of model class name => number of matches for +query+,
      # with 0 for every model in MODELS_TO_IDS that has no matches.
      #
      # Works on a deep copy of +original_request+ with any 'class_id' filter
      # removed, so the counts are not restricted by class.
      def get_subtotals(original_request, query)
        unrestricted = original_request._deep_dup
        unrestricted.instance_eval do
          @filters.reject! { |filter| filter.attribute == 'class_id' }
        end

        counts_by_class_id = get_facets(unrestricted, query, 'class_id')

        # Not using the standard facet caching here
        MODELS_TO_IDS.inject({}) do |subtotals, (klass, id)|
          subtotals[klass] = counts_by_class_id[id] || 0
          subtotals
        end
      end
      
      # Runs +query+ grouped by a facet field and returns the group counts,
      # passed through reverse_map_facets for +original_facet+.
      #
      # Works on a deep copy of +original_request+. For a field whose type is
      # 'text' the grouping is done on the field named "<original_facet>_facet".
      #
      # Raises UsageError if the field to group on is unknown, ConfigurationError
      # if the query raises DaemonError, and DaemonError if the same group value
      # comes back twice.
      def get_facets(original_request, query, original_facet)
        request = original_request._deep_dup

        facet = original_facet
        facet = original_facet + "_facet" if Fields.instance.types[original_facet] == 'text'

        unless Fields.instance.types[facet]
          problem = if facet == original_facet
            "Field #{original_facet} does not exist"
          else
            "Field #{original_facet} is a text field, but was not configured for text faceting"
          end
          raise UsageError, problem
        end

        # Set the facet query parameter and modify per-page setting so we snag all the facets
        request.instance_eval do
          @group_by = facet
          @group_function = :attr
          # NOTE(review): confirm the client actually reads @group_clauses; if its
          # attribute is named group_clause (singular) this assignment has no effect.
          @group_clauses = '@count desc'
          @offset = 0
          @limit = Ultrasphinx::Search.client_options['max_facets']
          @max_matches = [@limit, MAX_MATCHES].min
        end

        # Run the query
        matches = begin
          request.query(query, UNIFIED_INDEX_NAME)[:matches]
        rescue DaemonError
          raise ConfigurationError, "Index seems out of date. Run 'rake ultrasphinx:index'"
        end

        # Map the facets back to something sane: group value => count
        counts = {}
        matches.each do |match|
          attributes = match[:attributes]
          group = attributes['@groupby']
          # Each group value is expected only once in the result
          raise DaemonError if counts[group]
          counts[group] = attributes['@count']
        end

        # Invert the hashes, if we have them
        reverse_map_facets(counts, original_facet)
      end
      
      # Translates the keys of a facet count hash back to their text values.
      #
      # For a field whose type is not 'text', returns a shallow copy of +facets+.
      # For a 'text' field, each key is looked up in FACET_CACHE[facet] and
      # replaced by the cached value; a key with no cached value maps to nil.
      #
      # The cache is rebuilt at most once per call, the first time a key is
      # missing, so a batch of unknown keys costs a single rebuild. A cache that
      # is still absent after the rebuild is treated as empty.
      def reverse_map_facets(facets, facet)
        facets = facets.dup
        return facets unless Fields.instance.types[facet] == 'text'

        rebuilt = false
        facets.inject({}) do |mapped, (hash, value)|
          cache = FACET_CACHE[facet]
          unless rebuilt or (cache and cache.has_key?(hash))
            # Cache is missing or out-of-date for this key
            rebuild_facet_cache(facet)
            rebuilt = true
            cache = FACET_CACHE[facet]
          end
          mapped[(cache || {})[hash]] = value
          mapped
        end
      end
      
      # Fills FACET_CACHE[facet] with a map of integer hash => original value for
      # the textual facet, reading the values from each class listed in
      # Fields.instance.classes[facet]. Returns FACET_CACHE[facet].
      #
      # Raises ConfigurationError if one of those classes has no configured
      # field whose 'as' name equals +facet+.
      #
      # XXX not necessarily optimal since it requires a direct DB hit once per mongrel
      def rebuild_facet_cache(facet)
        Ultrasphinx.say "caching hash reverse map for text facet #{facet}"

        Fields.instance.classes[facet].each do |klass|
          # you can only use a facet from your own self right now; no includes allowed
          field_config = MODEL_CONFIGURATION[klass.name]['fields'].detect { |candidate| candidate['as'] == facet }
          unless field_config
            raise ConfigurationError, "Model #{klass.name} has the requested '#{facet}' field, but it was not configured for faceting"
          end
          column = field_config['field']

          sql_functions = ADAPTER_SQL_FUNCTIONS[ADAPTER]

          # Some adapters need a statement run before the hash function is used
          setup_sql = sql_functions['hash_stored_procedure']
          klass.connection.execute(setup_sql) if setup_sql

          hash_expression = sql_functions['hash']._interpolate(column)
          rows = klass.connection.execute("SELECT #{column} AS value, #{hash_expression} AS hash FROM #{klass.table_name} GROUP BY #{column}")
          rows.each_hash do |row|
            (FACET_CACHE[facet] ||= {})[row['hash'].to_i] = row['value']
          end
        end

        FACET_CACHE[facet]
      end
      
      # Inverse-modulus map the Sphinx ids to the table-specific ids.
      #
      # +sphinx_ids+ is a list of hashes with :index and :doc keys. The items are
      # ordered by :index and each becomes a [class_name, record_id] pair: the
      # class is the MODELS_TO_IDS entry whose id equals doc % model count, and
      # the record id is doc / model count.
      #
      # Raises DaemonError if a document id maps to no known class.
      def convert_sphinx_ids(sphinx_ids)
        # Invert the lookup table once rather than once per result
        ids_to_models = MODELS_TO_IDS.invert
        model_count = MODELS_TO_IDS.size

        sphinx_ids.sort_by { |item| item[:index] }.map do |item|
          doc_id = item[:doc]
          class_name = ids_to_models[doc_id % model_count]
          raise DaemonError, "Impossible Sphinx document id #{doc_id} in query result" unless class_name
          [class_name, doc_id / model_count]
        end
      end

      # Fetch them for real: loads the record for every [class_name, id] pair.
      #
      # For each pair the class is looked up by name and the first entry of
      # client_options['finder_methods'] that the class responds to is called
      # with the id. A Hash result contributes its values; anything else is
      # wrapped with Array(). Every returned record gets a singleton
      # +result_index+ method giving its absolute position in the search results.
      #
      # Raises ConfigurationError if the class responds to none of the finder
      # methods. Re-raises ActiveRecord::RecordNotFound unless
      # client_options['ignore_missing_records'] is set, in which case the
      # missing record is skipped.
      def reify_results(ids)
        results = []

        ids.each do |klass_name, id|
          # What class and class method are we using to get the record?
          klass = klass_name.constantize
          finder = Ultrasphinx::Search.client_options['finder_methods'].detect do |method_name|
            klass.respond_to? method_name
          end
          unless finder
            raise ConfigurationError, "Model #{klass_name} does not respond to any of the configured finder_methods"
          end

          # Load it
          record = begin
            # XXX Does not use Memcached's multiget, or MySQL's, for that matter
            klass.send(finder, id)
          rescue ActiveRecord::RecordNotFound
            # XXX Should maybe adjust the total_found count, etc
            raise unless Ultrasphinx::Search.client_options['ignore_missing_records']
            nil
          end

          # Add it to the list. Cache_fu does funny things with returned record organization.
          results.concat(record.is_a?(Hash) ? record.values : Array(record))
        end

        # Add an accessor for absolute search rank for each record (does anyone use this?)
        page_offset = per_page * (current_page - 1)
        results.each_with_index do |result, index|
          rank = page_offset + index
          result._metaclass.send('define_method', 'result_index') { rank }
        end

        results
      end
      
      # Yields to the block and returns its value, retrying when it raises
      # NoMethodError, a Riddle version/response error, or a refused, reset or
      # broken connection.
      #
      # Up to client_options['max_retries'] retries are made, sleeping
      # client_options['retry_sleep_time'] before each one. Once they are used up
      # a DaemonError carrying the last error's message is raised.
      def perform_action_with_retries
        attempts = 0
        begin
          yield
        rescue NoMethodError, Riddle::VersionError, Riddle::ResponseError,
               Errno::ECONNREFUSED, Errno::ECONNRESET, Errno::EPIPE => error
          attempts += 1
          if attempts > Ultrasphinx::Search.client_options['max_retries']
            say "query failed"
            raise DaemonError, error.to_s
          end

          say "restarting query (#{attempts} attempts already) (#{error})"
          sleep(Ultrasphinx::Search.client_options['retry_sleep_time'])
          retry
        end
      end
      
      # Used to remove some garbage before highlighting: angle-bracket tags,
      # "..." runs, the UTF-8 ellipsis byte sequence, newlines and carriage
      # returns each become a space, then anything from "http" up to the next
      # space or line end becomes a space. Returns nil when +s+ is nil or false.
      def strip_bogus_characters(s)
        return unless s

        without_markup = s.gsub(/<.*?>|\.\.\.|\342\200\246|\n|\r/, " ")
        without_markup.gsub(/http.*?( |$)/, ' ')
      end
      
      # Removes query commands (AND, OR, NOT and @field selectors, in any case)
      # that stand alone as whitespace-delimited tokens, and drops apostrophes in
      # the middle of words so that they don't get split in two.
      #
      # XXX Hack for query commands, since Sphinx doesn't intelligently parse the
      # query in excerpt mode.
      #
      # Only the whitespace before a command is removed; the whitespace after it
      # is matched with a lookahead and kept. That keeps the neighbouring words
      # separated ("foo @title bar" becomes "foo bar", not "foobar") and lets
      # consecutive commands each be matched.
      def strip_query_commands(s)
        s.gsub(/(^|\s)(AND|OR|NOT|@\w+)(?=\s|$)/i, "").gsub(/(\w)\'(\w)/, '\1\2')
      end
    
    end
  end  
end