# ActsAsIndexed
# Copyright (c) 2007 - 2011 Douglas F Shearer.
# http://douglasfshearer.com
# Distributed under the MIT license as included with this plugin.

module ActsAsIndexed #:nodoc:
  class SearchAtom

    # Contains a hash of records.
    # { 'record_id' => [pos1, pos2, pos] }
    #--
    # Weighting:
    # http://www.perlmonks.com/index.pl?node_id=27509
    # W(T, D) = tf(T, D) * log ( DN / df(T))
    # weighting = frequency_in_this_record * log (total_number_of_records / number_of_matching_records)

    attr_reader :records

    def initialize(records={})
      @records = records
    end

    # Returns true if the given record is present.
    def include_record?(record_id)
      @records.include?(record_id)
    end

    # Adds +record_id+ to the stored records.
    def add_record(record_id)
      @records[record_id] = [] unless include_record?(record_id)
    end

    # Adds +pos+ to the array of positions for +record_id+.
    def add_position(record_id, pos)
      add_record(record_id)
      @records[record_id] << pos
    end

    # Returns all record IDs stored in this Atom.
    def record_ids
      @records.keys
    end

    # Returns an array of positions for +record_id+ stored in this Atom.
    def positions(record_id)
      @records[record_id]
    end

    # Removes +record_id+ from this Atom.
    def remove_record(record_id)
      @records.delete(record_id)
    end

    # Creates a new SearchAtom with the combined records from self and other
    def +(other)
      SearchAtom.new(@records.clone.merge!(other.records) { |key, _old, _new|
                                                            _old + _new
                                                          })
    end

    # Creates a new SearchAtom with records in other removed from self.
    def -(other)
      records = @records.clone.reject { |name, records| other.records.include?(name) }
      SearchAtom.new(records)
    end

    # Returns at atom containing the records and positions of +self+ preceded by +former+
    # "former latter" or "big dog" where "big" is the former and "dog" is the latter.
    def preceded_by(former)
      matches = SearchAtom.new
      latter = {}
      former.record_ids.each do |rid|
        latter[rid] = @records[rid] if @records[rid]
      end
      # Iterate over each record in latter.
      latter.each do |record_id,pos|

        # Iterate over each position.
        pos.each do |p|
          # Check if previous position is in former.
          if former.include_position?(record_id,p-1)
            matches.add_record(record_id) unless matches.include_record?(record_id)
            matches.add_position(record_id,p)
          end
        end

      end
      matches
    end

    # Returns a hash of record_ids and weightings for each record in the
    # atom.
    def weightings(records_size)
      out = {}
      @records.each do |r_id, pos|

        # Fixes a bug when the records_size is zero. i.e. The only record
        # contaning the word has been deleted.
        if records_size < 1
          out[r_id] = 0.0
          next
        end

        # weighting = frequency * log (records.size / records_with_atom)
        ## parndt 2010/05/03 changed to records_size.to_f to avoid -Infinity Errno::ERANGE exceptions
        ## which would happen for example Math.log(1 / 20) == -Infinity but Math.log(1.0 / 20) == -2.99573227355399
        out[r_id] = pos.size * Math.log(records_size.to_f / @records.size)
      end
      out
    end

    protected

    def include_position?(record_id,pos)
      @records[record_id].include?(pos)
    end

  end
end