module Picky

  module Sources

    # Describes a database source. Needs a SELECT statement
    # (with id in it), and a file option or the options from an AR config file.
    #
    # The select statement can be as complicated as you want,
    # as long as it has an id in it and as long as it can be
    # used in a CREATE TABLE AS statement.
    # (working on that last one)
    #
    # Examples:
    #  Sources::DB.new('SELECT id, title, author, year FROM books') # Uses the config from app/db.yml by default.
    #  Sources::DB.new('SELECT id, title, author, year FROM books', file: 'app/some_db.yml')
    #  Sources::DB.new('SELECT b.id, b.title, b.author, b.publishing_year as year FROM books b INNER JOIN ON ...', file: 'app/some_db.yml')
    #  Sources::DB.new('SELECT id, title, author, year FROM books', adapter: 'mysql', host:'localhost', ...)
    #
    class DB < Base

      # The select statement that was passed in.
      #
      attr_reader :select_statement

      # The database adapter.
      #
      attr_reader :database

      # The database connection options that were either passed in or loaded from the given file.
      #
      attr_reader :connection_options, :options

      @@traversal_id = :__picky_id

      def initialize select_statement, options = { file: 'app/db.yml' }
        check_gem

        @select_statement = select_statement
        @database         = create_database_adapter
        @options          = options
      end

      # Tries to require the active_record gem.
      #
      def check_gem # :nodoc:
        require 'active_record'
      rescue LoadError
        warn_gem_missing 'active_record', 'the (ActiveRecord) DB source'
        exit 1
      end

      def to_s
        parameters = [select_statement.inspect]
        parameters << options unless options.empty?
        %Q{#{self.class.name}(#{parameters.join(', ')})}
      end

      # Creates a database adapter for use with this source.
      #
      def create_database_adapter # :nodoc:
        # THINK Do not use ActiveRecord directly? Use set_table_name etc?
        #
        adapter_class = Class.new ActiveRecord::Base
        adapter_class.abstract_class = true
        adapter_class
      end

      # Configure the backend.
      #
      # Options:
      #  Either
      #  * file => 'some/filename.yml' # With an active record configuration.
      #  Or
      #  * The configuration as a hash.
      #
      def configure options # :nodoc:
        @connection_options = if filename = options[:file]
          File.open(File.join(PICKY_ROOT, filename)) { |file| YAML::load(file) }
        else
          options
        end
        self
      end

      # Connect the backend.
      #
      # Will raise unless connection options have been given.
      #
      def connect_backend
        configure @options
        raise "Database backend not configured" unless connection_options
        database.establish_connection connection_options
      end

      # Take a snapshot of the data.
      #
      # Uses CREATE TABLE AS with the given SELECT statement to create a snapshot of the data.
      #
      def take_snapshot index
        timed_exclaim %Q{  "#{index.identifier}": Taking snapshot of database data.}

        origin = snapshot_table_name index.name
        on_database = database.connection

        # Drop the table if it exists.
        #
        on_database.drop_table origin if on_database.table_exists?(origin)

        # The adapters currently do not support this.
        #
        on_database.execute "CREATE TABLE #{origin} AS #{select_statement}"

        # Add a column that Picky uses to traverse the table's entries.
        #
        on_database.add_column origin, @@traversal_id, :primary_key, :null => :false

        # Execute any special queries this index needs executed.
        #
        on_database.execute index.after_indexing if index.after_indexing
      end

      # Counts all the entries that are used for the index.
      #
      def count index_name
        database.connection.select_value("SELECT COUNT(#{@@traversal_id}) FROM #{snapshot_table_name(index_name)}").to_i
      end

      # The name of the snapshot table created by Picky.
      #
      def snapshot_table_name index_name
        "picky_#{index_name}_index"
      end

      # Harvests the data to index in chunks.
      #
      def harvest category, &block
        (0..count(category.index_name)).step(chunksize) do |offset|
          get_data category, offset, &block
        end
      end

      # Gets the data from the backend.
      #
      def get_data category, offset, &block # :nodoc:
        select_statement = harvest_statement_with_offset category, offset

        # THINK Not really nice like this. Rewrite if possible.
        #
        if database.connection.adapter_name == "PostgreSQL"
          id_key   = 'id'
          text_key = category.from.to_s
          database.connection.execute(select_statement).each do |hash|
            id, text = hash.values_at id_key, text_key
            yield id, text if text
          end
        else
          database.connection.execute(select_statement).each do |id, text|
            yield id, text if text
          end
        end
      end

      # Builds a harvest statement for getting data to index.
      #
      def harvest_statement_with_offset category, offset
        statement = harvest_statement category

        statement += statement.include?('WHERE') ? ' AND' : ' WHERE'

        "#{statement} st.#{@@traversal_id} > #{offset} LIMIT #{chunksize}"
      end

      # The harvest statement used to pull data from the snapshot table.
      #
      def harvest_statement category
        "SELECT id, #{category.from} FROM #{snapshot_table_name(category.index_name)} st"
      end

      # The amount of records that are loaded each chunk.
      #
      def chunksize
        25_000
      end

    end

  end

end