module Sequel
  class Dataset
    COMMA_SEPARATOR = ', '.freeze
    COUNT_OF_ALL_AS_COUNT = SQL::Function.new(:count, LiteralString.new('*'.freeze)).as(:count)
    ARRAY_ACCESS_ERROR_MSG = 'You cannot call Dataset#[] with an integer or with no arguments.'.freeze
    ARG_BLOCK_ERROR_MSG = 'Must use either an argument or a block, not both'.freeze
    IMPORT_ERROR_MSG = 'Using Sequel::Dataset#import an empty column array is not allowed'.freeze

    # Returns the first record matching the conditions. Examples:
    #
    #   ds[:id=>1] => {:id=1}
    def [](*conditions)
      raise(Error, ARRAY_ACCESS_ERROR_MSG) if (conditions.length == 1 and conditions.first.is_a?(Integer)) or conditions.length == 0
      first(*conditions)
    end

    # Update all records matching the conditions
    # with the values specified. Examples:
    #
    #   ds[:id=>1] = {:id=>2} # SQL: UPDATE ... SET id = 2 WHERE id = 1
    def []=(conditions, values)
      filter(conditions).update(values)
    end

    # Returns the average value for the given column.
    def avg(column)
      aggregate_dataset.get{avg(column)}
    end
    
    # Returns true if no records exist in the dataset, false otherwise
    def empty?
      get(1).nil?
    end
    
    # If a integer argument is
    # given, it is interpreted as a limit, and then returns all 
    # matching records up to that limit.  If no argument is passed,
    # it returns the first matching record.  If any other type of
    # argument(s) is passed, it is given to filter and the
    # first matching record is returned. If a block is given, it is used
    # to filter the dataset before returning anything.  Examples:
    # 
    #   ds.first => {:id=>7}
    #   ds.first(2) => [{:id=>6}, {:id=>4}]
    #   ds.order(:id).first(2) => [{:id=>1}, {:id=>2}]
    #   ds.first(:id=>2) => {:id=>2}
    #   ds.first("id = 3") => {:id=>3}
    #   ds.first("id = ?", 4) => {:id=>4}
    #   ds.first{|o| o.id > 2} => {:id=>5}
    #   ds.order(:id).first{|o| o.id > 2} => {:id=>3}
    #   ds.first{|o| o.id > 2} => {:id=>5}
    #   ds.first("id > ?", 4){|o| o.id < 6} => {:id=>5}
    #   ds.order(:id).first(2){|o| o.id < 2} => [{:id=>1}]
    def first(*args, &block)
      ds = block ? filter(&block) : self

      if args.empty?
        ds.single_record
      else
        args = (args.size == 1) ? args.first : args
        if Integer === args
          ds.limit(args).all
        else
          ds.filter(args).single_record
        end
      end
    end

    # Return the column value for the first matching record in the dataset.
    # Raises an error if both an argument and block is given.
    #
    #   ds.get(:id)
    #   ds.get{|o| o.sum(:id)}
    def get(column=nil, &block)
      if column
        raise(Error, ARG_BLOCK_ERROR_MSG) if block
        select(column).single_value
      else
        select(&block).single_value
      end
    end

    # Returns a dataset grouped by the given column with count by group,
    # order by the count of records (in ascending order). Column aliases
    # may be supplied, and will be included in the select clause.
    #
    # Examples:
    #
    #   ds.group_and_count(:name).all => [{:name=>'a', :count=>1}, ...]
    #   ds.group_and_count(:first_name, :last_name).all => [{:first_name=>'a', :last_name=>'b', :count=>1}, ...]
    #   ds.group_and_count(:first_name___name).all => [{:name=>'a', :count=>1}, ...]
    def group_and_count(*columns)
      group(*columns.map{|c| unaliased_identifier(c)}).select(*(columns + [COUNT_OF_ALL_AS_COUNT])).order(:count)
    end

    # Inserts multiple records into the associated table. This method can be
    # to efficiently insert a large amounts of records into a table. Inserts
    # are automatically wrapped in a transaction.
    # 
    # This method is called with a columns array and an array of value arrays:
    #
    #   dataset.import([:x, :y], [[1, 2], [3, 4]])
    #
    # This method also accepts a dataset instead of an array of value arrays:
    #
    #   dataset.import([:x, :y], other_dataset.select(:a___x, :b___y))
    #
    # The method also accepts a :slice or :commit_every option that specifies
    # the number of records to insert per transaction. This is useful especially
    # when inserting a large number of records, e.g.:
    #
    #   # this will commit every 50 records
    #   dataset.import([:x, :y], [[1, 2], [3, 4], ...], :slice => 50)
    def import(columns, values, opts={})
      return @db.transaction{insert(columns, values)} if values.is_a?(Dataset)

      return if values.empty?
      raise(Error, IMPORT_ERROR_MSG) if columns.empty?
      
      if slice_size = opts[:commit_every] || opts[:slice]
        offset = 0
        loop do
          @db.transaction(opts){multi_insert_sql(columns, values[offset, slice_size]).each{|st| execute_dui(st)}}
          offset += slice_size
          break if offset >= values.length
        end
      else
        statements = multi_insert_sql(columns, values)
        @db.transaction{statements.each{|st| execute_dui(st)}}
      end
    end
    
    # Returns the interval between minimum and maximum values for the given 
    # column.
    def interval(column)
      aggregate_dataset.get{max(column) - min(column)}
    end

    # Reverses the order and then runs first.  Note that this
    # will not necessarily give you the last record in the dataset,
    # unless you have an unambiguous order.  If there is not
    # currently an order for this dataset, raises an Error.
    def last(*args, &block)
      raise(Error, 'No order specified') unless @opts[:order]
      reverse.first(*args, &block)
    end
    
    # Maps column values for each record in the dataset (if a column name is
    # given), or performs the stock mapping functionality of Enumerable. 
    # Raises an error if both an argument and block are given. Examples:
    #
    #   ds.map(:id) => [1, 2, 3, ...]
    #   ds.map{|r| r[:id] * 2} => [2, 4, 6, ...]
    def map(column=nil, &block)
      if column
        raise(Error, ARG_BLOCK_ERROR_MSG) if block
        super(){|r| r[column]}
      else
        super(&block)
      end
    end

    # Returns the maximum value for the given column.
    def max(column)
      aggregate_dataset.get{max(column)}
    end

    # Returns the minimum value for the given column.
    def min(column)
      aggregate_dataset.get{min(column)}
    end

    # This is a front end for import that allows you to submit an array of
    # hashes instead of arrays of columns and values:
    # 
    #   dataset.multi_insert([{:x => 1}, {:x => 2}])
    #
    # Be aware that all hashes should have the same keys if you use this calling method,
    # otherwise some columns could be missed or set to null instead of to default
    # values.
    #
    # You can also use the :slice or :commit_every option that import accepts.
    def multi_insert(hashes, opts={})
      return if hashes.empty?
      columns = hashes.first.keys
      import(columns, hashes.map{|h| columns.map{|c| h[c]}}, opts)
    end

    # Returns a Range object made from the minimum and maximum values for the
    # given column.
    def range(column)
      if r = aggregate_dataset.select{[min(column).as(v1), max(column).as(v2)]}.first
        (r[:v1]..r[:v2])
      end
    end
    
    # Returns a hash with key_column values as keys and value_column values as
    # values.  Similar to to_hash, but only selects the two columns.
    def select_hash(key_column, value_column)
      select(key_column, value_column).to_hash(hash_key_symbol(key_column), hash_key_symbol(value_column))
    end
    
    # Selects the column given (either as an argument or as a block), and
    # returns an array of all values of that column in the dataset.  If you
    # give a block argument that returns an array with multiple entries,
    # the contents of the resulting array are undefined.
    def select_map(column=nil, &block)
      ds = naked.ungraphed
      ds = if column
        raise(Error, ARG_BLOCK_ERROR_MSG) if block
        ds.select(column)
      else
        ds.select(&block)
      end
      ds.map{|r| r.values.first}
    end
    
    # The same as select_map, but in addition orders the array by the column.
    def select_order_map(column=nil, &block)
      ds = naked.ungraphed
      ds = if column
        raise(Error, ARG_BLOCK_ERROR_MSG) if block
        ds.select(column).order(unaliased_identifier(column))
      else
        ds.select(&block).order(&block)
      end
      ds.map{|r| r.values.first}
    end

    # Returns the first record in the dataset.
    def single_record
      clone(:limit=>1).each{|r| return r}
      nil
    end

    # Returns the first value of the first record in the dataset.
    # Returns nil if dataset is empty.
    def single_value
      if r = naked.ungraphed.single_record
        r.values.first
      end
    end
    
    # Returns the sum for the given column.
    def sum(column)
      aggregate_dataset.get{sum(column)}
    end

    # Returns a string in CSV format containing the dataset records. By 
    # default the CSV representation includes the column titles in the
    # first line. You can turn that off by passing false as the 
    # include_column_titles argument.
    #
    # This does not use a CSV library or handle quoting of values in
    # any way.  If any values in any of the rows could include commas or line
    # endings, you shouldn't use this.
    def to_csv(include_column_titles = true)
      n = naked
      cols = n.columns
      csv = ''
      csv << "#{cols.join(COMMA_SEPARATOR)}\r\n" if include_column_titles
      n.each{|r| csv << "#{cols.collect{|c| r[c]}.join(COMMA_SEPARATOR)}\r\n"}
      csv
    end
    
    # Returns a hash with one column used as key and another used as value.
    # If rows have duplicate values for the key column, the latter row(s)
    # will overwrite the value of the previous row(s). If the value_column
    # is not given or nil, uses the entire hash as the value.
    def to_hash(key_column, value_column = nil)
      inject({}) do |m, r|
        m[r[key_column]] = value_column ? r[value_column] : r
        m
      end
    end

    # Creates a unique table alias that hasn't already been used in the dataset.
    # table_alias can be any type of object accepted by alias_symbol.
    # The symbol returned will be the implicit alias in the argument,
    # possibly appended with "_N" if the implicit alias has already been
    # used, where N is an integer starting at 0 and increasing until an
    # unused one is found.
    def unused_table_alias(table_alias)
      table_alias = alias_symbol(table_alias)
      used_aliases = []
      used_aliases += opts[:from].map{|t| alias_symbol(t)} if opts[:from]
      used_aliases += opts[:join].map{|j| j.table_alias ? alias_alias_symbol(j.table_alias) : alias_symbol(j.table)} if opts[:join]
      if used_aliases.include?(table_alias)
        i = 0
        loop do
          ta = :"#{table_alias}_#{i}"
          return ta unless used_aliases.include?(ta)
          i += 1 
        end
      else
        table_alias
      end
    end

    private

    # Return a plain symbol given a potentially qualified or aliased symbol,
    # specifying the symbol that is likely to be used as the hash key
    # for the column when records are returned.
    def hash_key_symbol(s)
      raise(Error, "#{s.inspect} is not a symbol") unless s.is_a?(Symbol)
      _, c, a = split_symbol(s)
      (a || c).to_sym
    end

    # Return the unaliased part of the identifier.  Handles both
    # implicit aliases in symbols, as well as SQL::AliasedExpression
    # objects.  Other objects are returned as is.
    def unaliased_identifier(c)
      case c
      when Symbol
        c_table, column, _ = split_symbol(c)
        c_table ? column.to_sym.qualify(c_table) : column.to_sym
      when SQL::AliasedExpression
        c.expression
      else
        c
      end
    end
  end
end