require "pg_column_byte_packer/version"
require "pg_query"

module PgColumnBytePacker
  def self.sql_type_alignment_cache
    @@sql_type_alignment_cache ||= {}
  end

  def self.ordering_key_for_column(connection:, name:, sql_type:, type_schema: nil, primary_key:, nullable:, has_default:)
    alignment = PgColumnBytePacker.sql_type_alignment_cache[sql_type] ||= (
      if type_schema.nil?
        fake_query = "CREATE TABLE t(c #{sql_type});"
        parsed = begin
           PgQuery.parse(fake_query)
         rescue PgQuery::ParseError
           nil
         end

        if parsed && (column_def = parsed.tree[0]["RawStmt"]["stmt"]["CreateStmt"]["tableElts"][0]["ColumnDef"])
          type_identifiers = column_def["typeName"]["TypeName"]["names"].map { |s| s["String"]["str"] }
          case type_identifiers.size
          when 1
            # Do nothing; we already have a bare type.
          when 2
            type_schema, bare_type = type_identifiers
          else
            raise ArgumentError, "Unexpected number of identifiers in type declaration for column: `#{name}`, identifiers: #{type_identifiers.inspect}"
          end
        end
      end

      bare_type = if type_schema
        if sql_type.start_with?("#{type_schema}.")
          sql_type.sub("#{type_schema}.", "")
        elsif sql_type.start_with?("\"#{type_schema}\".")
          sql_type.sub("\"#{type_schema}\".", "")
        else
          sql_type
        end
      else
        sql_type
      end

      # Ignore array designations. This seems like something we could
      # so with the parsing above, and we could, and, in fact, that
      # would also almost certain allow us to rip out most of the
      # ActiveRecord generate alias type name matching below, but
      # it would also mean a more thorough refactor below for types
      # with size designations (e.g., when ActiveRecord generates
      # "float(23)"). So we use this simple regex cleanup for now.
      bare_type = bare_type.sub(/(\[\])+\Z/, "")

      # Sort out the alignment. Most of the type name matching is to
      # support the naming variants that ActiveRecord generates (often
      # they're aliases, like "integer", for which PostgreSQL internally
      # has a different canonical name, like "int4"). There are also
      # a few cases we have to handle where the output from pgdump
      # doesn't match the canonical name in pg_type; e.g., "float8" is
      # canonical, but pgdump outputs "double precision".
      case bare_type
      when "bigint", "double precision", /\Atimestamp.*/, /\Abigserial( primary key)?/
        8 # Actual alignment for these types.
      when "integer", "date", "decimal", "real", /\Aserial( primary key)?/
        4 # Actual alignment for these types.
      when "bytea"
        # These types generally have an alignment of 4, but values of at most 127 bytes
        # long they are optimized into 2 byte alignment.
        # Since we'd expect any binary fields to be relatively long, we'll assume they
        # won't fit into the optimized case.
        4
      when "text", "citext", "character varying"
        # These types generally have an alignment of 4, but values of at most 127 bytes
        # long they are optimized into 2 byte alignment.
        # Since we don't have a good heuristic for determining which columns are likely
        # to be long or short, we currently just slot them all after the columns we
        # believe will always be long.
        # If desired we could also differentiate on length limits if set.
        3
      when /\Acharacter varying\(\d+\)/
        if (limit = /\Acharacter varying\((\d+)\)/.match(sql_type)[1])
          if limit.to_i <= 127
            2
          else
            4
          end
        end
      when /\Afloat(\(\d+\))?/
        precision_match = /\Afloat\((\d+)\)?/.match(sql_type)
        if precision_match
          # Precision here is a number of binary digits;
          # see https://www.postgresql.org/docs/10/datatype-numeric.html
          # for more information.
          if precision_match[1].to_i >= 25
            8 # Double precision
          else
            4 # Real
          end
        else
          8 # Default is double precision
        end
      when "smallint", "boolean"
        2 # Actual alignment for these types.
      else
        typtype, typalign = connection.select_rows(<<~SQL, "Type Lookup").first
          SELECT typ.typtype, typ.typalign
          FROM pg_type typ
          JOIN pg_namespace nsp ON nsp.oid = typ.typnamespace
          WHERE typname = '#{connection.quote_string(bare_type)}'
            #{type_schema ? "AND nsp.nspname = '#{connection.quote_string(type_schema)}'" : ""}
        SQL

        if typtype.nil?
          raise ArgumentError, "Got sql_type: `#{sql_type}` and type_schema: `#{type_schema}` but was unable to find entry in pg_type."
        end

        if typtype == "e"
          4
        else
          case typalign
          when "c"
            0
          when "s"
            2
          when "i"
            4
          when "d"
            8
          else
            0
          end
        end
      end
    )

    # Ordering components in order of most importance to least importance.
    [
      -alignment, # Sort alignment descending.
      primary_key ? 0 : 1, # Sort PRIMARY KEY first.
      nullable ? 1 : 0, # Sort NOT NULL first.
      has_default && nullable ? 0 : 1, # Sort DEFAULT first (but only when also nullable).
      name, # Sort name ascending.
    ]
  end
end

require "pg_column_byte_packer/schema_statements"
require "pg_column_byte_packer/pg_dump"