module FlydataCore
module TableDef

class MysqlTableDef
  # Converts a MySQL table definition (a CREATE TABLE statement) into a
  # FlyData table definition: column names, FlyData column types, column
  # options, keys, default charset and table comment.

  # Type override for char/varchar columns.
  # On MySQL the size of varchar(char) is a length in characters, so it is
  # converted to a byte size by multiplying by 3 (a character is expected to
  # take up to 3 bytes in UTF-8).
  # ex) varchar(6) on mysql -> varchar(18) on flydata
  #
  # type         - type string whose name is already the flydata type name
  # mysql_type   - original mysql type name
  # flydata_type - flydata type name
  # Returns the type with the converted width.  `type` is returned untouched
  # for other mysql types.  Raises if `type` has no numeric width.
  PROC_override_varchar = ->(type, mysql_type, flydata_type) do
    return type unless %w(char varchar).include?(mysql_type)
    if type =~ /\((\d+)\)/
      # expect 3 byte UTF-8 character
      "#{flydata_type}(#{$1.to_i * 3})"
    else
      raise "Invalid varchar type. It must be a bug... type:#{type}"
    end
  end

  # Type override for binary/varbinary columns.  Takes the same arguments as
  # PROC_override_varchar.
  # Returns the type with the converted width.  `type` is returned untouched
  # for other mysql types.  Raises if `type` has no numeric width.
  PROC_override_varbinary = ->(type, mysql_type, flydata_type) do
    return type unless %w(binary varbinary).include?(mysql_type)
    if type =~ /\((\d+)\)/
      # expect 2 bytes for each original byte + 2 bytes for the prefix
      # ex) 4E5DFF => "0x4e5dff"
      "#{flydata_type}(#{$1.to_i * 2 + 2})"
    else
      raise "Invalid varbinary type. It must be a bug... type:#{type}"
    end
  end

  # mysql type name => conversion rule
  #   :type      - flydata type name which replaces the mysql type name
  #   :def_width - width appended when the mysql type comes without one
  #   :override  - lambda which post-processes the converted type
  TYPE_MAP_M2F = {
    'bigint' => {type: 'int8', def_width: '20'},
    'binary' => {type: 'binary', def_width: '1', override: PROC_override_varbinary},
    'bit' => {type: 'bit', def_width: '1'},
    'blob' => {type: 'varbinary(65535)'},
    'bool' => {type: 'int1', def_width: '1'},
    'boolean' => {type: 'int1', def_width: '1'},
    'char' => {type: 'varchar', def_width: '1', override: PROC_override_varchar},
    'date' => {type: 'date'},
    'datetime' => {type: 'datetime'},
    'dec' => {type: 'numeric', def_width: '10,0'},
    'decimal' => {type: 'numeric', def_width: '10,0'},
    'double' => {type: 'float8'},
    'double precision' => {type: 'float8'},
    'enum' => {type: 'enum'},
    'fixed' => {type: 'numeric', def_width: '10,0'},
    'float' => {type: 'float4'},
    'int' => {type: 'int4', def_width: '11'},
    'integer' => {type: 'int4', def_width: '11'},
    'longblob' => {type: 'varbinary(4294967295)'},
    'longtext' => {type: 'text'},
    'mediumblob' => {type: 'varbinary(16777215)'},
    'mediumint' => {type: 'int3', def_width: '9'},
    'mediumtext' => {type: 'text'},
    'numeric' => {type: 'numeric', def_width: '10,0'},
    'set' => {type: 'set'},
    'smallint' => {type: 'int2', def_width: '6'},
    'text' => {type: 'text'},
    'time' => {type: 'time'},
    'timestamp' => {type: 'datetime'},
    'tinyblob' => {type: 'varbinary(255)'},
    'tinyint' => {type: 'int1', def_width: '4'},
    'tinytext' => {type: 'text'},
    'varbinary' => {type: 'varbinary', override: PROC_override_varbinary},
    'varchar' => {type: 'varchar', override: PROC_override_varchar},
    'year' => {type: 'year'},
  }

  # Converts a mysql column type into the corresponding flydata type.
  # ex) 'int' -> 'int4(11)', 'varchar(6)' -> 'varchar(18)'
  #
  # type - mysql type with an optional "(...)" part, ex) 'decimal(8,2)'
  # Returns the flydata type, or nil if the mysql type is not in TYPE_MAP_M2F.
  # Raises if an :override lambda rejects the type (ex. varchar without width).
  def self.convert_to_flydata_type(type)
    return nil if type.nil?

    # The type name is everything in front of the first '(' (or the whole
    # string), so the rule can be looked up directly.
    mysql_type = type[/\A[^(]*/]
    type_hash = TYPE_MAP_M2F[mysql_type]
    return nil unless type_hash

    flydata_type = type_hash[:type]
    ret_type = flydata_type + type[mysql_type.length..-1]
    if type_hash.has_key?(:def_width) && ret_type !~ /\(.+\)/
      ret_type += "(#{type_hash[:def_width]})"
    end
    if type_hash[:override]
      ret_type = type_hash[:override].call(ret_type, mysql_type, flydata_type)
    end
    ret_type
  end

  # Builds a table definition from the first CREATE TABLE statement in `io`.
  #
  # io      - object responding to each_line which yields the DDL lines
  # options - :skip_primary_key_check => true accepts a table without a
  #           primary key
  # Returns a MysqlTableDef, or nil if no complete CREATE TABLE statement was
  # found.  Raises TableDefError if the table has no primary key and the check
  # is not skipped.
  def self.create(io, options = {})
    params = _create(io, options)
    params ? self.new(*params) : nil
  end

  attr_reader :columns, :column_def, :table_name, :default_charset_mysql

  # table_def             - CREATE TABLE statement joined into a single line
  # table_name            - name of the table
  # columns               - array of column hashes (see parse_one_column_def)
  # column_def            - column name => column definition line
  # default_charset       - default charset in flydata naming
  # default_charset_mysql - default charset in mysql naming
  # comment               - table comment
  def initialize(table_def, table_name, columns, column_def, default_charset,
                 default_charset_mysql, comment)
    @table_def = table_def
    @table_name = table_name
    @columns = columns
    @column_def = column_def
    @default_charset = default_charset
    @default_charset_mysql = default_charset_mysql
    @comment = comment
  end

  # Reads `io` line by line and collects the arguments for #initialize.
  # Returns the argument array, or nil if `io` ends before the closing line of
  # a CREATE TABLE statement was read.  See .create for options and errors.
  def self._create(io, options)
    table_def = ''.dup
    table_name = nil
    columns = []
    column_def = {}
    default_charset = nil
    default_charset_mysql = nil
    comment = nil

    position = :before_create_table

    io.each_line do |line|
      case position
      when :before_create_table
        if line =~ /CREATE TABLE `(.*?)`/
          position = :in_create_table
          table_name = $1
          table_def << line.chomp
        end

      when :in_create_table
        table_def << line.chomp

        stripped_line = line.strip
        # `col_smallint` smallint(6) DEFAULT NULL,
        if stripped_line.start_with?('`')
          column = parse_one_column_def(line)
          columns << column
          column_def[column[:column]] = stripped_line.gsub(/,$/, '')
        # PRIMARY KEY (`id`)
        elsif stripped_line.start_with?("PRIMARY KEY")
          parse_key(line, columns)
        #) ENGINE=InnoDB DEFAULT CHARSET=utf8 COMMENT='test table';
        elsif stripped_line.start_with?(')')
          if line =~ /DEFAULT CHARSET\s*=\s*([a-z0-9]+)/
            default_charset_mysql = $1
            default_charset = flydata_charset(default_charset_mysql)
          end
          comment = $1 if /COMMENT='((?:\\'|[^'])*)'/.match(line)
          position = :after_create_table
        elsif stripped_line.start_with?("KEY")
          # index creation.  No action required.
        elsif stripped_line.start_with?("CONSTRAINT")
          # constraint definition.  No action required.
        elsif stripped_line.start_with?("UNIQUE KEY")
          parse_key(line, columns, :unique)
        else
          # "Unknown table definition. Skip. (#{line})"
        end

      when :after_create_table
        # One line following the statement is consumed before stopping.
        break
      end
    end

    return nil unless position == :after_create_table

    # Validate here rather than inside the loop so that the check also runs
    # when the closing line of the statement is the last line of `io`.
    unless options[:skip_primary_key_check] || columns.any? { |col| col[:primary_key] }
      raise TableDefError, {error: "no primary key defined", table: table_name}
    end

    [table_def, table_name, columns, column_def, default_charset, default_charset_mysql, comment]
  end

  # Returns the table definition as a hash with :table_name and :columns, plus
  # :default_charset and :comment when they are set.
  def to_flydata_tabledef
    tabledef = { table_name: @table_name,
                 columns: @columns,
               }
    tabledef[:default_charset] = @default_charset if @default_charset
    tabledef[:comment] = @comment if @comment

    tabledef
  end

  # Parses one column definition line of a CREATE TABLE statement.
  # ex) `col_smallint` smallint(6) DEFAULT NULL,
  #
  # query - column definition; surrounding spaces and a trailing comma are
  #         ignored
  # Returns a hash with :column and :type (flydata type, nil if the mysql type
  # is unknown) plus, when present in the definition, :auto_increment,
  # :not_null, :unique, :default (nil for DEFAULT NULL), :comment and :charset.
  # If a block is given and something follows the column type, the block is
  # called with (column, query, position after the type) and its return value
  # is returned instead.
  def self.parse_one_column_def(query)
    line = query.strip
    line = line[0..-2] if line.end_with?(',')
    pos = 0
    cond = :column_name
    column = {}

    while pos < line.length
      case cond
      when :column_name  #`column_name` ...
        closing = line[0] == '`' ? line.index('`', 1) : nil
        if closing
          # A quoted name ends at its closing backquote, not at the first
          # space, because the name itself may contain spaces.
          column[:column] = line[1...closing]
          pos = closing + 1
        else
          name_end = line.index(' ', 1) || line.length
          column[:column] = line[0...name_end]
          pos = name_end + 1
        end
        cond = :column_type
      when :column_type  #... formattype(,,,) ...
        pos += 1 until line[pos] != ' '
        start_pos = pos
        pos += 1 until line[pos].nil? || line[pos] =~ /\s|\(/

        # meta
        if line[pos] == '('
          # Skip quoted values as a whole so that a ')' inside an enum/set
          # value does not end the type.  \G anchors the match at pos.
          meta = /\G\((?:'(?:\\.|[^'\\])*'|[^')])*\)/.match(line, pos)
          if meta
            pos = meta.end(0)
          else
            closing = line.index(')', pos)
            pos = closing ? closing + 1 : line.length
          end
        end

        # type
        type = line[start_pos..pos-1]
        column[:type] = convert_to_flydata_type(type)

        cond = :options
      when :options
        # Only the text after the type holds column options.  The comment
        # clause and the content of quoted literals are free text, so they are
        # taken out before looking for option keywords.
        attrs = line[pos..-1]
        comment_match = /COMMENT\s+'(((?:\\'|[^'])*))'/i.match(attrs)
        attrs = comment_match.pre_match + comment_match.post_match if comment_match
        keywords = attrs.gsub(/'(?:\\'|[^'])*'/, "''")

        # The type is nil for unknown mysql types; there is nothing to extend.
        column[:type] += ' unsigned' if column[:type] && keywords =~ /unsigned/i
        column[:auto_increment] = true if keywords =~ /AUTO_INCREMENT/i
        column[:not_null] = true if keywords =~ /NOT NULL/i
        column[:unique] = true if keywords =~ /UNIQUE/i
        default_match = /DEFAULT\s+((?:[bx]?'(?:\\'|[^'])*')|(?:[^'\s]+\b))/i.match(attrs)
        if default_match
          val = default_match[1]
          column[:default] = val == "NULL" ? nil : val
        end
        column[:comment] = comment_match[1] if comment_match
        charset_match = /CHARACTER SET\s+([a-z0-9]+)/i.match(keywords)
        column[:charset] = flydata_charset(charset_match[1]) if charset_match
        if block_given?
          column = yield(column, query, pos)
        end
        break
      else
        raise "Invalid condition. It must be a bug..."
      end
    end
    column
  end

  # 'binary' is a fixed length column type.  mysqldump exports a binary column
  # value at its full length by adding trailing 0's to the size of the column
  # (ex. '0xAB00920000' for type 'binary(5)')
  # FlyData Transport Format does not allow such paddings so they need to be
  # removed.  On the target database, padding may be added according to the
  # destination DB's convention.
  MYSQL_BINARY_PADDING_CHAR = '00'
  PROC_convert_binary_value = -> (value) do
    return nil unless value
    while value.end_with?(MYSQL_BINARY_PADDING_CHAR)
      value = value[0...-MYSQL_BINARY_PADDING_CHAR.size]
    end
    value
  end

  # type name => lambda converting a value of that type
  VALUE_CONVERTERS = {
    'binary' => PROC_convert_binary_value,
  }

  # Converts `value` with the converter registered for `type` in
  # VALUE_CONVERTERS.  Returns `value` as is when there is no converter.
  def self.convert_to_flydata_value(value, type)
    if converter = VALUE_CONVERTERS[type]
      value = converter.call(value)
    end
    value
  end

  # Charset naming conversion rule. mysql => ruby
  #
  # mysql
  #   http://dev.mysql.com/doc/refman/5.6/en/charset-charsets.html
  # mysql(supported CJK character sets)
  #   http://dev.mysql.com/doc/refman/5.6/en/faqs-cjk.html#qandaitem-A-11-1-1
  # For ruby, you can see encoding list with "Encoding.list"
  CHARSET_ENCODE_RULE = {
    "armscii8" => nil,
    "ascii"    => Encoding::US_ASCII,
    "big5"     => Encoding::Big5,
    "binary"   => Encoding::ASCII_8BIT,
    "cp1250"   => Encoding::Windows_1250,
    "cp1251"   => Encoding::Windows_1251,
    "cp1256"   => Encoding::Windows_1256,
    "cp1257"   => Encoding::Windows_1257,
    "cp850"    => Encoding::CP850,
    "cp852"    => Encoding::CP852,
    "cp866"    => Encoding::IBM866,
    "cp932"    => Encoding::Windows_31J,
    "dec8"     => nil,
    "eucjpms"  => Encoding::EucJP_ms,
    "euckr"    => Encoding::EUC_KR,
    "gb2312"   => Encoding::EUC_CN,
    "gbk"      => Encoding::GBK,
    "geostd8"  => nil,
    "greek"    => Encoding::ISO_8859_7,
    "hebrew"   => Encoding::ISO_8859_8,
    "hp8"      => nil,
    "keybcs2"  => nil,
    "koi8r"    => Encoding::KOI8_R,
    "koi8u"    => Encoding::KOI8_U,
    "latin1"   => Encoding::ISO_8859_1,
    "latin2"   => Encoding::ISO_8859_2,
    "latin5"   => Encoding::ISO_8859_9,
    "latin7"   => Encoding::ISO_8859_13,
    "macce"    => Encoding::MacCentEuro,
    "macroman" => Encoding::MacRoman,
    "sjis"     => Encoding::SHIFT_JIS,
    "swe7"     => nil,
    "tis620"   => Encoding::TIS_620,
    "ucs2"     => Encoding::UTF_16BE,
    "ujis"     => Encoding::EUC_JP,
    "utf16"    => Encoding::UTF_16,
    "utf16le"  => Encoding::UTF_16LE,
    "utf32"    => Encoding::UTF_32,
    "utf8"     => Encoding::UTF_8,
    "utf8mb4"  => Encoding::UTF_8,
  }

  # Returns the ruby Encoding for a mysql charset name.  The result is nil for
  # a nil/empty name and for charsets mapped to nil in CHARSET_ENCODE_RULE.
  # Raises if the charset is not listed in CHARSET_ENCODE_RULE.
  def self.ruby_encoding(mysql_charset)
    return nil if mysql_charset.to_s.empty?
    raise "Unsupported charset:#{mysql_charset}." unless CHARSET_ENCODE_RULE.has_key?(mysql_charset)
    CHARSET_ENCODE_RULE[mysql_charset]
  end

  # Returns the flydata charset name for a mysql charset name: the name of the
  # ruby encoding in upper case with '-' replaced by '_' (ex. utf8 -> UTF_8).
  # The result is an empty string when ruby_encoding returns nil.
  def self.flydata_charset(mysql_charset)
    ruby_encoding(mysql_charset).to_s.upcase.gsub('-', '_')
  end

  # Marks the columns listed in a key definition line.
  # ex) PRIMARY KEY (`id`)
  #
  # line    - key definition line containing a list of backquoted column names
  # columns - column hashes as returned by parse_one_column_def
  # type    - hash key set to true on every listed column
  # Returns the array of listed column names.  Raises if the line has no key
  # list or if a listed column is not part of `columns`.
  #
  # NOTE: this method used to follow a bare `private`, which has no effect on
  # methods defined with `def self.`, so it has always been callable from
  # outside.  It is left public to stay compatible with such callers.
  def self.parse_key(line, columns, type = :primary_key)
    key_list = line[/\((?:`.*?`(?:\(.*?\))?(?:,\s*)?)+\)/]
    raise "Invalid key definition. It must be a bug... line:#{line}" if key_list.nil?
    keys = key_list.scan(/`(.*?)`/).map { |item| item[0] }

    keys.each do |key|
      column = columns.find { |col| col[:column] == key }
      raise "Key #{key} must exist in the definition " if column.nil?
      column[type] = true
    end
  end

end

end
end