require 'flydata-core/table_def/base' require 'flydata-core/string_utils' module FlydataCore module TableDef class MysqlTableDef < Base # Check and set the varchar(char) size which is converted from # length to byte size. # On Mysql the record size of varchar(char) is a length of characters. # ex) varchar(6) on mysql -> varchar(18) on flydata PROC_override_varchar = ->(type, mysql_type, flydata_type) do return type unless %w(char varchar).include?(mysql_type) if type =~ /\((\s*\d+\s*)\)/ # expect 3 byte UTF-8 character "#{flydata_type}(#{$1.to_i * 3})" else raise "Invalid varchar type. It must be a bug... type:#{type}" end end PROC_override_varbinary = ->(type, mysql_type, flydata_type) do return type unless %w(binary varbinary).include?(mysql_type) if type =~ /\((\s*\d+\s*)\)/ # expect 2 bytes for each original byte + 2 bytes for the prefix # ex) 4E5DFF => "0x4e5dff" "#{flydata_type}(#{$1.to_i * 2 + 2})" else raise "Invalid varbinary type. It must be a bug... type:#{type}" end end TYPE_MAP_M2F = { 'bigint' => {type: 'int8', def_width: '20'}, 'binary' => {type: 'binary', def_width: '1', override: PROC_override_varbinary}, 'bit' => {type: 'bit', def_width: '1'}, 'blob' => {type: 'varbinary(65535)'}, 'bool' => {type: 'int1', def_width: '1'}, 'boolean' => {type: 'int1', def_width: '1'}, 'char' => {type: 'varchar', def_width: '1', override: PROC_override_varchar}, 'date' => {type: 'date'}, 'datetime' => {type: 'datetime'}, 'dec' => {type: 'numeric', def_width: '10,0'}, 'decimal' => {type: 'numeric', def_width: '10,0'}, 'double' => {type: 'float8'}, 'double precision' => {type: 'float8'}, 'enum' => {type: 'enum'}, 'fixed' => {type: 'numeric', def_width: '10,0'}, 'float' => {type: 'float4'}, 'int' => {type: 'int4', def_width: '11'}, 'integer' => {type: 'int4', def_width: '11'}, 'longblob' => {type: 'varbinary(4294967295)'}, 'longtext' => {type: 'text'}, 'mediumblob' => {type: 'varbinary(16777215)'}, 'mediumint' => {type: 'int3', def_width: '9'}, 'mediumtext' => {type: 'text'}, 'numeric' => {type: 'numeric', def_width: '10,0'}, 'set' => {type: 'set'}, 'smallint' => {type: 'int2', def_width: '6'}, 'text' => {type: 'text'}, 'time' => {type: 'time'}, 'timestamp' => {type: 'datetime'}, 'tinyblob' => {type: 'varbinary(255)'}, 'tinyint' => {type: 'int1', def_width: '4'}, 'tinytext' => {type: 'text'}, 'varbinary' => {type: 'varbinary', override: PROC_override_varbinary}, 'varchar' => {type: 'varchar', override: PROC_override_varchar}, 'year' => {type: 'year'}, } def self.convert_to_flydata_type(type) TYPE_MAP_M2F.each do |mysql_type, type_hash| flydata_type = type_hash[:type] if /^#{mysql_type}\(|^#{mysql_type}$/.match(type) ret_type = type.gsub(/^#{mysql_type}/, flydata_type) if type_hash.has_key?(:def_width) && !/\(.+\)/.match(ret_type) ret_type += "(#{type_hash[:def_width]})" end if type_hash[:override] ret_type = type_hash[:override].call(ret_type, mysql_type, flydata_type) end return ret_type end end nil end def self._create(io, options) table_def = '' table_name = nil columns = [] column_def = {} unique_keys = [] default_charset = nil default_source_charset = nil comment = nil position = :before_create_table io.each_line do |line| line = FlydataCore::StringUtils.replace_invalid_utf8_char(line) case position when :before_create_table if line =~ /CREATE TABLE `(.*?)`/ position = :in_create_table table_name = $1 table_def += line.chomp next end when :in_create_table table_def += line.chomp stripped_line = line.strip # `col_smallint` smallint(6) DEFAULT NULL, if stripped_line.start_with?('`') column = parse_one_column_def(line) columns << column coldef = line.strip.gsub(/,$/, '') column_def[column[:column]] = coldef # PRIMARY KEY (`id`) elsif stripped_line.start_with?("PRIMARY KEY") parse_key(line, columns) #) ENGINE=InnoDB DEFAULT CHARSET=utf8 COMMENT='test table'; elsif stripped_line.start_with?(')') if line =~ /DEFAULT CHARSET\s*=\s*([a-z0-9]+)/ default_source_charset = $1 default_charset = flydata_charset(default_source_charset) end comment = $1 if /COMMENT='((?:\\'|[^'])*)'/.match(line) position = :after_create_table elsif stripped_line.start_with?("KEY") # index creation. No action required. elsif stripped_line.start_with?("CONSTRAINT") # constraint definition. No acction required. elsif stripped_line.start_with?("UNIQUE KEY") unique_key, include_null_column = parse_key(line, columns, :unique) unique_keys << unique_key unless include_null_column else # "Unknown table definition. Skip. (#{line})" end when :after_create_table break end end position == :after_create_table ? [table_def, table_name, columns, column_def, unique_keys, default_charset, default_source_charset, comment] : nil end # Replaces a MySQL string with an empty string ('') def self.strip_string(line) line.gsub(/'(\\'|''|[^'])*'/, "''") end def self.parse_one_column_def(query) line = query.strip line = line[0..-2] if line.end_with?(',') pos = 0 cond = :column_name column = {} while pos < line.length case cond when :column_name #`column_name` ... pos = line[0] == '`' ? line.index('`', 1) + 1 : line.index(' ', 1) column[:column] = if line[0] == '`' line[1..pos-2] else line[0..pos-1] end cond = :column_type pos += 1 when :column_type #... formattype(,,,) ... pos += 1 until line[pos] != ' ' start_pos = pos pos += 1 until line[pos].nil? || line[pos] =~ /\s|\(/ # meta if line[pos] == '(' #TODO: implement better parser pos = line.index(')', pos) pos += 1 end # type type = line[start_pos..pos-1] column[:type] = convert_to_flydata_type(type) cond = :options when :options strip_line = strip_string(line) column[:type] += ' unsigned' if strip_line =~ /unsigned/i column[:auto_increment] = true if strip_line =~ /AUTO_INCREMENT/i column[:not_null] = true if strip_line =~ /NOT NULL/i column[:unique] = true if strip_line =~ /UNIQUE/i if /DEFAULT\s+((?:[bx]?'(?:\\'|[^'])*')|(?:[^'\s]+\b))/i.match(line) val = $1 column[:default] = val == "NULL" ? nil : val end if /COMMENT\s+'(((?:\\'|[^'])*))'/i.match(line) column[:comment] = $1 end if /CHARACTER SET\s+([a-z0-9]+)/i.match(line) column[:charset] = flydata_charset($1) end if block_given? column = yield(column, query, pos) end break else raise "Invalid condition. It must be a bug..." end end column end # 'binary' is a fixed length column type. mysqldump exports a binary column # value at its full length by adding trailing 0's to the size of the column # (ex. '0xAB00920000' for type 'binary(5)') # FlyData Transport Format does not allow such paddings so they need to be # removed. On the target database, padding may be added according to the # destination DB's convention. MYSQL_BINARY_PADDING_CHAR = '00' PROC_convert_binary_value = -> (value) do return nil unless value while value.end_with?(MYSQL_BINARY_PADDING_CHAR) value = value[0...-MYSQL_BINARY_PADDING_CHAR.size] end value end VALUE_CONVERTERS = { 'binary' => PROC_convert_binary_value, } def self.convert_to_flydata_value(value, type) if converter = VALUE_CONVERTERS[type] value = converter.call(value) end value end # Charset naming conversion rule. mysql => ruby # # mysql # http://dev.mysql.com/doc/refman/5.6/en/charset-charsets.html # mysql(supported CJK character sets) # http://dev.mysql.com/doc/refman/5.6/en/faqs-cjk.html#qandaitem-A-11-1-1 # For ruby, you can see encoding list with "Encoding.list" CHARSET_ENCODE_RULE = { "armscii8" => nil, "ascii" => Encoding::US_ASCII, "big5" => Encoding::Big5, "binary" => Encoding::ASCII_8BIT, "cp1250" => Encoding::Windows_1250, "cp1251" => Encoding::Windows_1251, "cp1256" => Encoding::Windows_1256, "cp1257" => Encoding::Windows_1257, "cp850" => Encoding::CP850, "cp852" => Encoding::CP852, "cp866" => Encoding::IBM866, "cp932" => Encoding::Windows_31J, "dec8" => nil, "eucjpms" => Encoding::EucJP_ms, "euckr" => Encoding::EUC_KR, "gb2312" => Encoding::EUC_CN, "gbk" => Encoding::GBK, "geostd8" => nil, "greek" => Encoding::ISO_8859_7, "hebrew" => Encoding::ISO_8859_8, "hp8" => nil, "keybcs2" => nil, "koi8r" => Encoding::KOI8_R, "koi8u" => Encoding::KOI8_U, "latin1" => Encoding::ISO_8859_1, "latin2" => Encoding::ISO_8859_2, "latin5" => Encoding::ISO_8859_9, "latin7" => Encoding::ISO_8859_13, "macce" => Encoding::MacCentEuro, "macroman" => Encoding::MacRoman, "sjis" => Encoding::SHIFT_JIS, "swe7" => nil, "tis620" => Encoding::TIS_620, "ucs2" => Encoding::UTF_16BE, "ujis" => Encoding::EUC_JP, "utf16" => Encoding::UTF_16, "utf16le" => Encoding::UTF_16LE, "utf32" => Encoding::UTF_32, "utf8" => Encoding::UTF_8, "utf8mb4" => Encoding::UTF_8, } def self.ruby_encoding(mysql_charset) return nil if mysql_charset.to_s.empty? raise "Unsupported charset:#{mysql_charset}." unless CHARSET_ENCODE_RULE.has_key?(mysql_charset) charset = CHARSET_ENCODE_RULE[mysql_charset] return charset end def self.flydata_charset(mysql_charset) ruby_encoding(mysql_charset).to_s.upcase.gsub('-', '_') end private def self.parse_key(line, columns, type = :primary_key) line = /\((?:`.*?`(?:\(.*?\))?(?:,\s*)?)+\)/.match(line)[0] keys = line.scan(/`(.*?)`/).collect{|item| item[0]} include_null_column = false keys.each do |key| column = columns.detect {|column| column[:column] === key } raise "Key #{key} must exist in the definition " if column.nil? column[type] = true include_null_column = true unless column[:not_null] end [keys, include_null_column] end end end end