require 'date' require 'flydata-core/errors' require 'flydata-core/table_def/value_conv' module FlydataCore module TableDef class NameTooLongError < RuntimeError; end class RedshiftTableDef TYPE_MAP_F2R = { 'binary' => {type: 'varchar', use_params: true, default_value: ''}, 'bit' => {type: 'bigint', default_value: '0'}, 'boolean' => {type: 'boolean', default_value: false}, 'varbit' => {type: 'bigint', default_value: '0'}, 'bytea' => {type: 'varchar(max)', default_value: ''}, 'char' => {type: 'char', use_params: true, default_value: ''}, 'date' => {type: 'date', default_value: '0000-01-01'}, 'datetime' => {type: 'timestamp', default_value: '0000-01-01'}, 'datetimetz' => {type: 'timestamp', default_value: '0000-01-01'}, 'enum' => {type: 'varchar encode bytedict', default_value: ''}, 'float4' => {type: 'float4', default_value: '0'}, 'float4 unsigned' => {type: 'float4', default_value: '0'}, 'float8' => {type: 'float8', default_value: '0'}, 'float8 unsigned' => {type: 'float8', default_value: '0'}, 'int1' => {type: 'int2', default_value: '0'}, 'int1 unsigned' => {type: 'int2', unsigned: true, default_value: '0'}, 'int2' => {type: 'int2', default_value: '0'}, 'int2 unsigned' => {type: 'int4', unsigned: true, default_value: '0'}, 'int3' => {type: 'int4', default_value: '0'}, 'int3 unsigned' => {type: 'int4', unsigned: true, default_value: '0'}, 'int4' => {type: 'int4', default_value: '0'}, 'int4 unsigned' => {type: 'int8', unsigned: true, default_value: '0'}, 'int8' => {type: 'int8', default_value: '0'}, 'int8 unsigned' => {type: 'numeric(20,0)', unsigned: true, default_value: '0'}, 'money' => {type: 'numeric', use_params: true, max_size: [38,37], default_value: '0'}, 'numeric' => {type: 'numeric', use_params: true, default_params: [18,8], max_size: [38,37], default_value: '0'}, 'numeric unsigned' => {type: 'numeric', use_params: true, default_params: [18,8], max_size: [38,37], default_value: '0'}, 'set' => {type: 'varchar encode bytedict', default_value: ''}, 'text' => {type: 'varchar(max)', default_value: ''}, 'time' => {type: 'timestamp', default_value: '0000-01-01'}, 'timetz' => {type: 'timestamp', default_value: '0000-01-01'}, 'varbinary' => {type: 'varchar', use_params: true, max_size: 65535, default_value: ''}, 'varchar' => {type: 'varchar', use_params: true, max_size: 65535, default_value: ''}, 'year' => {type: 'date', default_value: '0001-01-01'}, '_unsupported' => {type: 'varchar(max)', default_value: ''}, } def self.from_flydata_tabledef(flydata_tabledef, options = {}) options[:flydata_ctl_table] = true unless options.has_key?(:flydata_ctl_table) schema_name = options[:schema_name] begin tabledef = "" tabledef += create_schema_sql(schema_name) if options[:flydata_ctl_table] && !schema_name.to_s.empty? tabledef += create_flydata_ctl_table_sql(schema_name) if options[:flydata_ctl_table] options[:backup_postfix] = "_flydata#{Time.now.strftime('%Y%m%d%H%M%S')}" tabledef += "BEGIN;\n" tabledef += remove_table_sql(flydata_tabledef, schema_name, options) unless options[:ctl_only] tabledef += create_table_sql(flydata_tabledef, schema_name, options) unless options[:ctl_only] tabledef += comment_sql(flydata_tabledef, schema_name) unless options[:ctl_only] tabledef += flydata_ctl_sql(flydata_tabledef, schema_name) tabledef += "COMMIT;\n" tabledef += drop_backup_table_sql(flydata_tabledef, schema_name, options) unless options[:ctl_only] rescue => e # Catch errors from generating schema. Generally an unsupported data type raise TableDefError, {error: "Errors generating schema. Please contact us for further instructions", table: flydata_tabledef[:table_name]} end end FLYDATA_CTL_COLUMNS_TABLE = "flydata_ctl_columns" FLYDATA_CTL_TABLES_TABLE = "flydata_ctl_tables" CREATE_SCHEMA_SQL = < ? @ \\[ \\\\ \\] ^ { \\| } ~ `" def self.convert_to_valid_name(key) match = /[^#{self::VALID_IDENTIFIER_CHARACTERS.join}]/ name = key.downcase.gsub(match, '_') do_convert = yield name if block_given? name = "_#{name}" if do_convert if name.length > MAX_COLUMNNAME_LENGTH raise NameTooLongError, "Converted name is too long. size: #{name.size} name: #{name}" end name end REDSHIFT_RESERVED_COLUMNS = %w[oid tableoid xmin cmin xmax cmax ctid deletexid insertxid] REDSHIFT_RESERVED_COLUMNS_HASH = REDSHIFT_RESERVED_COLUMNS.inject({}) {|h, word| h[word.to_sym] = true; h} def self.redshift_reserved_column_name?(name) return false unless name REDSHIFT_RESERVED_COLUMNS_HASH[name.to_sym] == true end def self.escape(text) text.gsub(/(['\\])/, "\\\\\\1") end def self.check_and_replace_max(params, max_size_a, flydata_type) final_params = [] case flydata_type when 'numeric', 'numeric unsigned' p = params.split(",") raise "Wrong number of params for #{flydata_type} type." if p.length != max_size_a.length above_decimal = p[0].to_i - p[1].to_i below_decimal = p[1].to_i # use max precision as the max number of above-dicimal digits max_above_decimal = max_size_a[0] final_above_decimal = above_decimal > max_above_decimal ? max_above_decimal : above_decimal max_below_decimal = max_size_a[0] - final_above_decimal max_below_decimal = max_size_a[1] if max_below_decimal > max_size_a[1] final_below_decimal = below_decimal > max_below_decimal ? max_below_decimal : below_decimal final_params << (final_above_decimal + final_below_decimal) << final_below_decimal else params.split(",").each_with_index do |param, i| final_params << (/\d+/.match(param) && max_size_a[i] && param.to_i > max_size_a[i].to_i ? max_size_a[i] : param) end end final_params.join(",") end APACHE_TIMESTAMP_REGEXP = Regexp.new('^(?\[[0-3]\d\/\D{3}\/[1-2]\d{3}:[0-2]\d:[0-5]\d:[0-5]\d ?[\+\-]\d{2}:?\d{2}\])$') TIME_REGEXP = Regexp.new('^(?-)?(?\d{2,3}):(?[0-5][0-9]):(?[0-5][0-9](\.\d+)?)\s*(?[+-][0-2][0-9](:?\d{2})?)?$') def self.parse_timestamp(value) value_str = value.to_s return nil if value_str.empty? if value.kind_of?(Integer) or /^\d+$/ === value_str # Unix epoch in UTC t = DateTime.strptime(value_str, '%s') elsif APACHE_TIMESTAMP_REGEXP.match(value_str) # apache time format t = DateTime.strptime(value, "[%d/%b/%Y:%H:%M:%S %Z]") elsif time_match = TIME_REGEXP.match(value_str) if time_match[:tz] # let DateTime.parse handle timezone t = DateTime.parse("0001-01-01 #{value_str}") else # use own converter which takes care of negative hours t = convert_time_into_timestamp(time_match) end elsif /^(\d+)\.(\d+)$/ === value_str # epoch with fraction epoch = $1 fraction = $2 t = DateTime.strptime(epoch, '%s') t = DateTime.parse(t.strftime("%Y-%m-%dT%H:%M:%S.#{fraction}")) else t = DateTime.parse(convert_date(value_str)) end t = t.new_offset(0) # Redshift Plug-in uses UTC t.strftime('%Y-%m-%d %H:%M:%S.%6N') rescue ArgumentError => ae # '0000-00-00 00:00:00' is valid for mysql datetime column if value_str.start_with?('0000-00-00 00:00:00') return '0001-01-01 00:00:00.000000' else raise ae end end STANDARD_DATETIME = DateTime.new(1, 1, 1) def self.convert_time_into_timestamp(time_match) sign = time_match[:sign] ? -1 : 1 STANDARD_DATETIME.dup + sign * Rational(time_match[:hour].to_i, 24) + sign * Rational(time_match[:minute].to_i, 1440) + sign * Rational(time_match[:second].to_f, 86400) end def self.parse_date(value) return nil if value.nil? value_str = value.to_s dt = Date.parse(convert_date(value_str)) dt.strftime('%Y-%m-%d') rescue ArgumentError => ae # '0000-00-00' is valid for mysql date column return '0001-01-01' if value_str == '0000-00-00' raise ae end CONVERT_ZERO_DATE_REGEXP = /^0000|-00/ CONVERT_ZERO_DATE_HASH_TABLE = {"0000" => "0001", "-00" => "-01" } def self.convert_date(value) value = convert_year_into_date(value) # Convert any 00 value to 01 value.gsub(CONVERT_ZERO_DATE_REGEXP, CONVERT_ZERO_DATE_HASH_TABLE) end def self.convert_year_into_date(value) return value unless /^\d+$/.match(value) # Only positive integers are allowed for year type val_i = value.to_i if val_i == 0 # '0' or '0000' '0001-01-01' # '0001-01-01' is the "base" date elsif value.length == 4 "#{value}-01-01" elsif value.length == 2 "#{val_i >= 70 ? "19" : "20"}#{value}-01-01" else value # Return the value as is end end end end end