require 'date' require 'flydata-core/errors' module FlydataCore module TableDef class NameTooLongError < RuntimeError; end class RedshiftTableDef TYPE_MAP_F2R = { 'binary' => {type: 'varchar', use_params: true, default_value: ''}, 'bit' => {type: 'bigint', default_value: '0'}, 'char' => {type: 'char', use_params: true, default_value: ''}, 'date' => {type: 'date', default_value: '0000-01-01'}, 'datetime' => {type: 'timestamp', default_value: '0000-01-01'}, 'enum' => {type: 'varchar encode bytedict', default_value: ''}, 'float4' => {type: 'float4', default_value: '0'}, 'float4 unsigned' => {type: 'float4', default_value: '0'}, 'float8' => {type: 'float8', default_value: '0'}, 'float8 unsigned' => {type: 'float8', default_value: '0'}, 'int1' => {type: 'int2', default_value: '0'}, 'int1 unsigned' => {type: 'int2', unsigned: true, default_value: '0'}, 'int2' => {type: 'int2', default_value: '0'}, 'int2 unsigned' => {type: 'int4', unsigned: true, default_value: '0'}, 'int3' => {type: 'int4', default_value: '0'}, 'int3 unsigned' => {type: 'int4', unsigned: true, default_value: '0'}, 'int4' => {type: 'int4', default_value: '0'}, 'int4 unsigned' => {type: 'int8', unsigned: true, default_value: '0'}, 'int8' => {type: 'int8', default_value: '0'}, 'int8 unsigned' => {type: 'numeric(20,0)', unsigned: true, default_value: '0'}, 'numeric' => {type: 'numeric', use_params: true, max_size: [38,37], default_value: '0'}, 'numeric unsigned' => {type: 'numeric', use_params: true, max_size: [38,37], default_value: '0'}, 'set' => {type: 'varchar encode bytedict', default_value: ''}, 'text' => {type: 'varchar(max)', default_value: ''}, 'time' => {type: 'timestamp', default_value: '0000-01-01'}, 'varbinary' => {type: 'varchar', use_params: true, max_size: 65535, default_value: ''}, 'varchar' => {type: 'varchar', use_params: true, max_size: 65535, default_value: ''}, 'year' => {type: 'date', default_value: '0001-01-01'}, } def self.from_flydata_tabledef(flydata_tabledef, options = {}) options[:flydata_ctl_table] = true unless options.has_key?(:flydata_ctl_table) schema_name = options[:schema_name] begin tabledef = "" tabledef += create_schema_sql(schema_name) if options[:flydata_ctl_table] && !schema_name.to_s.empty? tabledef += create_flydata_ctl_table_sql(schema_name) if options[:flydata_ctl_table] tabledef += create_table_sql(flydata_tabledef, schema_name) unless options[:ctl_only] tabledef += comment_sql(flydata_tabledef, schema_name) unless options[:ctl_only] tabledef += flydata_ctl_sql(flydata_tabledef, schema_name) rescue => e # Catch errors from generating schema. Generally an unsupported data type raise TableDefError, {error: "errors generating schema. Please contact us for further instructions", table: flydata_tabledef[:table_name]} end end FLYDATA_CTL_COLUMNS_TABLE = "flydata_ctl_columns" FLYDATA_CTL_TABLES_TABLE = "flydata_ctl_tables" CREATE_SCHEMA_SQL = < ? @ \\[ \\\\ \\] ^ { \\| } ~ `" def self.convert_to_valid_name(key) match = /[^#{self::VALID_IDENTIFIER_CHARACTERS.join}]/ name = key.downcase.gsub(match, '_') do_convert = yield name if block_given? name = "_#{name}" if do_convert if name.length > MAX_COLUMNNAME_LENGTH raise NameTooLongError, "Converted name is too long. size: #{name.size} name: #{name}" end name end REDSHIFT_RESERVED_COLUMNS = %w[oid tableoid xmin cmin xmax cmax ctid deletexid insertxid] REDSHIFT_RESERVED_COLUMNS_HASH = REDSHIFT_RESERVED_COLUMNS.inject({}) {|h, word| h[word.to_sym] = true; h} def self.redshift_reserved_column_name?(name) return false unless name REDSHIFT_RESERVED_COLUMNS_HASH[name.to_sym] == true end def self.escape(text) text.gsub(/(['\\])/, "\\\\\\1") end def self.check_and_replace_max(params, max_size_a) final_params = [] params.split(",").each_with_index do |param, i| final_params << (/\d+/.match(param) && max_size_a[i] && param.to_i > max_size_a[i].to_i ? max_size_a[i] : param) end final_params.join(",") end APACHE_TIMESTAMP_REGEXP = Regexp.new('^(?\[[0-3]\d\/\D{3}\/[1-2]\d{3}:[0-2]\d:[0-5]\d:[0-5]\d ?[\+\-]\d{2}:?\d{2}\])$') TIME_REGEXP = Regexp.new('^(?-)?(?\d{2,3}):(?[0-5][0-9]):(?[0-5][0-9](\.\d+)?)$') def self.parse_timestamp(value) value_str = value.to_s return nil if value_str.empty? if value.kind_of?(Integer) or /^\d+$/ === value_str # Unix epoch in UTC t = DateTime.strptime(value_str, '%s') elsif APACHE_TIMESTAMP_REGEXP.match(value_str) # apache time format t = DateTime.strptime(value, "[%d/%b/%Y:%H:%M:%S %Z]") elsif time_match = TIME_REGEXP.match(value_str) t = convert_time_into_timestamp(time_match) else t = DateTime.parse(convert_date(value_str)) end t = t.new_offset(0) # Redshift Plug-in uses UTC t.strftime('%Y-%m-%d %H:%M:%S.%6N') rescue ArgumentError => ae # '0000-00-00 00:00:00' is valid for mysql datetime column if value_str.start_with?('0000-00-00 00:00:00') return '0001-01-01 00:00:00.000000' else raise ae end end STANDARD_DATETIME = DateTime.new(1, 1, 1) def self.convert_time_into_timestamp(time_match) sign = time_match[:sign] ? -1 : 1 STANDARD_DATETIME.dup + sign * Rational(time_match[:hour].to_i, 24) + sign * Rational(time_match[:minute].to_i, 1440) + sign * Rational(time_match[:second].to_f, 86400) end def self.parse_date(value) return nil if value.nil? value_str = value.to_s dt = Date.parse(convert_date(value_str)) dt.strftime('%Y-%m-%d') rescue ArgumentError => ae # '0000-00-00' is valid for mysql date column return '0001-01-01' if value_str == '0000-00-00' raise ae end CONVERT_ZERO_DATE_REGEXP = /^0000|-00/ CONVERT_ZERO_DATE_HASH_TABLE = {"0000" => "0001", "-00" => "-01" } def self.convert_date(value) value = convert_year_into_date(value) # Convert any 00 value to 01 value.gsub(CONVERT_ZERO_DATE_REGEXP, CONVERT_ZERO_DATE_HASH_TABLE) end def self.convert_year_into_date(value) return value unless /^\d+$/.match(value) # Only positive integers are allowed for year type val_i = value.to_i if val_i == 0 # '0' or '0000' '0001-01-01' # '0001-01-01' is the "base" date elsif value.length == 4 "#{value}-01-01" elsif value.length == 2 "#{val_i >= 70 ? "19" : "20"}#{value}-01-01" else value # Return the value as is end end end end end