module PostgresUpsert class Writer def initialize(klass, source, options = {}) @klass = klass @options = options.reverse_merge({ :delimiter => ",", :format => :csv, :header => true, :key_column => @klass.primary_key, :update_only => false}) @source = source.instance_of?(String) ? File.open(source, 'r') : source @columns_list = get_columns generate_temp_table_name end def write import org.postgresql.copy.CopyManager if @columns_list.empty? raise "Either the :columns option or :header => true are required" end csv_options = @options[:format] == :binary ? "BINARY" : "DELIMITER '#{@options[:delimiter]}' CSV" copy_table = @temp_table_name columns_string = columns_string_for_copy ActiveRecord::Base.connection_pool.with_connection do |conn| create_temp_table(conn) copy_manager = CopyManager.new(conn.raw_connection.connection) stream = copy_manager.copy_in("COPY #{copy_table} #{columns_string} FROM STDIN WITH #{csv_options}") while line = read_input_line do next if line.strip.size == 0 line = line.to_java_bytes stream.write_to_copy(line, 0, line.length) end stream.end_copy upsert_from_temp_table(conn) drop_temp_table(conn) end end private def get_columns columns_list = @options[:columns] || [] if @options[:format] != :binary && @options[:header] #if header is present, we need to strip it from io, whether we use it for the columns list or not. line = @source.gets if columns_list.empty? columns_list = line.strip.split(@options[:delimiter]) end end columns_list = columns_list.map{|c| @options[:map][c.to_s] } if @options[:map] return columns_list end def columns_string_for_copy str = get_columns_string str.empty? ? str : "(#{str})" end def columns_string_for_select columns = @columns_list.clone columns << "created_at" if @klass.column_names.include?("created_at") columns << "updated_at" if @klass.column_names.include?("updated_at") str = get_columns_string(columns) end def columns_string_for_insert columns = @columns_list.clone columns << "created_at" if @klass.column_names.include?("created_at") columns << "updated_at" if @klass.column_names.include?("updated_at") str = get_columns_string(columns) end def select_string_for_insert columns = @columns_list.clone str = get_columns_string(columns) str << ",'#{DateTime.now.utc}'" if @klass.column_names.include?("created_at") str << ",'#{DateTime.now.utc}'" if @klass.column_names.include?("updated_at") str end def select_string_for_create columns = @columns_list.map(&:to_sym) columns << @options[:key_column].to_sym unless columns.include?(@options[:key_column].to_sym) get_columns_string(columns) end def get_columns_string(columns = nil) columns ||= @columns_list columns.size > 0 ? "\"#{columns.join('","')}\"" : "" end def get_table_name if @options[:table] connection.quote_table_name(@options[:table]) else @klass.quoted_table_name end end def generate_temp_table_name @temp_table_name = "#{@klass.table_name}_temp_#{rand(1000)}" end def read_input_line if @options[:format] == :binary begin return @source.readpartial(10240) rescue EOFError end else line = @source.gets return line end end def upsert_from_temp_table(conn) update_from_temp_table(conn) insert_from_temp_table(conn) unless @options[:update_only] end def update_from_temp_table(conn) conn.execute <<-SQL UPDATE #{get_table_name} AS d #{update_set_clause} FROM #{@temp_table_name} as t WHERE t.#{@options[:key_column]} = d.#{@options[:key_column]} AND d.#{@options[:key_column]} IS NOT NULL; SQL end def update_set_clause command = @columns_list.map do |col| "\"#{col}\" = t.\"#{col}\"" end command << "\"updated_at\" = '#{DateTime.now.utc}'" if @klass.column_names.include?("updated_at") "SET #{command.join(',')}" end def insert_from_temp_table(conn) columns_string = columns_string_for_insert select_string = select_string_for_insert conn.execute <<-SQL INSERT INTO #{get_table_name} (#{columns_string}) SELECT #{select_string} FROM #{@temp_table_name} as t WHERE NOT EXISTS (SELECT 1 FROM #{get_table_name} as d WHERE d.#{@options[:key_column]} = t.#{@options[:key_column]}) AND t.#{@options[:key_column]} IS NOT NULL; SQL end def create_temp_table(conn) columns_string = select_string_for_create conn.execute <<-SQL SET client_min_messages=WARNING; DROP TABLE IF EXISTS #{@temp_table_name}; CREATE TEMP TABLE #{@temp_table_name} AS SELECT #{columns_string} FROM #{get_table_name} WHERE 0 = 1; SQL end def drop_temp_table(conn) conn.execute <<-SQL DROP TABLE #{@temp_table_name} SQL end end end