lib/csvsql/db.rb in csvsql-0.1.0 vs lib/csvsql/db.rb in csvsql-0.1.1

- old
+ new

@@ -1,147 +1,150 @@
 # frozen_string_literal: true

 require 'digest'

-module Csvsql
-  class Db
-    BATCH_LINES = 10000
-    CACHE_DIR = File.join(Dir.home, '.csvsql_cache')
-
-    FileUtils.mkdir_p(CACHE_DIR) unless Dir.exists?(CACHE_DIR)
+class Csvsql::Db
+  BATCH_ROWS = 10000
+  CACHE_DIR = File.join(Dir.home, '.csvsql_cache')
+  FileUtils.mkdir_p(CACHE_DIR) unless Dir.exists?(CACHE_DIR)

-    attr_reader :use_cache, :csv_path, :csv_io, :db
+  attr_reader :use_cache, :csv_path, :csv_io, :db, :batch_rows

-    def self.clear_cache!
-      require 'fileutils'
-      FileUtils.rm_f(Dir.glob(File.join(CACHE_DIR, '*')))
-    end
+  def self.clear_cache!
+    require 'fileutils'
+    FileUtils.rm_f(Dir.glob(File.join(CACHE_DIR, '*')))
+  end

-    def initialize(use_cache: false)
-      @db = nil
-      @csv_path = nil
-      @use_cache = use_cache
-    end
+  def initialize(use_cache: false, batch_rows: nil)
+    @db = nil
+    @csv_io = nil
+    @csv_path = nil
+    @use_cache = use_cache
+    @batch_rows = batch_rows || BATCH_ROWS
+  end

-    # action:
-    #   raise: default
-    #   exit
-    def sql_error_action=(action)
-      @sql_error_action = action.to_sym
-    end
+  # action:
+  #   raise: default
+  #   exit
+  def sql_error_action=(action)
+    @sql_error_action = action.to_sym
+  end

-    def execute(sql)
-      db.execute(sql)
-    rescue SQLite3::SQLException => e
-      process_sql_error(sql, e)
-    end
+  def execute(sql)
+    db.execute(sql)
+  rescue SQLite3::SQLException => e
+    process_sql_error(sql, e)
+  end

-    def prepare(sql)
-      db.prepare(sql)
-    rescue SQLite3::SQLException => e
-      process_sql_error(sql, e)
-    end
+  def prepare(sql)
+    db.prepare(sql)
+  rescue SQLite3::SQLException => e
+    process_sql_error(sql, e)
+  end

-    def import(csv_data_or_path)
-      case csv_data_or_path
-      when StringIO, IO
-        @csv_io = csv_data_or_path
-      else
-        @csv_path = csv_data_or_path
-      end
-      @db = SQLite3::Database.new(get_db_path(@csv_path))
+  def import(csv_data_or_path)
+    case csv_data_or_path
+    when StringIO, IO
+      @csv_io = csv_data_or_path
+    else
+      @csv_path = csv_data_or_path
+    end
+    @db = SQLite3::Database.new(get_db_path(@csv_path))

-      tables = db.execute("SELECT name FROM sqlite_master WHERE type='table';").first
-      unless tables && tables.include?('csv')
-        init_db_by_csv(@csv_io ? CSV.new(@csv_io) : CSV.open(@csv_path))
-      end
-      true
-    end
+    tables = db.execute("SELECT name FROM sqlite_master WHERE type='table';").flatten
+    unless tables.include?('csv')
+      init_db_by_csv(@csv_io ? CSV.new(@csv_io) : CSV.open(@csv_path))
+    end
+    true
+  end

-    private
+  private

-    def parser_header(csv_header)
-      csv_header.map do |col, r|
-        name, type = col.strip.split(':')
-        [name, (type || 'varchar(255)').downcase.to_sym]
-      end
-    end
+  def parser_header(csv_header)
+    csv_header.map do |col, r|
+      name, type = col.strip.split(':')
+      [name, (type || 'varchar(255)').downcase.to_sym]
+    end
+  end

-    def init_db_by_csv(csv)
-      header = parser_header(csv.readline)
+  def init_db_by_csv(csv)
+    header = parser_header(csv.readline)

-      cols = header.map { |name, type| "#{name} #{type}" }.join(', ')
-      sql = "CREATE TABLE csv (#{cols});"
-      execute sql
+    cols = header.map { |name, type| "#{name} #{type}" }.join(', ')
+    sql = "CREATE TABLE csv (#{cols});"
+    execute sql

-      cache = []
-      col_names = header.map(&:first)
-      csv.each do |line|
-        if cache.length > BATCH_LINES then
-          import_lines(cache, col_names)
-          cache.clear
-        else
-          cache << line.each_with_index.map { |v, i| format_sql_val(v, header[i][1]) }
-        end
-      end
-      import_lines(cache, col_names) unless cache.empty?
-      db
-    end
+    cache = []
+    col_names = header.map(&:first)
+    Csvsql::Tracker.commit(:import_csv)
+    csv.each do |line|
+      cache << line.each_with_index.map { |v, i| format_sql_val(v, header[i][1]) }
+
+      if cache.length >= batch_rows then
+        import_lines(cache, col_names)
+        cache.clear
+      end
+    end
+    import_lines(cache, col_names) unless cache.empty?
+    Csvsql::Tracker.commit(:import_csv)
+    db
+  end

-    def import_lines(lines, col_names)
-      sql = "INSERT INTO csv (#{col_names.join(', ')}) VALUES "
-      values = lines.map { |line| "(#{line.join(',')})" }.join(', ')
-      execute sql + values
-    end
+  def import_lines(lines, col_names)
+    sql = Csvsql::Tracker.commit(:generate_import_sql) do
+      s = "INSERT INTO csv (#{col_names.join(', ')}) VALUES "
+      s += lines.map { |line| "(#{line.join(',')})" }.join(', ')
+    end
+    Csvsql::Tracker.commit(:execute_import_sql) { execute sql }
+  end

-    def format_sql_val(val, type)
-      case type
-      when :int, :integer then val.to_i
-      when :float, :double then val.to_f
-      when :date then "'#{Date.parse(val).to_s}'"
-      when :datetime then "'#{Time.parse(val).strftime('%F %T')}'"
-      else
-        "'#{val.gsub("'", "''")}'"
-      end
-    rescue => e
-      process_sql_error("Parse val: #{val}", e)
-    end
+  def format_sql_val(val, type)
+    case type
+    when :int, :integer then val.to_i
+    when :float, :double then val.to_f
+    when :date then "'#{Date.parse(val).to_s}'"
+    when :datetime then "'#{Time.parse(val).strftime('%F %T')}'"
+    else
+      "'#{val.gsub("'", "''")}'"
+    end
+  rescue => e
+    process_sql_error("Parse val: #{val}", e)
+  end

-    def process_sql_error(sql, err)
-      $stderr.puts(sql)
+  def process_sql_error(sql, err)
+    $stderr.puts(sql)

-      if @error_action == :exit
-        $stderr.puts(e.message)
-        exit
-      else
-        raise err
-      end
-    end
+    if @error_action == :exit
+      $stderr.puts(e.message)
+      exit
+    else
+      raise err
+    end
+  end

-    def get_db_path(csv_path)
-      csv_path = csv_path || ''
-      return '' unless File.exist?(csv_path)
+  def get_db_path(csv_path)
+    csv_path = csv_path || ''
+    return '' unless File.exist?(csv_path)

-      if use_cache
-        stat = File.stat(csv_path)
-        filename = Digest::SHA2.hexdigest(File.absolute_path(csv_path)) + '.cache'
-        file_stat = [File.absolute_path(csv_path), stat.size, stat.ctime].join("\n")
-        stat_path = File.join(CACHE_DIR, filename.gsub(/\.cache$/, '.stat'))
-        cache_path = File.join(CACHE_DIR, filename)
+    if use_cache
+      stat = File.stat(csv_path)
+      filename = Digest::SHA2.hexdigest(File.absolute_path(csv_path)) + '.cache'
+      file_stat = [File.absolute_path(csv_path), stat.size, stat.ctime].join("\n")
+      stat_path = File.join(CACHE_DIR, filename.gsub(/\.cache$/, '.stat'))
+      cache_path = File.join(CACHE_DIR, filename)

-        if File.exist?(stat_path)
-          if File.read(stat_path) == file_stat
-            cache_path
-          else
-            FileUtils.rm(cache_path)
-            cache_path
-          end
-        else
-          File.write(stat_path, file_stat)
-          cache_path
-        end
-      else
-        ''
-      end
-    end
-  end
-end
+      if File.exist?(stat_path)
+        if File.read(stat_path) == file_stat
+          cache_path
+        else
+          FileUtils.rm(cache_path)
+          cache_path
+        end
+      else
+        File.write(stat_path, file_stat)
+        cache_path
+      end
+    else
+      ''
+    end
+  end
+end
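
Usage sketch (not part of the diff): as the 0.1.1 code above shows, Csvsql::Db.new now takes a batch_rows: option next to use_cache:, import accepts either a CSV path or an IO/StringIO, data lands in a single table named csv, and column types can be declared in the CSV header as name:type (defaulting to varchar(255), per parser_header). The require line, file name, header, and query below are illustrative assumptions, not taken from the gem.

  require 'stringio'
  require 'csvsql'   # assumed top-level require for the gem; loads Csvsql::Db

  # Flush inserts every 5000 rows and reuse the cache under ~/.csvsql_cache
  db = Csvsql::Db.new(use_cache: true, batch_rows: 5000)
  db.import('people.csv')                            # or: db.import(StringIO.new(csv_text))
  db.execute("SELECT name, age FROM csv LIMIT 10")   # columns come from a header like "name,age:int"

With use_cache: true and a file path, get_db_path keys the cached SQLite database on the file's absolute path, size, and ctime, so a changed CSV invalidates the cache; with an IO argument the path is empty and an in-memory/temporary database is built each run.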