#!/usr/bin/env ruby require 'rubygems' require 'optparse' require 'io/console' require 'csv' require 'open3' require_relative '../lib/muzzy' Version = Muzzy::VERSION ## check environments kakasi_path = File.absolute_path(File.expand_path('..', __FILE__) + "/../vendor/bin/kakasi") unless File.exists?(kakasi_path) puts "cannot find kakasi. please check kakasi installation is successed." puts "expect path: #{kakasi_path}" puts "If you have not execute `muzzy_setup` yet, please run `muzzy_setup` first." exit 1 end options = { file: '', verbose: false, mysql_config: { user: 'root', host: 'localhost', use_password: false, database: 'muzzy', }, mysqlimport_config: { delete: false }, } mysql_cmd = `which mysql`.to_s.chomp if mysql_cmd == '' puts "cannot find mysql client" exit 1 end mysqlimport_cmd = `which mysqlimport`.to_s.chomp if mysqlimport_cmd == '' puts "cannot find mysqlimport command" exit 1 end def check_file(file) if File.directory?(file) puts "muzzy: #{file}: is directory" exit Errno::ENOENT::Errno end unless File.exists?(file) puts "muzzy: #{file}: No such file or directory" exit Errno::ENOENT::Errno end end ARGV.options do |opt| opt.banner = "Usage: muzzy [filepath] [options]" begin opt.on('-u', "--user [USER]", String, 'mysql user') {|user| options[:mysql_config][:user] = user } opt.on('-h [HOST]', 'mysql host') {|v| options[:mysql_config][:host] = v } opt.on('-p', 'using mysql password') {|v| options[:mysql_config][:use_password] = v } opt.on('-V', '--verbose', 'verbose option') { options[:verbose] = true } opt.on('-r', 'remove data and insert it') { options[:mysqlimport_config][:delete] = true } if ARGV[0].to_s.length > 0 && ARGV[0].to_s[0] != '-' check_file(ARGV[0]) options[:file] = ARGV[0] else opt.on('-f', '--file [FILEPATH]', 'path to target file') {|v| check_file(v) options[:file] = v } end opt.on('-v', '--version') { puts opt.ver exit } opt.parse! if options[:file].nil? || options[:file] == '' puts opt.help exit 1 end rescue => e $stderr.puts("[ERROR] #{e.message}") exit 1 end end if options[:mysql_config][:database] !~ /\A(\w)+\z/ puts "illegal database" exit 1 end # confirm database mysql_cmd_list = [mysql_cmd, '-u', options[:mysql_config][:user], '-h', options[:mysql_config][:host]] if options[:mysql_config][:use_password] mysql_cmd_list.push('-p') end confirm_database_cmd_list = mysql_cmd_list + ['-e', '"' + "SHOW DATABASES LIKE '#{options[:mysql_config][:database]}'" + '"'] if options[:verbose] puts confirm_database_cmd_list.join(' ') end confirm_database_res = `#{confirm_database_cmd_list.join(' ')}`.chomp if confirm_database_res == '' puts "creating database #{options[:mysql_config][:database]}" system(*mysql_cmd_list, '-e', "CREATE DATABASE #{options[:mysql_config][:database]}") if $? == 0 puts "creating database #{options[:mysql_config][:database]} done" end end def fetch_header_and_first_row(filepath, col_sep) i = 0 header_row, first_row = nil, nil CSV.foreach(filepath, col_sep: col_sep) do |row| if i == 0 header_row = row elsif i == 1 first_row = row else break end i += 1 end return [header_row, first_row] end ## get first and second rows header_row = [] first_row = [] # first row means first data row fields_terminated_by = "," fields_enclosed_by = '' File.open(options[:file], mode="rt") do |f| f.each_char do |c| if c == '"' fields_enclosed_by = '"' end break end end if options[:file] =~ /\.tsv\z/ # tsv header_row, first_row = fetch_header_and_first_row(options[:file], "\t") fields_terminated_by = "\t" else # csv(,) or csv(\t) or something # trying to csv csv_header_row, csv_first_row = fetch_header_and_first_row(options[:file], ",") # trying to tsv tsv_header_row, tsv_first_row = fetch_header_and_first_row(options[:file], "\t") if csv_header_row.length == tsv_header_row.length if csv_header_row.length == 1 && tsv_first_row.length == 1 # single col file header_row, first_row = csv_header_row, csv_first_row else puts "illegal file" exit 1 end end if csv_header_row.length > tsv_header_row.length # csv header_row, first_row = csv_header_row, csv_first_row else # tsv header_row, first_row = tsv_header_row, tsv_first_row fields_terminated_by = "\t" end end header_row = header_row.map(&:to_s) # guess header_row is data row or not header_row_is_header = nil if header_row.any?{|str| str.match(/_id/) } header_row_is_header = true elsif header_row.any?{|str| str.match(/NULL/) } header_row_is_header = false elsif header_row.map(&:to_i).select{|x| x <= 0}.count != first_row.map(&:to_i).select{|x| x <= 0}.count # i guess number col count is different, header_row is header. header_row_is_header = true else # cannot judge header is header or not puts "header row is" puts header_row.join(',') puts "header row is HEADER? [y/n]" loop do y_or_n = $stdin.gets.to_s.chomp if y_or_n.match(/\A[Yy]\z/) header_row_is_header = true break end if y_or_n.match(/\A[Nn]\z/) header_row_is_header = false break end puts "plase enter y or n" end end if header_row_is_header header_row = header_row.map do |str| std_out = Open3.capture2('echo', str)[0] Open3.capture2(kakasi_path, '-Ja', '-Ha', '-Ka', '-Ea', '-i', 'utf8', '-o', 'utf8', stdin_data: std_out)[0] end.map(&:chomp).map do |x| # kakasi returns ko^do if 'コード' given so trimming x.gsub(/[\^]/, '') end end Cell = Struct.new(:type, :name) if header_row_is_header row_data_types = first_row.map.with_index do |str, i| if str.match(/\A[\d,]+\z/) # number Cell.new('integer', header_row[i]) else Cell.new('text', header_row[i]) end end else # header row is data row_data_types = header_row.map.with_index do |str, i| if str.match(/\A[\d,]+\z/) # number Cell.new('integer', "col#{i}") else Cell.new('text', "col#{i}") end end end filename = File.basename(options[:file]) table_name = filename.match(/\A(\w+)(\.\w+)?\z/)[1] create_table_sql = "CREATE TABLE #{table_name} (#{row_data_types.map{|x| "#{x.name} #{x.type}"}.join(', ')})" confirm_table_cmd = mysql_cmd_list + [options[:mysql_config][:database], '-e', "\"SHOW CREATE TABLE #{table_name}\"", '2>&1'] if options[:verbose] puts confirm_table_cmd.join(' ') end confirm_table_res = `#{confirm_table_cmd.join(' ')}` if confirm_table_res.to_s.match(/\AERROR 1146/) create_database_cmd_list = mysql_cmd_list + [options[:mysql_config][:database], '-e', '"', "#{create_table_sql}", '"'] if options[:verbose] puts create_database_cmd_list.join(' ') end `#{create_database_cmd_list.join(' ')}` end mysql_options = ['-u', options[:mysql_config][:user], '-h', options[:mysql_config][:host]] if options[:mysql_config][:use_password] mysql_options.push('-p') end cmds = [ mysqlimport_cmd, *mysql_options, options[:mysql_config][:database], '--local', options[:file], "--ignore-lines=#{header_row_is_header ? 1 : 0}", ] if fields_terminated_by cmds.push("--fields_terminated_by=#{fields_terminated_by}") end if options[:mysqlimport_config][:delete] cmds.push('--delete') end if fields_enclosed_by != '' cmds.push("--fields_enclosed_by=#{fields_enclosed_by}") end if options[:verbose] puts cmds.join(' ') end # mysqlimport data system(*cmds) exit 0