#!/usr/bin/env ruby
#
# Command-line driver for sip.
#
# Flow of the script:
#   1. Parse command-line options.
#   2. Require the HADOOP_HOME and HIVE_HOME environment variables.
#   3. Load the config file and resolve the transformation / partition
#      (transpart) script location.
#   4. For every configured database / table (optionally narrowed with -D and
#      -t), ask the Sipper to create scripts; when any were created, run them
#      and then run the hive import for that table.
#   5. Save the config back to the file it was loaded from.
#
# Exit status is 1 on invalid options, missing environment variables, or a
# missing transpart script.

require 'optparse'
require 'rubygems'
require 'sip'

starttime = Time.now

options = {
  :db        => nil,
  :table     => nil,
  :config    => 'sip.yml',
  :debug     => false,
  :tfile     => nil,
  :overwrite => false
}

parser = OptionParser.new do |opts|
  opts.banner = "Usage: sip [options]"

  opts.on('-h', '--help', 'displays usage information') do
    puts opts
    exit
  end

  opts.on('-o', '--overwrite', 'force full overwrite') do
    options[:overwrite] = true
  end

  opts.on('-d', '--debug', 'Print debugging information') do
    options[:debug] = true
  end

  # The value-taking options name their argument so that --help shows that a
  # value is expected.
  opts.on('-D DATABASE', '--database DATABASE', 'only import from given database') do |db|
    options[:db] = db
  end

  opts.on('-c FILE', '--config FILE', 'use specified config') do |c|
    options[:config] = c
  end

  opts.on('-t TABLE', '--table TABLE', 'only import from given table (must specify -D as well)') do |t|
    options[:table] = t
  end

  opts.on('-f FILE', '--transpart-file FILE', 'translation/partition file location (default: looks for file in path)') do |t|
    options[:tfile] = t
  end
end

# Report bad options with the usage text rather than an uncaught exception.
begin
  parser.parse!
rescue OptionParser::ParseError => e
  $stderr.puts e.message
  $stderr.puts parser
  exit 1
end

# The table filter is only honoured together with a database filter; tell the
# user when it is being dropped instead of ignoring it silently.
if options[:table] && options[:db].nil?
  warn "Ignoring -t #{options[:table]}: a table filter only applies when -D is also given."
end
table_filter = options[:db].nil? ? nil : options[:table]

if ENV['HADOOP_HOME'].nil? || ENV['HIVE_HOME'].nil?
  puts "You must first set both the HADOOP_HOME and HIVE_HOME environment variables."
  exit 1
end

# Prints a host- and time-stamped message, but only when --debug was given.
debug = lambda do |msg|
  if options[:debug]
    puts "#{Sip::Utils::hostname} #{Time.now.strftime("%Y-%m-%d %H:%M:%S")}: #{msg}"
  end
end

debug.call "Using config file #{options[:config]}"
config = Sip::Config.load_file options[:config]
config.set_temp options, [:debug, :overwrite]

# A -f given on the command line takes precedence over the configured tfile.
config['tfile'] = Sip::Utils::tfile_path(options[:tfile] || config['tfile'])
config['hdfs_tmpdir'] ||= "/tmp/sip"

if config['tfile'].nil?
  puts "Could not find transformation / partition (transpart) script. Please use -f option to specify a location."
  exit 1
end
debug.call "Using transformation / partitioning script '#{config['tfile']}'"

sipper = Sip::Sipper.new config

config['databases'].each do |dbname, dbconfig|
  next unless options[:db].nil? || options[:db] == dbname

  dbconfig['tables'].each do |tablename, tableconfig|
    next unless table_filter.nil? || table_filter == tablename

    debug.call "creating script for table #{tablename} in database #{dbname}"
    script_count = sipper.create_scripts dbconfig, tableconfig

    # Nothing to execute or import when no scripts were created.
    next unless script_count > 0

    debug.call "Executing #{script_count} scripts..."
    sipper.run_scripts
    debug.call "Running hive import..."
    sipper.run_hive_import tableconfig
  end
end

# Save new version of config
debug.call "Writing new version of config file to #{options[:config]}"
sipper.config.save_file options[:config]

debug.call "Finished running import in #{Time.now - starttime} seconds"