lib/rspec/hive/rake_tasks/docker.rake in rspec-hive-0.3.0 vs lib/rspec/hive/rake_tasks/docker.rake in rspec-hive-0.4.0
- old
+ new
@@ -24,14 +24,14 @@
'hive_version' =>
ENV['HIVE_VERSION'] || default_config.hive_version
}
}
system 'mkdir', '-p', default_values['hive']['host_shared_directory_path']
- file_path = File.join(
- ENV['CONFIG_FILE_DIR'] || '.',
- ENV['CONFIG_FILE_NAME'] || 'rspec-hive.yml'
- )
+ system 'mkdir', '-p', 'config'
+
+ file_path = ENV['CONFIG_FILE'] || File.join('config', 'rspec-hive.yml')
+
File.open(file_path, 'w+') do |f|
f.write default_values.to_yaml
puts "Default config written to #{f.path}".green
end
end
@@ -39,38 +39,82 @@
namespace :docker do
desc 'Runs docker using hive config file.'\
' It assumes your docker-machine is running.'
task :run do
- puts 'Command `docker` not found.'.red unless system('which docker')
+ fail 'Command `docker` not found.'.red unless system('which docker')
- config_filepath = ENV['CONFIG_FILE'] || 'rspec-hive.yml'
- docker_image_name = ENV['DOCKER_IMAGE_NAME'] || 'nielsensocial/hive'
- unless File.exist? config_filepath
- puts "There's no config file #{config_filepath} please generate default or provide custom config.".red
- raise Errno::ENOENT.new config_filepath unless File.exist? config_filepath
- end
+ config_filepath = ENV['CONFIG_FILE'] || File.join('config', 'rspec-hive.yml')
+ fail "There's no config file #{config_filepath} please"\
+ "generate default or provide custom config.".red unless File.exist? config_filepath
interpolated = ERB.new(File.read(config_filepath)).result
config = YAML.load(interpolated)['hive']
+ docker_image_name = ENV['DOCKER_IMAGE_NAME'] || 'nielsensocial/hive'
cmd = "docker run -v #{config['host_shared_directory_path']}:"\
"#{config['docker_shared_directory_path']}"\
" -d -p #{config['port']}:10000 #{docker_image_name}"
puts "Running `#{cmd}`...".green
system(cmd)
end
desc 'Downloads docker image from dockerhub.'
task :download_image do
- puts 'Command `docker` not found.'.red unless system('which docker')
+ fail 'Command `docker` not found.'.red unless system('which docker')
docker_image_name = ENV['DOCKER_IMAGE_NAME'] || 'nielsensocial/hive'
cmd = "docker pull #{docker_image_name}"
puts "Running `#{cmd}`...".green
system(cmd)
end
+
+ def container_id
+ return ENV['CONTAINER_ID'] if ENV['CONTAINER_ID']
+ docker_conatiners = `docker ps`.lines
+ if docker_conatiners.size != 2
+ raise 'There is more than 1 instance of docker container running (or no running docker containers). '\
+ 'Check `docker ps` and stop containers that are not in use right now or specify CONTAINER_ID and run this command again.'.red
+ else
+ docker_conatiners[1].split[0]
+ end
+ end
+
+ desc 'Load Hive UDFS (user defined functions) onto docker.'
+ task :load_udfs, [:udfs_path] do |t, args|
+ udfs_path = args[:udfs_path]
+ config_filepath = ENV['CONFIG_FILE'] || File.join('config', 'rspec-hive.yml')
+ interpolated = ERB.new(File.read(config_filepath)).result
+ config = YAML.load(interpolated)['hive']
+
+ host_hive_udfs_path = File.join(config['host_shared_directory_path'], 'hive-udfs.jar')
+ fail 'Please provide UDFS_PATH'.red unless udfs_path
+ if udfs_path.start_with?('s3://')
+ puts 'Downloading from s3...'.yellow
+ cmd = "aws s3 ls #{udfs_path}"
+
+ fail 'awscli is not configured.'.red unless system(cmd)
+ cmd = "aws s3 cp #{udfs_path} #{host_hive_udfs_path}"
+ system(cmd)
+ else
+ puts 'Copying from local directory...'.yellow
+ cmd = "cp #{udfs_path} #{host_hive_udfs_path}"
+ end
+ puts 'Done'.green
+
+ puts 'Copying to hadoop on docker...'.yellow
+ cmd = "docker exec -it #{container_id} /bin/bash -c 'cp #{config['docker_shared_directory_path']}/hive-udfs.jar $HADOOP_HOME'"
+ system(cmd)
+ puts 'Done'.green
+ end
+ end
+
+ desc 'Runs beeline console on hive.'
+ task :beeline do
+ puts "Connecting to docker container: #{container_id} and running beeline. To exit: '!q'".green
+ cmd = "docker exec -it #{container_id} /bin/bash -c '$HIVE_HOME/bin/beeline -u jdbc:hive2://localhost:10000 -d org.apache.hive.jdbc.HiveDriver'"
+ system(cmd)
end
end
end