Sha256: 1cdf832d9e3d78be5c2c752f2b96271c8c26adadca47f9a86a1d54415d7fc2a2

Contents?: true

Size: 1.15 KB

Versions: 5

Compression:

Stored size: 1.15 KB

Contents

#!/usr/bin/env ruby
require "rubygems"
require "mandy"

# Resolves +path+ against the current working directory unless it is
# already absolute (i.e. its very first character is "/").
#
# @param path [String] a file path, absolute or relative
# @return [String] +path+ itself when absolute, otherwise Dir.pwd joined with +path+
def absolute_path(path)
  # \A anchors at the start of the whole string. The former ^ anchor matched
  # at the start of any line, so a relative path containing "\n/" was
  # wrongly treated as absolute.
  path =~ %r{\A/} ? path : File.join(Dir.pwd, path)
end

# The script, input, output folder and cluster config are all mandatory (only
# the payload is optional). Show the usage text whenever any of them is
# missing instead of carrying on with nil values.
if ARGV.size < 4
  puts "USAGE: mandy-hadoop my_script.rb input_file_or_folder_on_hdfs output_folder_on_hdfs cluster-config.xml [payload]"
  # A bare invocation is treated as a help request and keeps exit status 0;
  # a partial argument list is an invocation error.
  exit(ARGV.empty? ? 0 : 1)
end

# Positional arguments, in order: job script, input path, output folder,
# cluster config file and an optional payload. Extra arguments are ignored.
file, input, output_folder, config, payload_source = ARGV
filename = File.basename(file)

# When no payload argument is given, the job script itself is used as the payload.
payload = payload_source ? Mandy::Packer.pack(payload_source) : file

# Load the job script; presumably this is what registers the entries later
# read from Mandy::Job.jobs — confirm against the mandy library.
require absolute_path(file)

# Runs every job in Mandy::Job.jobs as a Hadoop streaming job, in order.
# Each job writes to its own numbered sub-folder of output_folder, and that
# folder becomes the input of the next job, so the jobs form a chain.
Mandy::Job.jobs.each_with_index do |job, i|
  # Each job setting is passed to hadoop as a -D key='value' option.
  jobconf = job.settings.map { |key, value| %(-D #{key}='#{value}') }.join(' ')
  # Output folder name: 1-based position plus the job name, lower-cased with
  # every non-word character replaced by "-".
  output = File.join(output_folder, "#{i+1}-#{job.name.downcase.gsub(/\W/, '-')}")

  # The trailing backslashes join the lines, so the command is one shell line.
  command = %($HADOOP_HOME/bin/hadoop jar $HADOOP_HOME/contrib/streaming/hadoop-*-streaming.jar #{jobconf}\
                  -conf '#{config}' \
                  -input "#{input}"  \
                  -mapper "mandy-map #{filename} '#{job.name}' #{payload}"  \
                  -reducer "mandy-reduce #{filename} '#{job.name}' #{payload}"  \
                  -file "#{payload}" \
                  -output "#{output}")

  `#{command}`
  status = $?

  # A failed job leaves no usable output for the next job to read, so stop
  # the chain here rather than launching the remaining jobs.
  unless status.success?
    abort "mandy-hadoop: job '#{job.name}' failed (exit status #{status.exitstatus.inspect}); remaining jobs skipped"
  end

  # puts "#{command}"
  input = output
end

Version data entries

5 entries across 5 versions & 1 rubygems

Version Path
trafficbroker-mandy-0.2.2 bin/mandy-hadoop
trafficbroker-mandy-0.2.3 bin/mandy-hadoop
trafficbroker-mandy-0.2.4.2 bin/mandy-hadoop
trafficbroker-mandy-0.2.4.3 bin/mandy-hadoop
trafficbroker-mandy-0.2.4 bin/mandy-hadoop