Sha256: eed59b4ad7b079b8e72bbf6972b4fceb31967f177a417915507e76e6a2036ca6

Contents?: true

Size: 1.73 KB

Versions: 3

Compression:

Stored size: 1.73 KB

Contents

#!/usr/bin/env ruby
require "rubygems"
require "mandy"
require 'optparse'
require 'ostruct'

# Holder for the optional switches; attributes that are never set read as nil.
options = OpenStruct.new

# Declare the supported switches. Positional arguments (script, input, output)
# are left in ARGV for the rest of the script to consume.
parser = OptionParser.new do |o|
  o.banner = "USAGE: mandy-hadoop script input output [options]"

  o.on("-p", "--payload PAYLOAD", "Add a working directory to be sent to the cluster.") { |dir| options.payload = dir }

  o.on("-c", "--conf HADOOP_CONF", "Use this cluster xml config file.") { |xml| options.config = xml }

  # Print the generated help text and stop without running anything.
  o.on_tail("-h", "--help", "Show this message") do
    puts o
    exit
  end
end

# Destructive parse: recognised switches are removed from ARGV in place.
parser.parse!

# Returns +path+ unchanged when it already starts with "/", otherwise
# returns it joined onto the current working directory.
#
# The check is anchored with \A (start of string) rather than ^ (start of
# any line), so a relative path that merely contains "\n/" is not mistaken
# for an absolute one.
#
# path - String file path, absolute or relative to Dir.pwd.
#
# Returns a String path.
def absolute_path(path)
  path =~ %r{\A/} ? path : File.join(Dir.pwd, path)
end

# Positional arguments left in ARGV after option parsing: the job script,
# the input path for the first job, and the folder that receives each job's
# output.
file, input, output_folder = ARGV

# Without all three positionals the code below would fail with an opaque
# TypeError (File.basename(nil) / File.join(nil, ...)), so stop early with
# the usage line instead.
abort "USAGE: mandy-hadoop script input output [options]" unless file && input && output_folder

filename = File.basename(file)
config = options.config || 'cluster.xml'
# With --payload the directory is handed to Mandy::Packer.pack (presumably
# producing an archive to ship -- confirm against Mandy::Packer); otherwise
# the job script itself is the only file sent along.
payload = options.payload ? Mandy::Packer.pack(options.payload) : file
payload_name = File.basename(payload)

# Give the payload back to Mandy::Packer.cleanup! when the process exits,
# including the early exit taken when a job fails.
at_exit { Mandy::Packer.cleanup!(payload) }

# Load the user's script; the loop below expects it to have populated
# Mandy::Job.jobs.
require absolute_path(file)

output = nil

Mandy::Job.jobs.each_with_index do |job, i|
  # Each job setting becomes a -D key='value' switch for hadoop.
  jobconf = job.settings.map { |key, value| %(-D #{key}='#{value}') }.join(' ')
  # Output folders are numbered from 1 and named after the job, with every
  # non-word character replaced by "-".
  output = File.join(output_folder, "#{i+1}-#{job.name.downcase.gsub(/\W/, '-')}")

  command = %($HADOOP_HOME/bin/hadoop jar $HADOOP_HOME/contrib/streaming/hadoop-*-streaming.jar #{jobconf}\
                  -conf '#{config}' \
                  -input "#{input}"  \
                  -mapper "mandy-map #{filename} '#{job.name}' #{payload_name}"  \
                  -reducer "mandy-reduce #{filename} '#{job.name}' #{payload_name}"  \
                  -file "#{payload}" \
                  -output "#{output}")

  `#{command}`

  # Backticks set $? to the child's exit status. Stop the chain on failure:
  # the next job would read this job's incomplete output, and printing the
  # final location would wrongly signal success to the caller.
  abort "mandy-hadoop: job '#{job.name}' failed (exit status #{$?.exitstatus})" unless $?.success?

  # puts "#{command}"
  # Chain the jobs: this job's output is the next job's input.
  input = output
end

# print out the output location so caller can know where to get the results from
puts output

Version data entries

3 entries across 3 versions & 1 rubygems

Version Path
trafficbroker-mandy-0.2.5.1 bin/mandy-hadoop
trafficbroker-mandy-0.2.5 bin/mandy-hadoop
trafficbroker-mandy-0.2.6 bin/mandy-hadoop