Sha256: 2f82d426e91371c8373734845cf2af735800b9b6c56a9dc19386917d14931e80

Contents?: true

Size: 1.74 KB

Versions: 4

Compression:

Stored size: 1.74 KB

Contents

#!/usr/bin/env ruby
require 'fileutils'
require 'optparse'
require 'ostruct'
require 'shellwords'

require "rubygems"
require "mandy"

HadoopConfiguration.check_home_and_version

exec('mandy-get -h') unless ARGV.size >= 2

options = OpenStruct.new(:raw => false, :merge => false)

OptionParser.new do |opts|
  opts.banner = "USAGE: mandy-get hdfs_file_location local_file_destination [options]"

  opts.on("-c", "--conf HADOOP_CONF", "Use this cluster xml config file.") do |config|
    options.config = config
  end
  
  opts.on('-r', "--raw", "Get the raw file without decompression.") do |config|
    options.raw = true
  end
  
  opts.on('-m', "--merge", "Get a whole folder and merge results.") do |config|
    options.merge = true
  end
  
  opts.on_tail("-h", "--help", "Show this message") do
    puts opts
    exit
  end
end.parse!


def absolute_path(path)
  path =~ /^\// ? path : File.join(Dir.pwd, path)
end

remote_file = ARGV[0]
local_file = ARGV[1]
config = absolute_path(options.config || 'cluster.xml')

def file?(permissions)
  permissions[0,1].downcase != 'd'
end

def is_a_valid(path)
  path =~ /\/(\S+)/
end

if options.raw
  `$HADOOP_HOME/bin/hadoop fs -conf #{config} -get#{options.merge ? 'merge' : ''} #{remote_file} #{local_file}`
  exit($?.exitstatus)
end

FileUtils.mkdir_p(File.split(local_file)[0])
File.open("#{local_file}", "a") do |target|
  IO.popen("$HADOOP_HOME/bin/hadoop fs -conf #{config} -ls #{remote_file}").each_line do |line|
    permissions,replication,user,group,size,mod_date,mod_time,path = *line.split(/\s+/)
    next unless is_a_valid(path)
    if file?(permissions)
      IO.popen("$HADOOP_HOME/bin/hadoop fs -conf #{config} -text #{path}") do |b|
        while data = b.read(1024*50)
          target.write(data)
        end
      end
    end
  end
end

Version data entries

4 entries across 4 versions & 1 rubygems

Version Path
mandy-0.5.28 bin/mandy-get
mandy-0.5.27 bin/mandy-get
mandy-0.5.26 bin/mandy-get
mandy-0.5.25 bin/mandy-get