Sha256: d7b322ecde939aedea3785a0a7aab664836896284e087676d9ddeb54be3b4ac6

Contents?: true

Size: 1.28 KB

Versions: 1

Compression:

Stored size: 1.28 KB

Contents

#!/usr/bin/env ruby
require 'optparse'
require 'ostruct'
require 'shellwords'

require "rubygems"
require "mandy"

# Presumably verifies the local Hadoop installation before any work is done
# (NOTE(review): behaviour is defined in the mandy gem — confirm there).
HadoopConfiguration.check_home_and_version

options = OpenStruct.new

# Build the parser first and keep a handle on it so the usage text can be
# printed directly, without re-executing this script.
parser = OptionParser.new do |opts|
  opts.banner = "USAGE: mandy-get hdfs_file_location local_file_destination [options]"

  opts.on("-c", "--conf HADOOP_CONF", "Use this cluster xml config file.") do |config|
    options.config = config
  end

  opts.on_tail("-h", "--help", "Show this message") do
    puts opts
    exit
  end
end
parser.parse!

# Validate the positional arguments only AFTER parse! has removed the options
# from ARGV. Checking earlier counted "-c file" as positional arguments and,
# worse, made `mandy-get -h` (one argument) re-exec itself endlessly.
abort(parser.to_s) unless ARGV.size >= 2


# Returns +path+ unchanged when it is already absolute (starts with "/");
# otherwise resolves it against the current working directory.
#
# The check looks at the first character of the whole string, so a relative
# path that merely contains a line starting with "/" is not mistaken for an
# absolute one.
def absolute_path(path)
  path.start_with?('/') ? path : File.join(Dir.pwd, path)
end

# Positional arguments: the HDFS source comes first, the local destination second.
remote_file, local_file = ARGV.values_at(0, 1)

# Use the --conf value when given, otherwise fall back to 'cluster.xml'.
config_name = options.config || 'cluster.xml'
config = absolute_path(config_name)

# Tells whether a permissions string (e.g. "-rw-r--r--") describes something
# other than a directory.
#
# Returns false when the string starts with "d" or "D", and also when it is
# empty or nil, because a missing permissions column cannot identify a file.
def file?(permissions)
  type_flag = permissions.to_s[0, 1]
  !type_flag.empty? && type_flag.downcase != 'd'
end

# Looks for a "/" that is immediately followed by a non-whitespace character.
# Returns the index of the first such "/" or nil when there is none
# (a nil +path+ also yields nil), so the result works as a truthy check.
def is_a_valid(path)
  return nil if path.nil?

  path.index(%r{/\S})
end

# Lists +remote_file+ with `hadoop fs -ls` and appends the `hadoop fs -text`
# output of every non-directory entry to +local_file+.

# Command prefix shared by both hadoop invocations; $HADOOP_HOME is expanded
# by the shell. Every interpolated value is shell-escaped so that paths with
# spaces or shell metacharacters are passed to hadoop verbatim.
hadoop_fs = "$HADOOP_HOME/bin/hadoop fs -conf #{Shellwords.escape(config)}"
chunk_size = 1024 * 50

# NOTE: the destination is opened in append mode, so an existing file is
# extended rather than overwritten.
File.open(local_file, "a") do |target|
  # Block form so the listing pipe is closed (and the child reaped) when done.
  IO.popen("#{hadoop_fs} -ls #{Shellwords.escape(remote_file)}") do |listing|
    listing.each_line do |line|
      # Columns: permissions, replication, user, group, size, date, time, path.
      # Limiting the split to 8 fields keeps paths that contain spaces intact.
      permissions, path = line.strip.split(/\s+/, 8).values_at(0, 7)
      next unless is_a_valid(path)
      next unless file?(permissions)

      IO.popen("#{hadoop_fs} -text #{Shellwords.escape(path)}") do |contents|
        last_chunk = nil
        # write (not puts) so no newline is injected at arbitrary chunk
        # boundaries in the middle of the data.
        while (chunk = contents.read(chunk_size))
          target.write(chunk)
          last_chunk = chunk
        end
        # Keep each remote file terminated by a newline, so the next file
        # appended after it starts on a fresh line.
        target.write("\n") if last_chunk && !last_chunk.end_with?("\n")
      end
    end
  end
end

Version data entries

1 entry across 1 version & 1 rubygem

Version Path
mandy-0.5.19 bin/mandy-get