Sha256: fc53b8327f7d3cf869384f5bc68014cbf3343b60892f45e13c8ad3839fab5e44

Contents?: true

Size: 1.89 KB

Versions: 27

Compression:

Stored size: 1.89 KB

Contents

#!/usr/bin/env ruby
#
# Renames every non-empty file listed under the given directory (or file spec)
# after the first tab-separated field of its first line, with '.tsv' appended,
# keeping it in the same directory. Zero-length files are removed instead.
#
# Usage: hdp-parts_to_keys.rb DIR_OR_FILE_SPEC
#
# Shells out to the external hdp-ls, hdp-cat, hdp-mv and hdp-rm commands, which
# must be on the PATH. Every command is echoed to stdout before it is run.
#
require 'shellwords'

dir_to_rename = ARGV[0]
dest_ext = '.tsv'

unless dir_to_rename && (! dir_to_rename.empty?)
  warn "Need a directory or file spec to rename."
  exit
end

#
# Setup
#
warn "\nPlease IGNORE the 'cat: Unable to write to output stream.' errors\n"

# One listing line: mode, a number (or '-'), two words, size, date, time, path.
# Captures the size and the path.
listing_re = %r{[-drwx]+\s+[\-\d]+\s+\w+\s+\w+\s+(\d+)\s+[\d\-]+\s+[\d\:]+\s+(.+)$}

#
# Examine the files
#
# The argument is interpolated as given so that a file spec (glob) keeps
# behaving exactly as before.
file_listings = `hdp-ls #{dir_to_rename}`.split("\n")
command_lists = { }
# The first line of output is skipped (presumably a summary header -- confirm
# against hdp-ls). drop(1) yields [] for empty output, whereas [1..-1] yields
# nil there and would crash on .each.
file_listings.drop(1).each do |file_listing|
  m = listing_re.match(file_listing)
  unless m
    warn "Couldn't grok #{file_listing}"
    next
  end
  size, filename = m.captures

  # Empty files carry no key: queue them for deletion.
  if size.to_i == 0
    (command_lists[:deletes] ||= []) << filename
    next
  end

  # Paths come from parsed command output, so escape them before they reach the shell.
  firstline = `hdp-cat #{Shellwords.escape(filename)} | head -qn1 `
  # chomp so a first line with no tab (key only) is not rejected for its trailing newline.
  file_key, _ = firstline.chomp.split("\t", 2)
  # Only accept keys that are safe as a bare file name.
  unless file_key && (file_key =~ /\A[\w\-\.]+\z/)
    warn "Don't want to rename to '#{file_key}'... skipping"
    next
  end
  destfile = File.join(File.dirname(filename), file_key) + dest_ext
  (command_lists[:moves] ||= []) << "hdp-mv #{Shellwords.escape(filename)} #{Shellwords.escape(destfile)}"
end

#
# Execute the command_lists
#
command_lists.each do |type, command_list|
  case type
  when :deletes
    # All empty files are removed with a single hdp-rm invocation.
    command = "hdp-rm #{Shellwords.join(command_list)}"
    puts command
    `#{command}`
  when :moves
    # Each entry is already a complete, escaped hdp-mv command line.
    command_list.each do |command|
      puts command
      `#{command}`
    end
  end
end


# Sample listing line of the form the script parses (size and path are the captured fields):
# -rw-r--r--   3 flip supergroup          0 2008-12-20 05:51 /user/flip/out/sorted-tweets-20081220/part-00010

# # Killing empty files
# find . -size 0 -print -exec rm {} \;
#
# for foo in part-0* ; do
#   newname=`
#     head -n1 $foo |
#     cut -d'   ' -f1 |
#     ruby -ne 'puts $_.chomp.gsub(/[^\-\w]/){|s| s.bytes.map{|c| "%%%02X" % c }}'
#     `.tsv ;
#   echo "moving $foo to $newname"
#   mv "$foo" "$newname"
# done
#
# # dir=`basename $PWD`
# # for foo in *.tsv ; do
# #   echo "Compressing $dir"
# #   bzip2 -c $foo > ../$dir-bz2/$foo.bz2
# # done

Version data entries

27 entries across 27 versions & 3 rubygems

Version Path
mrflip-wukong-0.1.0 bin/hdp-parts_to_keys.rb
wukong-hadoop-0.2.0 bin/hdp-parts_to_keys.rb
wukong-hadoop-0.1.1 bin/hdp-parts_to_keys.rb
wukong-hadoop-0.1.0 bin/hdp-parts_to_keys.rb
wukong-hadoop-0.0.2 bin/hdp-parts_to_keys.rb
wukong-hadoop-0.0.1 bin/hdp-parts_to_keys.rb
wukong-3.0.0.pre bin/hdp-parts_to_keys.rb
wukong-2.0.2 bin/hdp-parts_to_keys.rb
wukong-2.0.1 bin/hdp-parts_to_keys.rb
wukong-2.0.0 bin/hdp-parts_to_keys.rb
wukong-1.5.4 bin/hdp-parts_to_keys.rb
wukong-1.5.3 bin/hdp-parts_to_keys.rb
wukong-1.5.2 bin/hdp-parts_to_keys.rb
wukong-1.5.1 bin/hdp-parts_to_keys.rb
wukong-1.5.0 bin/hdp-parts_to_keys.rb
wukong-1.4.12 bin/hdp-parts_to_keys.rb
wukong-1.4.11 bin/hdp-parts_to_keys.rb
wukong-1.4.10 bin/hdp-parts_to_keys.rb
wukong-1.4.9 bin/hdp-parts_to_keys.rb
wukong-1.4.7 bin/hdp-parts_to_keys.rb