Sha256: 480adba6c8a065f8293d3a6003421e2efdf8a50ecbf6617db072a022e0774021

Contents?: true

Size: 985 Bytes

Versions: 1

Compression:

Stored size: 985 Bytes

Contents

#!/bin/bash
#
# hdp-bzip: copy one or more inputs into a single bzip2-compressed output
# using a Hadoop streaming job.
#
# Usage: hdp-bzip OUTPUT INPUT [INPUT ...]
#
# The job uses /bin/cat as the mapper and /usr/bin/uniq as the reducer, runs
# with exactly one reduce task, and compresses its output with BZip2Codec.
# Any existing OUTPUT directory is removed before the job is submitted.
#
# Environment:
#   HADOOP_HOME  Hadoop installation root (default: /usr/lib/hadoop).
#
# Exit status: 2 on bad usage, otherwise the status of the streaming job.

HADOOP_HOME=${HADOOP_HOME-/usr/lib/hadoop}

# Refuse to run without an output and at least one input: otherwise the
# remove step below would run with an empty target and the job would be
# submitted without any -input.
if (( $# < 2 )); then
  printf 'Usage: %s OUTPUT INPUT [INPUT ...]\n' "${0##*/}" >&2
  exit 2
fi

OUTPUT=$1
shift

# Collect the -input arguments in an array so that each path stays a single
# word even if it contains whitespace or glob characters.
inputs=()
for input in "$@"; do
  inputs+=(-input "$input")
done

echo "Removing output directory $OUTPUT"
# The exit status is deliberately not checked, matching the original flow
# (presumably the directory may not exist yet -- confirm before tightening).
hadoop fs -rmr "$OUTPUT"

# Build the command as an array rather than a string: a string run through
# unquoted expansion is re-split on whitespace and passes embedded quote
# characters literally to the program.
# The streaming jar pattern is left unquoted on purpose so the shell expands
# it to the versioned jar file name.
cmd=(
  "${HADOOP_HOME}/bin/hadoop"
  jar "${HADOOP_HOME}"/contrib/streaming/hadoop-*-streaming.jar
  -partitioner org.apache.hadoop.mapred.lib.KeyFieldBasedPartitioner
  -jobconf mapred.output.compress=true
  -jobconf mapred.output.compression.codec=org.apache.hadoop.io.compress.BZip2Codec
  -jobconf mapred.reduce.tasks=1
  -mapper /bin/cat
  -reducer /usr/bin/uniq
  "${inputs[@]}"
  -output "$OUTPUT"
)

# Log the exact command in a shell-quoted form, then run it. It is the last
# command, so its exit status becomes the script's exit status.
printf '%q ' "${cmd[@]}"
printf '\n'
"${cmd[@]}"

Version data entries

1 entry across 1 version & 1 rubygem

Version Path
wukong-1.4.9 bin/hdp-bzip