Sha256: b1484f8b38b0e2e4ad5c1ce095ef4d73f8c3f8c28bc0861fac8e31bebfa03949

Contents?: true

Size: 1.34 KB

Versions: 18

Compression:

Stored size: 1.34 KB

Contents

#!/usr/bin/env bash
#
# Launch a Hadoop Streaming job that partitions and sorts its map output on
# the leading key fields.
#
# Usage:
#   <this script> input_file output_file [mapper=/bin/cat] [reducer=/usr/bin/uniq] \
#                 [partfields=2] [sortfields=2] [extra_args]
#
# Arguments:
#   input_file     value passed to -input
#   output_file    value passed to -output
#   mapper         value passed to -mapper   (default /bin/cat)
#   reducer        value passed to -reducer  (default /usr/bin/uniq)
#   partfields     number of leading key fields used for partitioning (default 2)
#   sortfields     number of leading fields that form the map output key (default 2)
#   extra_args     anything after the sixth argument is spliced in verbatim,
#                  right after the streaming jar
#
# Environment:
#   HADOOP_HOME    Hadoop install root (default /usr/lib/hadoop)
#
# Exit status: that of the hadoop command. The usage message exits with the
# status of its echo (normally 0), as it always has.
#
# The command line is built as an array rather than as one string. Quotes
# embedded in a string are NOT re-parsed when the string is expanded, so the
# string form handed hadoop values with literal '"' characters in them and
# split any value containing whitespace into several arguments.

input_file=${1}
output_file=${2}
map_script=${3-/bin/cat}
reduce_script=${4-/usr/bin/uniq}
partfields=${5-2}
sortfields=${6-2}

# Drop the (up to six) named arguments; whatever is left in "$@" is extra_args.
shift "$(( $# < 6 ? $# : 6 ))"

if [[ -z "$output_file" ]]; then
  echo "$0 input_file output_file [mapper=/bin/cat] [reducer=/usr/bin/uniq] [partfields=2] [sortfields=2] [extra_args]"
  exit
fi

HADOOP_HOME=${HADOOP_HOME-/usr/lib/hadoop}

# Resolve the streaming jar. The glob may match several files; only the first
# is used, because any further match would be taken as a job argument.
streaming_jars=( "${HADOOP_HOME}"/contrib/streaming/hadoop-*streaming*.jar )
streaming_jar=${streaming_jars[0]}
if [[ ! -e "$streaming_jar" ]]; then
  printf '%s: no streaming jar found matching %s\n' \
    "${0##*/}" "${HADOOP_HOME}/contrib/streaming/hadoop-*streaming*.jar" >&2
  exit 1
fi

# NOTE(review): this used to be passed as the literal text "'/t'" (forward
# slash, quotes included), which looks like a typo for a tab character.
# A real tab is passed instead; confirm this is the separator the jobs expect.
field_separator=$'\t'

job_name="$(basename -- "$0")-${map_script}-${input_file}-${output_file}"

cmd=(
  "${HADOOP_HOME}/bin/hadoop"
  jar "$streaming_jar"
  "$@"
  -D "num.key.fields.for.partition=${partfields}"
  -D "stream.num.map.output.key.fields=${sortfields}"
  -D "stream.map.output.field.separator=${field_separator}"
  -D "mapred.text.key.partitioner.options=-k1,${partfields}"
  -D "mapred.job.name=${job_name}"
  -partitioner org.apache.hadoop.mapred.lib.KeyFieldBasedPartitioner
  -mapper "$map_script"
  -reducer "$reduce_script"
  -input "$input_file"
  -output "$output_file"
)

# Log the exact command, shell-quoted so it can be pasted back into a terminal.
printf '%q ' "${cmd[@]}"
printf '\n'

"${cmd[@]}"

# For a map-side-only job specify (e.g. via extra_args)
# -jobconf mapred.reduce.tasks=0                                                    \

# Maybe?
#
#     -inputformat    org.apache.hadoop.mapred.KeyValueTextInputFormat \
#     -mapper         org.apache.hadoop.mapred.lib.IdentityMapper      \
#

Version data entries

18 entries across 9 versions & 2 rubygems

Version Path
wukong-hadoop-0.2.0 bin/hdp-sort
wukong-hadoop-0.2.0 bin/hdp-stream
wukong-hadoop-0.1.1 bin/hdp-sort
wukong-hadoop-0.1.1 bin/hdp-stream
wukong-hadoop-0.1.0 bin/hdp-stream
wukong-hadoop-0.1.0 bin/hdp-sort
wukong-hadoop-0.0.2 bin/hdp-sort
wukong-hadoop-0.0.2 bin/hdp-stream
wukong-hadoop-0.0.1 bin/hdp-stream
wukong-hadoop-0.0.1 bin/hdp-sort
wukong-3.0.0.pre bin/hdp-stream
wukong-3.0.0.pre bin/hdp-sort
wukong-2.0.2 bin/hdp-stream
wukong-2.0.2 bin/hdp-sort
wukong-2.0.1 bin/hdp-sort
wukong-2.0.1 bin/hdp-stream
wukong-2.0.0 bin/hdp-stream
wukong-2.0.0 bin/hdp-sort