Sha256: c10235248c3ec3adc6401721a8a2032ee75a7d7d360abb5c9494a76ef7daaabd

Contents?: true

Size: 1.11 KB

Versions: 8

Compression:

Stored size: 1.11 KB

Contents

#!/usr/bin/env bash
#
# Launch a Hadoop streaming job with a mapper and a reducer command.
#
# Usage:
#   <this script> input_file output_file [mapper] [reducer] [sort_fields] [args...]
#
# Arguments:
#   input_file   passed to the streaming jar as -input   (required)
#   output_file  passed to the streaming jar as -output  (required)
#   mapper       mapper command                 (default: /bin/cat)
#   reducer      reducer command                (default: /usr/bin/uniq)
#   sort_fields  value for stream.num.map.output.key.fields (default: 2)
#   args...      any further arguments are appended verbatim to the
#                hadoop command line
#
# Environment:
#   HADOOP_HOME  Hadoop install root (default: /usr/lib/hadoop)
#
# Example command left by the original author (presumably to remove a stale
# output directory before re-running a job):
#   hadoop dfs -rmr out/parsed-followers

# Read the positional parameters by index so that defaults apply cleanly when
# trailing arguments are omitted, then drop only as many as were consumed.
input_file=${1-}
output_file=${2-}
map_script=${3-/bin/cat}
reduce_script=${4-/usr/bin/uniq}
fields=${5-2}
shift "$(( $# < 5 ? $# : 5 ))"

# Input and output are mandatory.  An explicitly empty reducer is also
# rejected, as in the original guard.
if [[ -z "$input_file" || -z "$output_file" || -z "$reduce_script" ]]; then
  echo "$0 input_file output_file [mapper] [reducer] [sort_fields] [args]" >&2
  exit 1
fi

HADOOP_HOME=${HADOOP_HOME-/usr/lib/hadoop}

# The jar path keeps its glob outside the quotes so the shell can expand the
# version-specific file name.
#
# NOTE(review): the separators are passed as the two literal characters
# backslash + t (single-quoted); confirm Hadoop treats that as a tab.
#
# Optional tuning that can be supplied through the trailing args:
#   -jobconf mapred.map.tasks=3
#   -jobconf mapred.reduce.tasks=3
"${HADOOP_HOME}/bin/hadoop" \
  jar "${HADOOP_HOME}"/contrib/streaming/hadoop-*-streaming.jar \
  -partitioner org.apache.hadoop.mapred.lib.KeyFieldBasedPartitioner \
  -jobconf map.output.key.field.separator='\t' \
  -jobconf num.key.fields.for.partition=1 \
  -jobconf stream.map.output.field.separator='\t' \
  -jobconf stream.num.map.output.key.fields="$fields" \
  -mapper "$map_script" \
  -reducer "$reduce_script" \
  -input "$input_file" \
  -output "$output_file" \
  "$@"

Version data entries

8 entries across 4 versions & 2 rubygems

Version Path
mrflip-wukong-0.1.0 bin/hdp-sort
mrflip-wukong-0.1.0 bin/hdp-stream
wukong-1.4.0 bin/hdp-sort
wukong-1.4.0 bin/hdp-stream
wukong-0.1.4 bin/hdp-sort
wukong-0.1.4 bin/hdp-stream
wukong-0.1.1 bin/hdp-sort
wukong-0.1.1 bin/hdp-stream