Sha256: c1deea459f13b5d70adf6a912c181fb0cc12d9fe6ba06e7e3d1b9d53d93eb17a

Contents?: true

Size: 1.85 KB

Versions: 12

Compression:

Stored size: 1.85 KB

Contents

#!/usr/bin/env bash
#
# Launch a Hadoop Streaming job.
#
# Usage:
#   <script> input_file output_file [mapper] [reducer] [partfields] [sortfields] [extra_args...]
#
#   input_file    job input path (required)
#   output_file   job output path (required)
#   mapper        mapper command                          (default: /bin/cat)
#   reducer       reducer command                         (default: /usr/bin/uniq)
#   partfields    value for num.key.fields.for.partition  (default: 2)
#   sortfields    value for stream.num.map.output.key.fields (default: 2)
#   extra_args    anything after the sixth argument is left in "$@" and
#                 forwarded to hadoop
#
# Example cleanup command (presumably for removing a stale output directory
# before re-running a job -- confirm before use):
# hadoop dfs -rmr out/parsed-followers

# Positional arguments. `${N-default}` applies the default only when the
# argument is absent, so an explicitly passed empty string is kept as-is.
input_file=${1-}
output_file=${2-}
map_script=${3-/bin/cat}
reduce_script=${4-/usr/bin/uniq}
partfields=${5-2}
sortfields=${6-2}

# Drop the (up to six) arguments consumed above so that "$@" holds only the
# pass-through options. Bounded by $# because shifting more than $# fails.
shift "$(( $# < 6 ? $# : 6 ))"

# Both paths are mandatory. The usage text lists the optional arguments in the
# order they are actually parsed (partfields before sortfields, both defaulting
# to 2). It goes to stderr with a non-zero status so callers can detect that
# no job was started.
if [[ -z "$input_file" || -z "$output_file" ]]; then
  printf '%s input_file output_file [mapper=/bin/cat] [reducer=/usr/bin/uniq] [partfields=2] [sortfields=2] [extra_args]\n' "$0" >&2
  exit 2
fi

# Root of the Hadoop installation; an existing environment value wins.
HADOOP_HOME=${HADOOP_HOME-/usr/lib/hadoop}

#######################################
# Assemble the hadoop streaming invocation as an argument vector.
#
# The command is kept in an array, one element per argument, instead of a
# flat string: quote characters stored inside a string are not re-parsed by
# the shell when the string is expanded, so they would reach hadoop as
# literal characters, and any value containing whitespace (a mapper such as
# "ruby mapper.rb --map", a path with a space) would be split into several
# arguments.
#
# Globals:   HADOOP_HOME, partfields, sortfields, map_script, reduce_script,
#            input_file, output_file (read); cmd (written)
# Arguments: extra options appended verbatim after -output
#######################################
build_stream_cmd() {
  cmd=(
    "${HADOOP_HOME}/bin/hadoop"
    # The glob is deliberately left unquoted so it resolves to the installed
    # streaming jar; only the HADOOP_HOME prefix is quoted.
    jar "${HADOOP_HOME}"/contrib/streaming/hadoop-*-streaming.jar
    -partitioner org.apache.hadoop.mapred.lib.KeyFieldBasedPartitioner
    -jobconf "num.key.fields.for.partition=${partfields}"
    -jobconf "stream.num.map.output.key.fields=${sortfields}"
    -mapper "${map_script}"
    -reducer "${reduce_script}"
    -input "${input_file}"
    -output "${output_file}"
    "$@"
  )
}

build_stream_cmd "$@"

# Show the exact command about to run, shell-quoted so it can be pasted back
# into a terminal unchanged.
printf '%q ' "${cmd[@]}"
printf '\n'

# Last command of the script: its exit status becomes the script's status.
"${cmd[@]}"

# -jobconf      mapred.text.key.partitioner.options="-k1,$partfields"                   \
# -jobconf      stream.map.output.field.separator='\t'                                  \
# -jobconf      map.output.key.field.separator='\t'                                     \
# -jobconf      mapred.map.tasks=3                                                      \
# -jobconf      mapred.reduce.tasks=3                                                   \

#
# TODO:
#   http://issues.apache.org/jira/browse/MAPREDUCE-594
#   http://hadoop.apache.org/common/docs/current/api/org/apache/hadoop/mapred/KeyValueTextInputFormat.html
#   Instead of /bin/cat, the identity mapper can be used (I think):
#     -inputformat    org.apache.hadoop.mapred.KeyValueTextInputFormat \
#     -mapper         org.apache.hadoop.mapred.lib.IdentityMapper      \
#     ...
#
# TODO
#
#   New-style secondary sort:
#     http://hadoop.apache.org/common/docs/r0.20.0/streaming.html

Version data entries

12 entries across 6 versions & 1 rubygem

Version Path
wukong-1.4.9 bin/hdp-stream
wukong-1.4.9 bin/hdp-sort
wukong-1.4.7 bin/hdp-stream
wukong-1.4.7 bin/hdp-sort
wukong-1.4.6 bin/hdp-sort
wukong-1.4.6 bin/hdp-stream
wukong-1.4.5 bin/hdp-sort
wukong-1.4.5 bin/hdp-stream
wukong-1.4.2 bin/hdp-sort
wukong-1.4.2 bin/hdp-stream
wukong-1.4.1 bin/hdp-sort
wukong-1.4.1 bin/hdp-stream