Sha256: b1484f8b38b0e2e4ad5c1ce095ef4d73f8c3f8c28bc0861fac8e31bebfa03949
Contents?: true
Size: 1.34 KB
Versions: 18
Compression:
Stored size: 1.34 KB
Contents
#!/usr/bin/env bash
#
# Launch a Hadoop Streaming job with KeyFieldBasedPartitioner.
#
# Usage:
#   <script> input_file output_file [mapper=/bin/cat] [reducer=/usr/bin/uniq] \
#            [partfields=2] [sortfields=2] [extra_args...]
#
# Environment:
#   HADOOP_HOME  Hadoop install root (default: /usr/lib/hadoop). The hadoop
#                binary and the streaming jar are located beneath it.
#
# Exit status: 2 when input_file/output_file are missing, otherwise the
# status of the hadoop command.
#
# For a map-side-only job specify
#   -jobconf mapred.reduce.tasks=0
# Maybe?
#   -inputformat org.apache.hadoop.mapred.KeyValueTextInputFormat
#   -mapper      org.apache.hadoop.mapred.lib.IdentityMapper

#######################################
# Print the usage line.
# Outputs: usage text on stderr
#######################################
usage() {
  printf '%s input_file output_file [mapper=/bin/cat] [reducer=/usr/bin/uniq] [partfields=2] [sortfields=2] [extra_args]\n' \
    "$0" >&2
}

#######################################
# Assemble the hadoop command line as an array, one element per argument.
# Globals:
#   HADOOP_HOME     (read)    install root, defaults to /usr/lib/hadoop
#   hdp_stream_cmd  (written) resulting argument vector
# Arguments:
#   $1 input path          $2 output path
#   $3 mapper command      (default /bin/cat)
#   $4 reducer command     (default /usr/bin/uniq)
#   $5 partition fields    (default 2)
#   $6 sort fields         (default 2)
#   $7... extra arguments, inserted right after the jar, before the -D options
#######################################
build_stream_cmd() {
  local input_file=${1-}
  local output_file=${2-}
  # ${N-default}: the default applies only when the argument is absent, so an
  # explicitly passed empty string is kept as given.
  local map_script=${3-/bin/cat}
  local reduce_script=${4-/usr/bin/uniq}
  local partfields=${5-2}
  local sortfields=${6-2}
  local hadoop_home=${HADOOP_HOME-/usr/lib/hadoop}

  # An array keeps every value as exactly one argument. Building the command
  # as a single string and word-splitting it would pass embedded quote
  # characters through literally and break any mapper/reducer command that
  # contains spaces (e.g. "ruby map.rb --map").
  hdp_stream_cmd=(
    "${hadoop_home}/bin/hadoop"
    # The glob is deliberately left unquoted so it matches the versioned jar.
    jar "${hadoop_home}"/contrib/streaming/hadoop-*streaming*.jar
    "${@:7}"
    -D "num.key.fields.for.partition=${partfields}"
    -D "stream.num.map.output.key.fields=${sortfields}"
    # A real tab character; the previous value was the literal text '/t'
    # (forward slash, quotes included), presumably a typo for a tab.
    -D "stream.map.output.field.separator="$'\t'
    -D "mapred.text.key.partitioner.options=-k1,${partfields}"
    -D "mapred.job.name=${0##*/}-${map_script}-${input_file}-${output_file}"
    -partitioner org.apache.hadoop.mapred.lib.KeyFieldBasedPartitioner
    -mapper "${map_script}"
    -reducer "${reduce_script}"
    -input "${input_file}"
    -output "${output_file}"
  )
}

#######################################
# Validate arguments, echo the command, then run it.
# Arguments: same as the script itself
# Outputs:   the shell-quoted command on stdout
# Returns:   2 on missing arguments, else the hadoop exit status
#######################################
main() {
  if [[ -z "${2-}" ]]; then
    usage
    return 2
  fi

  build_stream_cmd "$@"

  # %q shows the command in a form that can be pasted back into a shell.
  printf '%q ' "${hdp_stream_cmd[@]}"
  printf '\n'

  "${hdp_stream_cmd[@]}"
}

# Must remain the last command so the script exits with main's status.
main "$@"
Version data entries
18 entries across 9 versions & 2 rubygems