Sha256: 631b176ff4ea73e7d31acf7618854865f936b2fa36ecdf8bd5cc718612d76e12
Contents?: true
Size: 1013 Bytes
Versions: 1
Compression:
Stored size: 1013 Bytes
Contents
#!/usr/bin/env ruby
require 'wukong'

# Word-count example made of two Wukong dataflows, :mapper and :reducer.
#
# Usage:
#   cat data/jabberwocky.txt | bin/wu-map examples/word_count.rb | sort | bin/wu-red examples/word_count.rb | sort -rnk2 | head

# Pairs every incoming word with an initial count of 1.
Wukong.processor(:add_count) do
  # Emits the two-element array [word, 1] for the given word.
  def process(word)
    emit([word, 1])
  end
end

# Adds up the counts of consecutive [word, count] pairs that share a word and
# emits one [word, total] pair per run. The usage line above sorts the mapper
# output before the reducer, which is what puts equal words next to each other.
Wukong.processor(:accumulator) do
  attr_accessor :current, :count

  # Starts with a clean slate.
  # NOTE(review): presumably called by Wukong before the first record — confirm.
  def setup
    reset!
  end

  # Flushes whatever run is still being accumulated.
  # NOTE(review): presumably called by Wukong after the last record — confirm.
  def stop
    report_then_reset!
  end

  # Forgets the current word and zeroes the running total.
  def reset!
    @current = nil
    @count = 0
  end

  # Emits [current, count] when a word has been seen, then clears the state.
  def report_then_reset!
    emit([current, count]) unless current.nil?
    reset!
  end

  # Adopts `word` as the current word if there is none yet and adds `seen`
  # to the running total.
  def accumulate(word, seen)
    @current = word if current.nil?
    @count += seen
  end

  # Handles one pair: a word different from the current one closes the
  # running group first, then the pair's count is added.
  def process(pair)
    word, seen = pair
    report_then_reset! if word != current
    # to_i: presumably the count arrives as text from from_tsv — confirm.
    accumulate(word, seen.to_i)
  end
end

# line -> lowercase tokens -> tokens of at least 2 characters -> [token, 1] -> TSV
Wukong.dataflow(:mapper) do
  tokenizer = map { |text| text.downcase.strip.split(/\W/) }
  # Also discards the empty strings that splitting on single non-word
  # characters leaves behind.
  short_token_filter = reject { |token| token.length < 2 }

  tokenizer > flatten > short_token_filter > add_count > to_tsv
end

# TSV pairs -> per-word totals -> TSV
Wukong.dataflow(:reducer) do
  from_tsv > accumulator > to_tsv
end
Version data entries
1 entries across 1 versions & 1 rubygems
Version | Path |
---|---|
wukong-3.0.0.pre | examples/word_count.rb |