Sha256: a56614fc20af17da9cf432a6759728c69e52fcc171e9772f4453d5b570317964
Contents?: true
Size: 1 KB
Versions: 9
Compression:
Stored size: 1 KB
Contents
#!/usr/bin/env ruby require 'rubygems' require 'wukong' # Run as (local mode) # # ./examples/stupidly_simple_filter.rb --run=local input.tsv output.tsv # # for hadoop mode, # # ./examples/stupidly_simple_filter.rb --run=hadoop input.tsv output.tsv # # For debugging, run # # cat input.tsv | ./examples/stupidly_simple_filter.rb --map input.tsv | more # # # A very simple mapper -- looks for a regex match in one field, # and emits the whole record if the field matches # class GrepMapper < Wukong::Streamer::RecordStreamer MATCHER = %r{(ford|mercury|saab|mazda|isuzu)} # # Given a series of records like: # # tweet 123456789 20100102030405 @frank: I'm having a bacon sandwich # tweet 123456789 20100102030405 @jerry, I'm having your baby # # emits only the lines matching that regex # def process rsrc, id, timestamp, text, *rest yield [rsrc, id, timestamp, text, *rest] if line =~ MATCHER end end # Execute the script Wukong::Script.new( GrepMapper, nil ).run
Version data entries
9 entries across 9 versions & 1 rubygems