Sha256: ea7f978e5a8160f81f6268b66b3ec84833ed95d586e865c6f858227e9a476fdc

Contents?: true

Size: 984 Bytes

Versions: 1

Compression:

Stored size: 984 Bytes

Contents

#!/usr/bin/env ruby
$: << File.dirname(__FILE__)+'/../../lib'
require 'wukong/script'
require 'wukong/streamer/list_reducer'

module PageRank
  class Script < Wukong::Script
    #
    # Returns the shell command string for the map side of this job
    # (presumably consumed by Wukong::Script -- confirm against that class).
    #
    # Input lines are tab-separated and look like
    #
    #   rsrc    src_id  dest_id  [... junk ...]
    #
    # Only fields 2 and 3 (the src and dest IDs) are kept, so a plain `cut`
    # on the tab delimiter is all that is needed.
    #
    def map_command
      # "\t" is expanded by Ruby, so the command carries a literal TAB
      # between the double quotes passed to cut's -d option.
      "/usr/bin/cut -d\"\t\" -f2,3"
    end
  end

  #
  # Accumulate the dests list in memory, dump as a whole. Multiple edges between
  # any two nodes are permitted, and will accumulate pagerank according to the
  # edge's multiplicity.
  #
  class Reducer < Wukong::Streamer::ListReducer
    # Collect one edge for the current key. Only +dest+ is stored; +src+ is
    # not used here (finalize emits +key+ in its place).
    def accumulate(src, dest)
      @values << dest
    end

    # Yield a single record for the current key:
    #
    #   [key, 1.0, "dest1,dest2,..."]
    #
    # where 1.0 is the initial pagerank and the last field is the collected
    # dests joined with commas. When no dests were collected, the list is
    # replaced by the single placeholder 'dummy' before joining.
    def finalize
      if @values.blank?
        @values = ['dummy']
      end
      dests = @values.to_a.join(',')
      yield [key, 1.0, dests]
    end
  end

  # Build the job and run it. No mapper class is given (nil) -- presumably
  # because Script#map_command supplies the map side; confirm against
  # Wukong::Script. PageRank::Reducer handles the reduce side, and
  # :io_sort_record_percent is passed through as a job option.
  job = Script.new(nil, PageRank::Reducer, :io_sort_record_percent => 0.25)
  job.run
end



Version data entries

1 entries across 1 versions & 1 rubygems

Version Path
wukong-2.0.0 examples/pagerank/pagerank_initialize.rb