Sha256: 600fcde64ce99ccdc2d6e4ddd9e830d1130d1e67385f4f07d7c0d55afbbca2d2

Contents?: true

Size: 1.89 KB

Versions: 12

Compression:

Stored size: 1.89 KB

Contents

#!/usr/bin/env ruby
# USAGE:
#
# You can provide one or more words. The last argument is the directory.
#
# Eg:
#
# ruby ./script/dgrep [-r] word1 word2 word3 directory
#
# Passing -r means search recursively
#
# Most Common Words
# I've included a second demo in the form of:
# ruby ./script/dgrep -r -mcw directory
# This will return the top 10 most common words of 5 or more letters
#
# You can also choose to distribute the reduce step by passing --reducers=2 (or any other number; 1 is the default).

require 'rubygems'
require 'rdoc/usage'
require 'skynet'
require File.dirname(__FILE__) + '/../config/skynet_config.rb'

# Prints the entries of +results+ ordered by entry[1], highest first, one per
# line as "<index>. <entry[1]> <entry[0]>" (the index starts at 0).
#
# results - collection returned by mapreduce; each entry is read as
#           entry[0] / entry[1] (presumably a label and its count --
#           confirm against the MostCommonWords / Dgrep reducers).
# header  - optional line printed before the entries.
#
# Prints "No results" instead when +results+ is nil/false or has no entries.
def print_results(results, header = nil)
  unless results && results.any?
    puts "No results"
    return
  end

  puts header if header
  ranked = results.sort_by { |entry| entry[1] }.reverse
  ranked.each_with_index do |entry, index|
    puts "#{index}. #{entry[1]} #{entry[0]}"
  end
end

puts "LOGGING TO #{Skynet.config.logfile_location}"

RDoc::usage if ARGV.empty? || ARGV.include?("--help")

# --reducers=N selects how many reducers the most-common-words job uses.
# String#to_i yields 0 for a missing or non-numeric value, so only a positive
# number overrides the documented default of 1.
reducers = 1
reduce_arg = ARGV.detect { |arg| arg =~ /--reducers/ }
if reduce_arg
  requested = reduce_arg.split('=').last.to_i
  reducers = requested if requested > 0
  ARGV.delete_if { |arg| arg =~ /--reducers/ }
end

# The directory is always the last remaining argument. ARGV can be empty here
# when --reducers was the only argument, so fail with a clear message instead
# of letting File.expand_path raise on nil.
user_dir = ARGV.pop
abort "No directory given. Run with --help for usage." if user_dir.nil?
directory = File.expand_path(user_dir)

# Array#delete returns nil when the flag is absent.
mcw = !ARGV.delete('-mcw').nil?
recursive = !ARGV.delete('-r').nil?

pattern = recursive ? "#{directory}/**/*" : "#{directory}/*"
files = Dir[pattern]

if files.empty?
  puts "No files in #{user_dir}"
  exit
end

# Whatever is left after removing the flags and the directory are the words
# to search for.
words = ARGV

if mcw
  puts "Looking for the most common words in #{user_dir}"
  results = files.mapreduce(MostCommonWords, :reducers => reducers, :master_timeout => 60)
  print_results(results, "RESULTS:")
else
  # Each map task receives one file paired with the full word list.
  map_data = files.map { |file| [file, words] }
  puts "Looking at all the files in #{user_dir} for the word or words '#{words.join(',')}'"
  results = map_data.mapreduce(Dgrep, :data_debug => true)
  print_results(results)
end

Version data entries

12 entries across 12 versions & 3 rubygems

Version Path
brendan-skynet-0.9.3.1 examples/dgrep/script/dgrep
brendan-skynet-0.9.3.2 examples/dgrep/script/dgrep
brendan-skynet-0.9.3.3 examples/dgrep/script/dgrep
brendan-skynet-0.9.3.5 examples/dgrep/script/dgrep
brendan-skynet-0.9.3.6 examples/dgrep/script/dgrep
brendan-skynet-0.9.3.7 examples/dgrep/script/dgrep
brendan-skynet-0.9.3.8 examples/dgrep/script/dgrep
brendan-skynet-0.9.31 examples/dgrep/script/dgrep
brendan-skynet-0.9.32 examples/dgrep/script/dgrep
brendan-skynet-0.9.33 examples/dgrep/script/dgrep
timocratic-skynet-0.9.4 examples/dgrep/script/dgrep
skynet-0.9.3 examples/dgrep/script/dgrep