Sha256: 600fcde64ce99ccdc2d6e4ddd9e830d1130d1e67385f4f07d7c0d55afbbca2d2
Contents?: true
Size: 1.89 KB
Versions: 12
Compression:
Stored size: 1.89 KB
Contents
#!/usr/bin/env ruby

# USAGE:
#
# You can provide one or more words.  The last argument is the directory.
#
# Eg:
#
#   ruby ./script/dgrep [-r] word1 word2 word3 directory
#
# Passing -r means search recursively
#
# Most Common Words
# I've included a second demo in the form of:
#   ruby ./script/dgrep -r -mcw directory
# This will return the top 10 most common 5 or more letter words
#
# You can also choose to distribute the reduce step by passing --reducers=2
# (or any other number. 1 is the default)

require 'rubygems'
require 'rdoc/usage'
require 'skynet'
require File.dirname(__FILE__) + '/../config/skynet_config.rb'

DEFAULT_REDUCERS = 1

# Prints each result as "<index>. <a[1]> <a[0]>", ordered by a[1] descending,
# or "No results" when +results+ is nil or has no truthy entries.
#
# results - collection of indexable entries (entry[0] is the label, entry[1]
#           is the sort key), as returned by mapreduce; may be nil.
# header  - optional line printed before the entries (only when there are any).
def print_results(results, header = nil)
  unless results && results.any?
    puts "No results"
    return
  end

  puts header if header
  results.sort_by { |entry| entry[1] }.reverse.each_with_index do |entry, index|
    puts "#{index}. #{entry[1]} #{entry[0]}"
  end
end

puts "LOGGING TO #{Skynet.config.logfile_location}"

# RDoc::usage prints the comment block at the top of this file and exits.
RDoc::usage if ARGV.empty? || ARGV.include?("--help")

# --reducers=N : number of reducers handed to mapreduce for the -mcw demo.
reducers = DEFAULT_REDUCERS
if reduce_arg = ARGV.detect { |arg| arg =~ /--reducers/ }
  requested = reduce_arg.split('=').last.to_i
  # String#to_i yields 0 for "--reducers" (no value) or a non-numeric value;
  # fall back to the default rather than passing 0 reducers along.
  reducers = requested if requested > 0
  ARGV.delete_if { |arg| arg =~ /--reducers/ }
end

# The directory is always the LAST remaining argument; flags and words precede it.
user_dir = ARGV.pop
# Only option flags were given (e.g. just --reducers=2): show usage instead of
# crashing in File.expand_path(nil).
RDoc::usage if user_dir.nil?
directory = File.expand_path(user_dir)

mcw       = !ARGV.delete('-mcw').nil?
recursive = !ARGV.delete('-r').nil?

files = recursive ? Dir["#{directory}/**/*"] : Dir["#{directory}/*"]

if files.empty?
  puts "No files in #{user_dir}"
  exit
end

# Whatever is left on the command line is the list of words to search for.
words = ARGV

if mcw
  puts "Looking for the most common words in #{user_dir}"
  results = files.mapreduce(MostCommonWords, :reducers => reducers, :master_timeout => 60)
  print_results(results, "RESULTS:")
else
  # Each map input pairs one file with the full word list.
  map_data = files.collect { |file| [file, words] }
  puts "Looking at all the files in #{user_dir} for the word or words '#{words.join(',')}'"
  results = map_data.mapreduce(Dgrep, :data_debug => true)
  print_results(results)
end
Version data entries
12 entries across 12 versions & 3 rubygems