# Distributed-grep example: a map/reduce job that counts how many times each
# word occurs (case-insensitively, as a whole word) across a set of files.
class Dgrep
  include SkynetDebugger

  # Map step: counts the occurrences of each word in each file.
  #
  # Takes an array that looks like [ [filename1,["word1","word2"]], [filename2,["word1","word2"]] ]
  # Returns an array that looks like [ [word1, cnt], [word2, cnt], [word1, cnt] ]
  # Only words with a non-zero count are emitted.  Returns nil (not an empty
  # array) when nothing matched at all.
  #
  # Each word is interpolated into a regular expression as-is, so regexp
  # metacharacters in a word are interpreted as pattern syntax.
  def self.map(map_datas)
    results = []
    map_datas.each do |filename, words|
      # Nothing to look for, so don't touch the file at all.
      next if words.empty?

      begin
        # Read the file once per file rather than once per word.
        contents = File.read(filename)
      rescue Errno::EISDIR
        next # skip directories
      end

      words.each do |word|
        # A match must be delimited by non-word characters or by the start/end
        # of the file.  The trailing delimiter is a lookahead so it is not
        # consumed, which lets two occurrences separated by a single character
        # ("foo foo") both be counted.
        pattern = /(?:\A|\W)#{word}(?=\W|\z)/i
        cnt = contents.scan(pattern).size
        results << [word, cnt] if cnt > 0
      end
    end
    results unless results.empty?
  end

  # The chosen reduce partitioner attempts to group keys by partition.  For example, if you choose 2 words and 2 reducers
  # it will make sure the first word goes to the first reducer and the second word to the second reducer.
  # If you only have 1 reducer (the default) it doesn't bother partitioning.
  # You can write as complex a partitioner as you'd like.
  #
  # post_map_data and new_partitions are passed straight through to the
  # partitioner, whose result is returned.
  def self.reduce_partition(post_map_data,new_partitions)
    Skynet::Partitioners::ArrayDataSplitByFirstEntry.reduce_partition(post_map_data,new_partitions)

    ## I've included the contents of the above method so you can see how to write a reduce partitioner
    # partitions = []
    # (0..new_partitions - 1).each { |i| partitions[i] = Array.new }
    # keys_seen = {}
    # post_map_data.each do |partition|
    #   partition.each do |array|
    #     next unless array.is_a?(Array) and array.size >= 2
    #     if array[0].kind_of?(Fixnum)
    #       key = array[0]
    #     else
    #       keys_seen[array[0]] ||= keys_seen.keys.size
    #       key = keys_seen[array[0]]
    #     end
    #     partitions[key % new_partitions] << array
    #   end
    # end
    # partitions

  end

  # Reduce step: sums the counts emitted by map for each word.
  #
  # Takes an array that looks like [ [word1, cnt], [word2, cnt], [word1, cnt] ]
  # Returns a hash that looks like { word1 => total, word2 => total }
  def self.reduce(reduce_datas)
    reduce_datas.each_with_object({}) do |(word, cnt), totals|
      totals[word] = (totals[word] || 0) + cnt
    end
  end

end