Sha256: 81ead02a0917f3cac4a596c4a328896a1c6827ebd57ad67cb3c91bff60708adf
Contents?: true
Size: 1.23 KB
Versions: 3
Compression:
Stored size: 1.23 KB
Contents
module Wukong
  module AndPig
    #
    # Load the main class definitions.
    #
    # Prints the contents of "init_load.pig" (looked up under PIG_DEFS_DIR,
    # a constant defined outside this file) to standard output.
    #
    def self.init_load
      # File.read opens, reads and closes the file in one call, so no file
      # handle is left open behind us.
      puts File.read(File.join(PIG_DEFS_DIR, "init_load.pig"))
    end

    #
    # OK we're going to cheat here:
    # just cat the file in, and treat it as a scalar.
    #
    # * path - location of the stored relation. Currently unused: the shell
    #   command that would read it is disabled, and a hard-coded placeholder
    #   is returned instead.
    #
    # Returns the String "636".
    #
    def load_scalar(path)
      # Disabled implementation, kept for reference:
      #   `hadoop dfs -cat '#{path}/part-*' | head -n1 `.chomp
      # NOTE(review): if this is re-enabled, path is interpolated straight
      # into a shell command line -- escape it first.
      "636"
    end

    #
    # Count the distinct values of an attribute within each group.
    #
    # * dest_rel - name of the relation that receives the final counts
    # * attr     - attribute whose distinct values are counted
    # * group_by - passed through unchanged as the second argument of #group
    #
    # Emits, in order: generate, distinct (with :parallel => 10), group, and a
    # foreach producing "COUNT(<distincted relation>.<attr>) AS n_<attr>".
    # Returns whatever the final #foreach returns.
    #
    # NOTE(review): #histogram calls group with ":by => name" while this
    # method passes group_by positionally; confirm against #group's signature.
    #
    def count_distinct(dest_rel, attr, group_by)
      # temp_rel is deliberately invoked once per intermediate step, in the
      # same order as before, in case it hands out a fresh name on every call.
      projected  = generate(temp_rel(dest_rel), attr)
      distincted = projected.distinct(temp_rel(dest_rel), :parallel => 10)
      grouped    = distincted.group(temp_rel(dest_rel), group_by)
      grouped.foreach(dest_rel,
        "GENERATE COUNT(#{distincted.relation}.#{attr}) AS n_#{attr}")
    end

    #
    # Group a relation into bins, and return the counts for each bin.
    #
    # * dest_rel - name of the relation that receives the result; the generated
    #   statement yields the fields <bin_attr>_bin and <bin_attr>_count
    # * bin_attr - attribute being binned; also used to name the output fields
    # * bin_expr - expression that computes the bin; defaults to bin_attr itself
    #
    # Returns whatever the final #foreach returns.
    #
    def histogram(dest_rel, bin_attr, bin_expr = nil)
      bin_expr ||= bin_attr
      bin_name = "#{bin_attr}_bin"
      binned  = foreach(temp_rel(dest_rel), "GENERATE #{bin_expr} AS #{bin_name}")
      grouped = binned.group(temp_rel(dest_rel), :by => bin_name)
      grouped.foreach(dest_rel,
        "GENERATE group AS #{bin_name}, COUNT(#{binned.relation}) AS #{bin_attr}_count")
    end
  end
end
Version data entries
3 entries across 3 versions & 2 rubygems
Version | Path |
---|---|
mrflip-wukong-0.1.0 | lib/wukong/and_pig/junk.rb |
wukong-0.1.4 | lib/wukong/and_pig/junk.rb |
wukong-0.1.1 | lib/wukong/and_pig/junk.rb |