#--
# Author:: Tyler Rick
# Copyright:: Copyright (c) 2007 QualitySmith, Inc.
# License:: Ruby License
# Submit to Facets?:: Maybe.
# Developer notes:
# * Name is too general? Name it something to do with 'tables'?
# * group_table_by ?
# * Compare to Array#classify (quality_extensions), which aims to be more general, letting you classify arrays that are
# *not* in "table" form (whose elements are *not* all arrays of equal size and might not even be *arrays*).
#++
$LOAD_PATH << File.expand_path(File.join(File.dirname(__FILE__), '..', '..'))
gem 'facets'
require 'facets/array/delete'
class Array
  # Breaks a "table" into a hash of smaller arrays, making a new group for each
  # unique value found in the specified column. Each unique value becomes a key
  # in the returned hash; the value is the list of rows that had that key.
  #
  # Example:
  #
  #   [
  #     ['a', 1],
  #     ['a', 2],
  #     ['b', 3],
  #     ['b', 4],
  #   ].group_by(0)
  #   # => { "a" => [[1], [2]],
  #   #      "b" => [[3], [4]] }
  #
  # Parameters:
  # * +column_index+: index of the column whose values become the hash keys.
  # * trailing options hash (optional):
  #   * <tt>:delete_key</tt>: the key column is removed from each grouped row
  #     unless this is explicitly +false+ (default: removed).
  #
  # Example:
  #
  #   [
  #     ['a', 1],
  #     ['a', 2],
  #     ['b', 3],
  #     ['b', 4],
  #   ].group_by(0, :delete_key => false)
  #   # => { "a" => [['a', 1], ['a', 2]],
  #   #      "b" => [['b', 3], ['b', 4]] }
  #
  # Returns a new Hash. The grouped rows are copies, so neither +self+ nor its
  # rows are modified.
  #
  # When called *without* a column index (for example with just a block, as in
  # <tt>array.group_by { |x| x.odd? }</tt>), the call is forwarded unchanged to
  # the inherited Enumerable#group_by, so defining this method does not break
  # the standard block/enumerator form for arrays. A block passed together with
  # a column index is ignored.
  #
  # *Notes*:
  # * self must be in the shape of a "table" (that is, a rectangular,
  #   two-dimensional array: an array of arrays, each member array being of the
  #   same size -- the "width" of the table).
  # * This is different from GROUP BY in SQL in that it doesn't apply an
  #   aggregate (like sum or average) to each group -- it just returns the rows
  #   of each group.
  def group_by(column_index = nil, *args, &block)
    # This definition shadows Enumerable#group_by for every Array. Without this
    # delegation the standard block form would raise ArgumentError.
    return super(&block) if column_index.nil? && args.empty?

    options = args.last.is_a?(Hash) ? args.pop : {}
    # Only an explicit `false` keeps the key column; nil/missing means delete.
    keep_key = (options[:delete_key] == false)

    groups = {}
    each do |row|
      key = row[column_index]
      # Work on a copy so the caller's rows are left intact.
      grouped_row = row.dup
      # Array#slice! removes the key column in place using only core Ruby.
      grouped_row.slice!(column_index) unless keep_key
      (groups[key] ||= []) << grouped_row
    end
    groups
  end
end
# _____ _
# |_ _|__ ___| |_
# | |/ _ \/ __| __|
# | | __/\__ \ |_
# |_|\___||___/\__|
#
=begin test
require 'test/unit'
require 'set'
class TheTest < Test::Unit::TestCase
def test_group_by
assert_equal({ }, [ ].group_by(column_index = 0))
assert_equal({
"a"=>[[1], [2]],
"b"=>[[3], [4]]
}, [
['a', 1],
['a', 2],
['b', 3],
['b', 4],
].group_by(column_index = 0)
)
assert_equal({
"a"=>[['a', 1], ['a', 2]],
"b"=>[['b', 3], ['b', 4]]
}, [
['a', 1],
['a', 2],
['b', 3],
['b', 4],
].group_by(column_index = 0, :delete_key => false)
)
assert_equal({
"a" => Set[['a', 1], ['a', 2]],
"b" => Set[['b', 3], ['b', 4]]
}, [
['a', 1],
['a', 2],
['b', 3],
['b', 4],
].to_set.classify {|o| o[0]}
)
input = [
['Bob', "Bottle of water", 1.00],
['Bob', "Expensive stapler", 50.00],
['Alice', "Dinner for 2", 100.00],
['Alice', "Bus ride to RubyConf", 50.00],
]
assert_equal({
"Alice"=>[["Dinner for 2", 100.0], ["Bus ride to RubyConf", 50.0]],
"Bob"=>[["Bottle of water", 1.0], ["Expensive stapler", 50.0]]
}, input.group_by(column_index = 0)
)
assert_equal({
50.0=>[["Bob", "Expensive stapler"], ["Alice", "Bus ride to RubyConf"]],
100.0=>[["Alice", "Dinner for 2"]],
1.0=>[["Bob", "Bottle of water"]]
}, input.group_by(column_index = 2)
)
end
end
=end