lib/fat_table/table.rb in fat_table-0.4.2 vs lib/fat_table/table.rb in fat_table-0.5.1
- old
+ new
@@ -60,13 +60,18 @@
###########################################################################
# :category: Constructors
# Return an empty FatTable::Table object.
- def initialize
def initialize(*heads)
  # One empty Column per requested header, in the order given; no headers
  # yields a table with no columns at all.
  @columns = heads.map { |head| Column.new(header: head) }
  @boundaries = []
end
# :category: Constructors
# Return an empty duplicate of self. This allows the library to create an
@@ -203,10 +208,11 @@
result.mark_boundary
next
end
result << hsh.to_h
end
+ result.normalize_boundaries
result
end
# Construct a new table from an array of arrays. By default, with hlines
# false, do not look for separators, i.e. nils, just treat the first row
@@ -251,19 +257,21 @@
end
row = row.map { |s| s.to_s.strip }
hash_row = Hash[headers.zip(row)]
result << hash_row
end
+ result.normalize_boundaries
result
end
# Build a new table from the CSV data readable from +io+. The first line
# supplies the headers, which are converted to symbols; blank lines are
# skipped. Each remaining line is appended as a Hash row, after which the
# table's boundaries are normalized.
def from_csv_io(io)
  table = new
  reader = ::CSV.new(io, headers: true, header_converters: :symbol,
                         skip_blanks: true)
  reader.each { |csv_row| table << csv_row.to_h }
  table.normalize_boundaries
  table
end
# Form rows of table by reading the first table found in the org file. The
# header row must be marked with an hline (i.e, a row that looks like
@@ -332,12 +340,15 @@
# :category: Attributes
# Set the column type for Column with the given +key+ as a String type,
# but only if empty. Otherwise, we would have to worry about converting
# existing items in the column to String. Perhaps that's a TODO.
- def set_column_to_string_type(key)
- column(key).force_to_string_type
def force_string!(*keys)
  # Delegate to each named column in turn; returning self lets the call be
  # chained.
  keys.each { |key| column(key).force_string! }
  self
end
# :category: Attributes
# Return the array of items of the column with the given header symbol
@@ -521,10 +532,50 @@
groups << group_rows(k)
end
groups
end
# Return the number of groups in the table, which is the number of
# boundaries currently recorded. The boundaries are not normalized first.
def number_of_groups
  boundaries.length
end
+
# Return the Range of row indexes covered by group number +k+ (zero-based).
# Each boundary is the index of the last row of its group, so group +k+
# runs from the row after the previous boundary (or row 0 for the first
# group) through boundaries[k].
#
# Raises ArgumentError if +k+ is negative or not less than the number of
# boundaries; in particular, every +k+ is out of range when the table has
# no boundaries.
def group_row_range(k)
  last_k = boundaries.size - 1
  if k < 0 || k > last_k
    raise ArgumentError, "boundary number '#{k}' out of range in group_row_range"
  end

  # The first group has no preceding boundary, so it starts at row 0.
  first_row = k.zero? ? 0 : boundaries[k - 1] + 1
  (first_row..boundaries[k])
end
+
# Return an Array of Column objects for header +col+, one per group in the
# table, each holding the slice of that column's items that falls in the
# group and carrying the same type as the full column. Boundaries are
# normalized before the groups are enumerated.
def group_cols(col)
  normalize_boundaries
  boundaries.each_index.map do |grp|
    rows_in_group = group_row_range(grp)
    source = column(col)
    Column.new(header: col, items: source.items[rows_in_group], type: source.type)
  end
end
+
# :category: Operators
# Return this table mutated with all groups removed. Useful after something
# like #order_by, which adds groups as a side-effect, when you do not want
# the groups displayed in the output. This modifies the input table, so is a
@@ -543,12 +594,10 @@
else
boundaries.push(size - 1)
end
end
- protected
-
# :stopdoc:
# Make sure size - 1 is last boundary and that they are unique and sorted.
def normalize_boundaries
unless empty?
@@ -556,10 +605,12 @@
self.boundaries = boundaries.uniq.sort
end
boundaries
end
+ protected
+
# Concatenate the array of argument bounds to this table's boundaries, but
# increase each of the indexes in bounds by shift. This is used in the
# #union_all method.
def append_boundaries(bounds, shift: 0)
@boundaries += bounds.map { |k| k + shift }
@@ -602,39 +653,83 @@
# tab.order_by(:date!) => reverse sort on :date
#
# After sorting, the output Table will have group boundaries added after
# each row where the sort key changes.
def order_by(*sort_heads)
  # Map each sort header to :forward or :reverse.
  directions = partition_sort_keys(sort_heads)

  sorted_rows = rows.sort do |row_a, row_b|
    # Build the two comparison keys; for a reverse-sorted header the two
    # rows swap sides so that <=> yields the opposite ordering.
    left = []
    right = []
    directions.each_pair do |head, dir|
      near, far = dir == :forward ? [row_a, row_b] : [row_b, row_a]
      left << near[head]
      right << far[head]
    end
    # Make any booleans comparable with <=>
    left = left.map_booleans
    right = right.map_booleans

    # Array#<=> gives nil when some pair of elements cannot be compared
    # (for instance when one of them is nil); fall back to the nil-aware
    # comparison in that case.
    (left <=> right) || compare_with_nils(left, right)
  end

  # Copy the sorted rows into a fresh table, marking a group boundary after
  # the last row of each run of equal sort keys. The table comes from
  # empty_dup rather than Table.new; per the original author's note, this
  # is so that a subclass gets a result of its own class.
  sorted_tab = empty_dup
  prev_key = nil
  sorted_rows.each_with_index do |row, idx|
    sorted_tab << row
    this_key = row.fetch_values(*directions.keys)
    sorted_tab.mark_boundary(idx - 1) if prev_key && this_key != prev_key
    prev_key = this_key
  end
  sorted_tab.normalize_boundaries
  sorted_tab
end
# :category: Operators
+
# Return a new Table sorting the rows of this Table on any expression
# +expr+ that is valid with the +select+ method, except that the expression
# may end with an exclamation mark +!+ to indicate a reverse sort. The new
# table will have an additional column called +sort_key+ populated with
# the result of evaluating the given expression and will be sorted (or
# reverse sorted) on that column.
#
#   tab.order_with('date.year')  => table sorted by date's year
#   tab.order_with('date.year!') => table reverse sorted by date's year
#
# After sorting, the output Table will have group boundaries added after
# each row where the sort key changes.
#
# Raises a RuntimeError if +expr+ is not a String.
def order_with(expr)
  unless expr.is_a?(String)
    raise 'must call FatTable::Table#order_with with a single string expression'
  end

  # A trailing "!" (optionally surrounded by whitespace) requests a reverse
  # sort and is stripped before the expression is handed to select.
  bang_suffix = /\s*!\s*\z/
  sort_sym = :sort_key
  if expr.match?(bang_suffix)
    sort_sym = :sort_key!
    expr = expr.sub(bang_suffix, '')
  end
  dup.select(*headers, sort_key: expr).order_by(sort_sym)
end
+
+ # :category: Operators
+ #
# Return a Table having the selected column expressions. Each expression can
# be either a
#
# 1. in +cols+, a symbol, +:old_col+, representing a column in the current
# table,
@@ -742,11 +837,19 @@
grp = row_index_to_group_index(old_k)
ev.update_ivars(row: old_k + 1, group: grp)
ev.eval_before_hook(locals: old_row)
# Compute the new row.
new_row = {}
- cols.each do |k|
+ # Allow the :omni col to stand for all columns if it is alone and
+ # first.
+ cols_to_include =
+ if cols.size == 1 && cols.first.as_sym == :omni
+ headers
+ else
+ cols
+ end
+ cols_to_include.each do |k|
h = k.as_sym
msg = "Column '#{h}' in select does not exist"
raise UserError, msg unless column?(h)
new_row[h] = old_row[h]
@@ -908,40 +1011,10 @@
# are eliminated in the output Table.
def except_all(other)
  # Delegate to the shared set-operation helper with the :difference
  # operator, asking it not to remove duplicate rows from the result.
  set_operation(other, :difference, distinct: false)
end
- private
-
- # Apply the set operation given by ~oper~ between this table and the other
- # table given in the first argument. If distinct is true, eliminate
- # duplicates from the result.
- def set_operation(other, oper = :+, distinct: true, add_boundaries: true, inherit_boundaries: false)
- unless columns.size == other.columns.size
- msg = "can't apply set ops to tables with a different number of columns"
- raise UserError, msg
- end
- unless columns.map(&:type) == other.columns.map(&:type)
- msg = "can't apply a set ops to tables with different column types."
- raise UserError, msg
- end
- other_rows = other.rows.map { |r| r.replace_keys(headers) }
- result = empty_dup
- new_rows = rows.send(oper, other_rows)
- new_rows.each_with_index do |row, k|
- result << row
- result.mark_boundary if k == size - 1 && add_boundaries
- end
- if inherit_boundaries
- result.boundaries = normalize_boundaries
- other.normalize_boundaries
- result.append_boundaries(other.boundaries, shift: size)
- end
- result.normalize_boundaries
- distinct ? result.distinct : result
- end
-
public
# An Array of symbols for the valid join types.
JOIN_TYPES = %i[inner left right full cross].freeze
@@ -1504,8 +1577,78 @@
# @return [String]
def to_text(options = {})
  # Build a TextFormatter for this table, let an optional block configure
  # it, then return whatever the formatter's output method produces.
  formatter = TextFormatter.new(self, **options)
  yield formatter if block_given?
  formatter.output
end
+
+ private
+
# Apply the set operation +oper+ between this table's rows and the rows of
# the +other+ table, returning the result as a new table built with
# empty_dup. +oper+ is the name of the method sent to this table's rows
# with the other table's rows (re-keyed to this table's headers) as its
# argument.
#
# - distinct: when true, return the result's +distinct+ rather than the
#   raw result.
# - add_boundaries: when true, mark a boundary in the result after the row
#   whose index equals this table's size - 1.
# - inherit_boundaries: when true, seed the result with this table's
#   normalized boundaries and append the other table's normalized
#   boundaries shifted by this table's size.
#
# Raises UserError if the two tables differ in number of columns or in
# column types.
def set_operation(other, oper = :+, distinct: true, add_boundaries: true, inherit_boundaries: false)
  if columns.size != other.columns.size
    raise UserError, "can't apply set ops to tables with a different number of columns"
  end
  if columns.map(&:type) != other.columns.map(&:type)
    raise UserError, "can't apply a set ops to tables with different column types."
  end

  # Re-key the other table's rows so both operands use this table's headers.
  renamed_rows = other.rows.map { |row| row.replace_keys(headers) }
  combined = empty_dup
  rows.send(oper, renamed_rows).each_with_index do |row, idx|
    combined << row
    combined.mark_boundary if idx == size - 1 && add_boundaries
  end
  if inherit_boundaries
    combined.boundaries = normalize_boundaries
    other.normalize_boundaries
    combined.append_boundaries(other.boundaries, shift: size)
  end
  combined.normalize_boundaries
  return combined.distinct if distinct

  combined
end
+
# Return a Hash mapping each header to sort on to either :forward or
# :reverse. +keys+ may be a single key or an arbitrarily nested Array of
# keys, each a Symbol or String; a key ending in "!" (optionally
# surrounded by whitespace) is a reverse sort and is stored without that
# suffix. Every key of the returned Hash is a Symbol, whichever direction
# it sorts in, so String headers look up symbol-keyed rows correctly.
def partition_sort_keys(keys)
  bang_suffix = /\s*!\s*\z/
  [keys].flatten.each_with_object({}) do |key, directions|
    name = key.to_s
    if name.match?(bang_suffix)
      directions[name.sub(bang_suffix, '').to_sym] = :reverse
    else
      # Symbolize forward keys too, matching the reverse branch.
      directions[name.to_sym] = :forward
    end
  end
end
+
# The <=> operator cannot handle nils without some help. Treat a nil as
# smaller than any other value, but equal to other nils. The two keys are
# assumed to be Arrays of values to be compared pairwise with <=>.
#
# Returns -1, 0, or 1 as decided by the first pair that differs, and 0 when
# every pair is equal. Returns nil only when the first differing pair has
# two non-nil values that <=> itself cannot compare.
def compare_with_nils(key1, key2)
  key1.zip(key2) do |k1, k2|
    # Two nils tie; move on to the next pair.
    next if k1.nil? && k2.nil?
    return -1 if k1.nil?
    return 1 if k2.nil?

    cmp = (k1 <=> k2)
    # cmp may be nil for incomparable values, so test with == rather than
    # zero?, and hand the nil back to the caller unchanged.
    return cmp unless cmp == 0
  end
  0
end
end
end