lib/fat_table/table.rb in fat_table-0.4.2 vs lib/fat_table/table.rb in fat_table-0.5.1

- old
+ new

@@ -60,13 +60,18 @@ ########################################################################### # :category: Constructors # Return an empty FatTable::Table object. - def initialize + def initialize(*heads) @columns = [] @boundaries = [] + unless heads.empty? + heads.each do |h| + @columns << Column.new(header: h) + end + end end # :category: Constructors # Return an empty duplicate of self. This allows the library to create an @@ -203,10 +208,11 @@ result.mark_boundary next end result << hsh.to_h end + result.normalize_boundaries result end # Construct a new table from an array of arrays. By default, with hlines # false, do not look for separators, i.e. nils, just treat the first row @@ -251,19 +257,21 @@ end row = row.map { |s| s.to_s.strip } hash_row = Hash[headers.zip(row)] result << hash_row end + result.normalize_boundaries result end def from_csv_io(io) result = new ::CSV.new(io, headers: true, header_converters: :symbol, skip_blanks: true).each do |row| result << row.to_h end + result.normalize_boundaries result end # Form rows of table by reading the first table found in the org file. The # header row must be marked with an hline (i.e, a row that looks like @@ -332,12 +340,15 @@ # :category: Attributes # Set the column type for Column with the given +key+ as a String type, # but only if empty. Otherwise, we would have to worry about converting # existing items in the column to String. Perhaps that's a TODO. - def set_column_to_string_type(key) - column(key).force_to_string_type + def force_string!(*keys) + keys.each do |h| + column(h).force_string! + end + self end # :category: Attributes # Return the array of items of the column with the given header symbol @@ -521,10 +532,50 @@ groups << group_rows(k) end groups end + # Return the number of groups in the table. 
+ def number_of_groups + boundaries.size + end + + # Return the range of row indexes for boundary number +k+ + def group_row_range(k) + last_k = boundaries.size - 1 + if k < 0 || k > last_k + raise ArgumentError, "boundary number '#{k}' out of range in boundary_row_range" + end + + if boundaries.empty? + (0..size-1) + elsif boundaries.size == 1 + (0..boundaries.first) + else + # Keep index at or above zero + if k.zero? + (0..boundaries[k]) + else + (boundaries[k-1]+1..boundaries[k]) + end + end + end + + # Return an Array of Column objects for header +col+ representing a + # sub-column for each group in the table under that header. + def group_cols(col) + normalize_boundaries + cols = [] + (0..boundaries.size - 1).each do |k| + range = group_row_range(k) + tab_col = column(col) + gitems = tab_col.items[range] + cols << Column.new(header: col, items: gitems, type: tab_col.type) + end + cols + end + # :category: Operators # Return this table mutated with all groups removed. Useful after something # like #order_by, which adds groups as a side-effect, when you do not want # the groups displayed in the output. This modifies the input table, so is a @@ -543,12 +594,10 @@ else boundaries.push(size - 1) end end - protected - # :stopdoc: # Make sure size - 1 is last boundary and that they are unique and sorted. def normalize_boundaries unless empty? @@ -556,10 +605,12 @@ self.boundaries = boundaries.uniq.sort end boundaries end + protected + # Concatenate the array of argument bounds to this table's boundaries, but # increase each of the indexes in bounds by shift. This is used in the # #union_all method. def append_boundaries(bounds, shift: 0) @boundaries += bounds.map { |k| k + shift } @@ -602,39 +653,83 @@ # tab.order_by(:date!) => reverse sort on :date # # After sorting, the output Table will have group boundaries added after # each row where the sort key changes. 
def order_by(*sort_heads) - sort_heads = [sort_heads].flatten - rev_heads = sort_heads.select { |h| h.to_s.ends_with?('!') } - sort_heads = sort_heads.map { |h| h.to_s.sub(/\!\z/, '').to_sym } - rev_heads = rev_heads.map { |h| h.to_s.sub(/\!\z/, '').to_sym } + # Sort the rows in order and add to new_rows. + key_hash = partition_sort_keys(sort_heads) new_rows = rows.sort do |r1, r2| - key1 = sort_heads.map { |h| rev_heads.include?(h) ? r2[h] : r1[h] } - key2 = sort_heads.map { |h| rev_heads.include?(h) ? r1[h] : r2[h] } - key1 <=> key2 + # Set the sort keys based on direction + key1 = [] + key2 = [] + key_hash.each_pair do |h, dir| + if dir == :forward + key1 << r1[h] + key2 << r2[h] + else + key1 << r2[h] + key2 << r1[h] + end + end + # Make any booleans comparable with <=> + key1 = key1.map_booleans + key2 = key2.map_booleans + + # If there are any nils, <=> will return nil, and we have to use the + # special comparison method, compare_with_nils, instead. + result = (key1 <=> key2) + result.nil? ? compare_with_nils(key1, key2) : result end - # Add the new rows to the table, but mark a group boundary at the points + + # Add the new_rows to the table, but mark a group boundary at the points # where the sort key changes value. NB: I use self.class.new here # rather than Table.new because if this class is inherited, I want the # new_tab to be an instance of the subclass. With Table.new, this # method's result will be an instance of FatTable::Table rather than of # the subclass. 
new_tab = empty_dup last_key = nil new_rows.each_with_index do |nrow, k| new_tab << nrow - key = nrow.fetch_values(*sort_heads) + # key = nrow.fetch_values(*sort_heads) + key = nrow.fetch_values(*key_hash.keys) new_tab.mark_boundary(k - 1) if last_key && key != last_key last_key = key end new_tab.normalize_boundaries new_tab end # :category: Operators + + # Return a new Table sorting the rows of this Table on an any expression + # +expr+ that is valid with the +select+ method, except that they + # expression may end with an exclamation mark +!+ to indicate a reverse + # sort. The new table will have an additional column called +sort_key+ + # populated with the result of evaluating the given expression and will be + # sorted (or reverse sorted) on that column. # + # tab.order_with('date.year') => table sorted by date's year + # tab.order_with('date.year!') => table reverse sorted by date's year + # + # After sorting, the output Table will have group boundaries added after + # each row where the sort key changes. + def order_with(expr) + unless expr.is_a?(String) + raise "must call FatTable::Table\#order_with with a single string expression" + end + rev = false + if expr.match?(/\s*!\s*\z/) + rev = true + expr = expr.sub(/\s*!\s*\z/, '') + end + sort_sym = rev ? :sort_key! : :sort_key + dup.select(*headers, sort_key: expr).order_by(sort_sym) + end + + # :category: Operators + # # Return a Table having the selected column expressions. Each expression can # be either a # # 1. in +cols+, a symbol, +:old_col+, representing a column in the current # table, @@ -742,11 +837,19 @@ grp = row_index_to_group_index(old_k) ev.update_ivars(row: old_k + 1, group: grp) ev.eval_before_hook(locals: old_row) # Compute the new row. new_row = {} - cols.each do |k| + # Allow the :omni col to stand for all columns if it is alone and + # first. 
+ cols_to_include = + if cols.size == 1 && cols.first.as_sym == :omni + headers + else + cols + end + cols_to_include.each do |k| h = k.as_sym msg = "Column '#{h}' in select does not exist" raise UserError, msg unless column?(h) new_row[h] = old_row[h] @@ -908,40 +1011,10 @@ # are eliminated in the output Table. def except_all(other) set_operation(other, :difference, distinct: false) end - private - - # Apply the set operation given by ~oper~ between this table and the other - # table given in the first argument. If distinct is true, eliminate - # duplicates from the result. - def set_operation(other, oper = :+, distinct: true, add_boundaries: true, inherit_boundaries: false) - unless columns.size == other.columns.size - msg = "can't apply set ops to tables with a different number of columns" - raise UserError, msg - end - unless columns.map(&:type) == other.columns.map(&:type) - msg = "can't apply a set ops to tables with different column types." - raise UserError, msg - end - other_rows = other.rows.map { |r| r.replace_keys(headers) } - result = empty_dup - new_rows = rows.send(oper, other_rows) - new_rows.each_with_index do |row, k| - result << row - result.mark_boundary if k == size - 1 && add_boundaries - end - if inherit_boundaries - result.boundaries = normalize_boundaries - other.normalize_boundaries - result.append_boundaries(other.boundaries, shift: size) - end - result.normalize_boundaries - distinct ? result.distinct : result - end - public # An Array of symbols for the valid join types. JOIN_TYPES = %i[inner left right full cross].freeze @@ -1504,8 +1577,78 @@ # @return [String] def to_text(options = {}) fmt = TextFormatter.new(self, **options) yield fmt if block_given? fmt.output + end + + private + + # Apply the set operation given by ~oper~ between this table and the other + # table given in the first argument. If distinct is true, eliminate + # duplicates from the result. 
# Apply the set operation named by +oper+ (an Array method name such as
# +:+, +:|+, +:&+ or +:difference+, invoked on this table's rows with the
# other table's rows as argument) and return the result as a new table
# built with +empty_dup+.
#
# +other+:: the table to combine with; it must have the same number of
#           columns and the same sequence of column types as this one.
# +distinct+:: when true, return +result.distinct+ rather than the raw
#              result.
# +add_boundaries+:: when true, mark a group boundary after the result
#                    row whose index is +size - 1+, i.e. the last index
#                    this table's own rows can occupy.
# +inherit_boundaries+:: when true, replace the result's boundaries with
#                        this table's normalized boundaries followed by
#                        the other table's boundaries shifted by +size+.
#
# Raises UserError when the column counts or column types differ.
def set_operation(other, oper = :+, distinct: true, add_boundaries: true, inherit_boundaries: false)
  unless columns.size == other.columns.size
    msg = "can't apply set ops to tables with a different number of columns"
    raise UserError, msg
  end
  unless columns.map(&:type) == other.columns.map(&:type)
    msg = "can't apply a set ops to tables with different column types."
    raise UserError, msg
  end
  # Re-key the other table's rows with this table's headers so that rows
  # from both tables can be compared and combined as like Hashes.
  other_rows = other.rows.map { |r| r.replace_keys(headers) }
  result = empty_dup
  new_rows = rows.send(oper, other_rows)
  new_rows.each_with_index do |row, k|
    result << row
    result.mark_boundary if k == size - 1 && add_boundaries
  end
  if inherit_boundaries
    result.boundaries = normalize_boundaries
    other.normalize_boundaries
    result.append_boundaries(other.boundaries, shift: size)
  end
  result.normalize_boundaries
  distinct ? result.distinct : result
end

# Return a Hash whose keys are the headers to sort on, as Symbols and in
# the order given, and whose values are either :forward or :reverse. A
# head ending in an exclamation mark (optionally surrounded by
# whitespace) requests a reverse sort and has the mark removed from its
# key. +keys+ may be a single head or an arbitrarily nested Array of
# heads, each a Symbol or a String.
def partition_sort_keys(keys)
  [keys].flatten.each_with_object({}) do |head, directions|
    name = head.to_s
    # Symbolize forward heads as well as reverse ones, so a String head
    # such as 'date' addresses the same key as :date does.
    if name.match?(/\s*!\s*\z/)
      directions[name.sub(/\s*!\s*\z/, '').to_sym] = :reverse
    else
      directions[name.to_sym] = :forward
    end
  end
end

# The <=> operator cannot handle nils without some help. Compare the two
# Arrays +key1+ and +key2+ element by element, treating a nil as smaller
# than any other value but equal to another nil. Return -1, 0, or 1 as
# with <=>; return 0 when every pair compares equal. If a pair of non-nil
# elements cannot be compared, return the nil that <=> gave for them.
def compare_with_nils(key1, key2)
  key1.zip(key2) do |k1, k2|
    next if k1.nil? && k2.nil?
    return -1 if k1.nil?
    return 1 if k2.nil?

    cmp = k1 <=> k2
    # The first pair that is not equal decides the result; cmp is nil here
    # when the two elements are not comparable with each other.
    return cmp unless cmp&.zero?
  end
  0
end