lib/fat_table/table.rb in fat_table-0.5.1 vs lib/fat_table/table.rb in fat_table-0.5.2

- old
+ new

@@ -51,22 +51,29 @@ # spaces converted to underscore and everything down-cased. So, the heading, # 'Two Words' becomes the header +:two_words+. class Table # An Array of FatTable::Columns that constitute the table. attr_reader :columns - attr_accessor :boundaries + # Record boundaries set explicitly with mark_boundaries or from reading + # hlines from input. When we want to access boundaries, however, we want + # to add an implict boundary at the last row of the table. Since, as the + # table grows, the implict boundary changes index, we synthesize the + # boundaries by dynamically adding the final boundary with the #boundaries + # method call. + attr_accessor :explicit_boundaries + ########################################################################### # Constructors ########################################################################### # :category: Constructors # Return an empty FatTable::Table object. def initialize(*heads) @columns = [] - @boundaries = [] + @explicit_boundaries = [] unless heads.empty? heads.each do |h| @columns << Column.new(header: h) end end @@ -82,15 +89,14 @@ self.dup.__empty! end def __empty! @columns = [] - @boundaries = [] + @explicit_boundaries = [] self end - # :category: Constructors # Construct a Table from the contents of a CSV file named +fname+. Headers # will be taken from the first CSV row and converted to symbols. def self.from_csv_file(fname) @@ -452,12 +458,10 @@ # Return the rows from first to last. We could just index #rows, but in a # large table, that would require that we construct all the rows for a range # of any size. def rows_range(first = 0, last = nil) # :nodoc: - last ||= size - 1 - last = [last, 0].max raise UserError, 'first must be <= last' unless first <= last rows = [] unless columns.empty? first.upto(last) do |rnum| @@ -499,10 +503,12 @@ # Boundaries can be added when a table is read in, for example, from the # text of an org table in which each hline (other than the one separating # the headers from the body) marks a boundary for the row immediately # preceding the hline. # + # Boundaries can also be added manually with the +mark_boundary+ method. + # # The #order_by method resets the boundaries then adds boundaries at the # last row of each group of rows on which the sort keys were equal as a # boundary. # # The #union_all (but not #union since it deletes duplicates) method adds a @@ -534,31 +540,27 @@ groups end # Return the number of groups in the table. def number_of_groups - boundaries.size + empty? ? 0 : boundaries.size end # Return the range of row indexes for boundary number +k+ def group_row_range(k) last_k = boundaries.size - 1 if k < 0 || k > last_k raise ArgumentError, "boundary number '#{k}' out of range in boundary_row_range" end - if boundaries.empty? - (0..size-1) - elsif boundaries.size == 1 + if boundaries.size == 1 (0..boundaries.first) - else + elsif k.zero? # Keep index at or above zero - if k.zero? - (0..boundaries[k]) - else - (boundaries[k-1]+1..boundaries[k]) - end + (0..boundaries[k]) + else + ((boundaries[k - 1] + 1)..boundaries[k]) end end # Return an Array of Column objects for header +col+ representing a # sub-column for each group in the table under that header. @@ -579,60 +581,103 @@ # Return this table mutated with all groups removed. Useful after something # like #order_by, which adds groups as a side-effect, when you do not want # the groups displayed in the output. This modifies the input table, so is a # departure from the otherwise immutability of Tables. def degroup! - @boundaries = [] + self.explicit_boundaries = [] self end # Mark a group boundary at row +row+, and if +row+ is +nil+, mark the last - # row in the table as a group boundary. This is mainly used for internal - # purposes. - def mark_boundary(row = nil) # :nodoc: - if row - boundaries.push(row) - else - boundaries.push(size - 1) + # row in the table as a group boundary. An attempt to add a boundary to + # an empty table has no effect. We adopt the convention that the last row + # of the table always marks an implicit boundary even if it is not in the + # @explicit_boundaries array. When we "mark" a boundary, we intend it to + # be an explicit boundary, even if it marks the last row of the table. + def mark_boundary(row_num = nil) + return self if empty? + + if row_num + unless row_num < size + raise ArgumentError, "can't mark boundary at row #{row_num}, last row is #{size - 1}" + end + unless row_num >= 0 + raise ArgumentError, "can't mark boundary at non-positive row #{row_num}" + end + explicit_boundaries.push(row_num) + elsif size > 0 + explicit_boundaries.push(size - 1) end + normalize_boundaries + self end # :stopdoc: # Make sure size - 1 is last boundary and that they are unique and sorted. def normalize_boundaries unless empty? - boundaries.push(size - 1) unless boundaries.include?(size - 1) - self.boundaries = boundaries.uniq.sort + self.explicit_boundaries = explicit_boundaries.uniq.sort end - boundaries + explicit_boundaries end + # Return the explicit_boundaries, augmented by an implicit boundary for + # the end of the table, unless it's already an implicit boundary. + def boundaries + return [] if empty? + + if explicit_boundaries.last == size - 1 + explicit_boundaries + else + explicit_boundaries + [size - 1] + end + end + protected # Concatenate the array of argument bounds to this table's boundaries, but # increase each of the indexes in bounds by shift. This is used in the # #union_all method. def append_boundaries(bounds, shift: 0) - @boundaries += bounds.map { |k| k + shift } + @explicit_boundaries += bounds.map { |k| k + shift } end - # Return the group number to which row ~row~ belongs. Groups, from the - # user's point of view are indexed starting at 1. - def row_index_to_group_index(row) + # Return the group number to which row ~row_num~ belongs. Groups, from the + # user's point of view are indexed starting at 0. + def row_index_to_group_index(row_num) boundaries.each_with_index do |b_last, g_num| - return (g_num + 1) if row <= b_last + return (g_num + 1) if row_num <= b_last end - 1 + 0 end - def group_rows(row) # :nodoc: + # Return the index of the first row in group number +grp_num+ + def first_row_num_in_group(grp_num) + if grp_num >= boundaries.size || grp_num < 0 + raise ArgumentError, "group number #{grp_num} out of bounds" + end + + grp_num.zero? ? 0 : boundaries[grp_num - 1] + 1 + end + + # Return the index of the last row in group number +grp_num+ + def last_row_num_in_group(grp_num) + if grp_num > boundaries.size || grp_num < 0 + raise ArgumentError, "group number #{grp_num} out of bounds" + else + boundaries[grp_num] + end + end + + # Return the rows for group number +grp_num+. + def group_rows(grp_num) # :nodoc: normalize_boundaries - return [] unless row < boundaries.size + return [] unless grp_num < boundaries.size - first = row.zero? ? 0 : boundaries[row - 1] + 1 - last = boundaries[row] + first = first_row_num_in_group(grp_num) + last = last_row_num_in_group(grp_num) rows_range(first, last) end # :startdoc: @@ -874,11 +919,11 @@ # the row after the new row is evaluated. # vars = new_row.merge(__group: grp) ev.eval_after_hook(locals: new_row) result << new_row end - result.boundaries = boundaries + result.explicit_boundaries = explicit_boundaries result.normalize_boundaries result end # :category: Operators @@ -1011,12 +1056,10 @@ # are eliminated in the output Table. def except_all(other) set_operation(other, :difference, distinct: false) end - public - # An Array of symbols for the valid join types. JOIN_TYPES = %i[inner left right full cross].freeze # :category: Operators # @@ -1121,18 +1164,18 @@ out_row = build_out_row(row_a: self_row, row_b: other_row, common_heads: other_common_heads, type: join_type) result << out_row end - next unless %i[left full].include?(join_type) + next unless [:left, :full].include?(join_type) next if self_row_matched result << build_out_row(row_a: self_row, row_b: other_row_nils, type: join_type) end - if %i[right full].include?(join_type) + if [:right, :full].include?(join_type) other_rows.each_with_index do |other_row, k| next if other_row_matches[k] result << build_out_row(row_a: self_row_nils, row_b: other_row, @@ -1257,11 +1300,11 @@ partial_result << "#{a_head}_a)" and_conds << partial_result partial_result = nil else # First of a pair of _a or _b - partial_result = String.new("(#{a_head}_a == ") + partial_result = +"(#{a_head}_a == " end last_sym = a_head when /\A(?<sy>.*)_b\z/ b_head = Regexp.last_match[:sy].to_sym unless b_heads.include?(b_head) @@ -1276,11 +1319,11 @@ partial_result << "#{b_head}_b)" and_conds << partial_result partial_result = nil else # First of a pair of _a or _b - partial_result = String.new("(#{b_head}_b == ") + partial_result = +"(#{b_head}_b == " end b_common_heads << b_head last_sym = b_head else # No modifier, so must be one of the common columns @@ -1383,19 +1426,10 @@ public # :category: Constructors - # Add a group boundary mark at the given row, or at the end of the table - # by default. - def add_boundary(at_row = nil) - row = at_row || (size - 1) - @boundaries << row - end - - # :category: Constructors - # Add a +row+ represented by a Hash having the headers as keys. If +mark:+ # is set true, mark this row as a boundary. All tables should be built # ultimately using this method as a primitive. def add_row(row, mark: false) row.transform_keys!(&:as_sym) @@ -1601,11 +1635,10 @@ new_rows.each_with_index do |row, k| result << row result.mark_boundary if k == size - 1 && add_boundaries end if inherit_boundaries - result.boundaries = normalize_boundaries - other.normalize_boundaries + result.explicit_boundaries = boundaries result.append_boundaries(other.boundaries, shift: size) end result.normalize_boundaries distinct ? result.distinct : result end