lib/fat_table/table.rb in fat_table-0.5.2 vs lib/fat_table/table.rb in fat_table-0.5.3

- old
+ new

@@ -60,23 +60,57 @@ # table grows, the implict boundary changes index, we synthesize the # boundaries by dynamically adding the final boundary with the #boundaries # method call. attr_accessor :explicit_boundaries + # An Array of FatTable::Columns that should be tolerant. + attr_reader :tolerant_columns + ########################################################################### # Constructors ########################################################################### # :category: Constructors - # Return an empty FatTable::Table object. - def initialize(*heads) + # Return an empty FatTable::Table object. Specifying headers is optional. + # Any headers ending with a ! are marked as tolerant, in that, if an + # incompatible type is added to it, the column is re-typed as a String + # column, and construction proceeds. The ! is stripped from the header to + # form the column key, though. You can also provide the names of columns + # that should be tolerant by using the +tolerant_columns key-word to + # provide an array of headers that should be tolerant. The special string + # '*' or the symbol :* indicates that all columns should be created + # tolerant. + def initialize(*heads, tolerant_columns: []) @columns = [] @explicit_boundaries = [] + @tolerant_columns = + case tolerant_columns + when Array + tolerant_columns.map { |h| h.to_s.as_sym } + when String + if tolerant_columns.strip == '*' + ['*'.to_sym] + else + [tolerant_columns.as_sym] + end + when Symbol + if tolerant_columns.to_s.strip == '*' + ['*'.to_sym] + else + [tolerant_columns.to_s.as_sym] + end + else + raise ArgumentError, "set tolerant_columns to String, Symbol, or an Array of either" + end unless heads.empty? heads.each do |h| - @columns << Column.new(header: h) + if h.to_s.end_with?('!') || @tolerant_columns.include?(h) + @columns << Column.new(header: h.to_s.sub(/!\s*\z/, ''), tolerant: true) + else + @columns << Column.new(header: h) + end end end end # :category: Constructors @@ -97,42 +131,42 @@ # :category: Constructors # Construct a Table from the contents of a CSV file named +fname+. Headers # will be taken from the first CSV row and converted to symbols. - def self.from_csv_file(fname) + def self.from_csv_file(fname, tolerant_columns: []) File.open(fname, 'r') do |io| - from_csv_io(io) + from_csv_io(io, tolerant_columns: tolerant_columns) end end # :category: Constructors # Construct a Table from a CSV string +str+, treated in the same manner as # the input from a CSV file in ::from_org_file. - def self.from_csv_string(str) - from_csv_io(StringIO.new(str)) + def self.from_csv_string(str, tolerant_columns: []) + from_csv_io(StringIO.new(str), tolerant_columns: tolerant_columns) end # :category: Constructors # Construct a Table from the first table found in the given Emacs org-mode # file named +fname+. Headers are taken from the first row if the second row # is an hrule. Otherwise, synthetic headers of the form +:col_1+, +:col_2+, # etc. are created. - def self.from_org_file(fname) + def self.from_org_file(fname, tolerant_columns: []) File.open(fname, 'r') do |io| - from_org_io(io) + from_org_io(io, tolerant_columns: tolerant_columns) end end # :category: Constructors # Construct a Table from a string +str+, treated in the same manner as the # contents of an org-mode file in ::from_org_file. - def self.from_org_string(str) - from_org_io(StringIO.new(str)) + def self.from_org_string(str, tolerant_columns: []) + from_org_io(StringIO.new(str), tolerant_columns: tolerant_columns) end # :category: Constructors # Construct a new table from an Array of Arrays +aoa+. By default, with @@ -147,24 +181,24 @@ # users: In org mode code blocks when an org-mode table is passed in as a # variable it is passed in as an Array of Arrays. By default (+ HEADER: # :hlines no +) org-mode strips all hrules from the table; otherwise (+ # HEADER: :hlines yes +) they are indicated with nil elements in the outer # array. - def self.from_aoa(aoa, hlines: false) - from_array_of_arrays(aoa, hlines: hlines) + def self.from_aoa(aoa, hlines: false, tolerant_columns: []) + from_array_of_arrays(aoa, hlines: hlines, tolerant_columns: tolerant_columns) end # :category: Constructors # Construct a Table from +aoh+, an Array of Hashes or an Array of any # objects that respond to the #to_h method. All hashes must have the same # keys, which, when converted to symbols will become the headers for the # Table. If hlines is set true, mark a group boundary whenever a nil, rather # than a hash appears in the outer array. - def self.from_aoh(aoh, hlines: false) + def self.from_aoh(aoh, hlines: false, tolerant_columns: []) if aoh.first.respond_to?(:to_h) - from_array_of_hashes(aoh, hlines: hlines) + from_array_of_hashes(aoh, hlines: hlines, tolerant_columns: tolerant_columns) else raise UserError, "Cannot initialize Table with an array of #{input[0].class}" end end @@ -179,11 +213,11 @@ # :category: Constructors # Construct a Table by running a SQL +query+ against the database set up # with FatTable.connect, with the rows of the query result as rows. - def self.from_sql(query) + def self.from_sql(query, tolerant_columns: []) msg = 'FatTable.db must be set with FatTable.connect' raise UserError, msg if FatTable.db.nil? result = Table.new rows = FatTable.db[query] @@ -201,12 +235,12 @@ private # Construct table from an array of hashes or an array of any object that # can respond to #to_h. If an array element is a nil, mark it as a group # boundary in the Table. - def from_array_of_hashes(hashes, hlines: false) - result = new + def from_array_of_hashes(hashes, hlines: false, tolerant_columns: []) + result = new(tolerant_columns: tolerant_columns) hashes.each do |hsh| if hsh.nil? unless hlines msg = 'found an hline in input: try setting hlines true' raise UserError, msg @@ -230,12 +264,12 @@ # element of the outer array is a nil, mark the preceding row as a group # boundary. Note: In org mode code blocks, by default (:hlines no) all # hlines are stripped from the table, otherwise (:hlines yes) they are # indicated with nil elements in the outer array as expected by this # method when hlines is set true. - def from_array_of_arrays(rows, hlines: false) - result = new + def from_array_of_arrays(rows, hlines: false, tolerant_columns: []) + result = new(tolerant_columns: tolerant_columns) headers = [] if !hlines # Take the first row as headers # Second row et seq as data headers = rows[0].map(&:to_s).map(&:as_sym) @@ -267,12 +301,12 @@ end result.normalize_boundaries result end - def from_csv_io(io) - result = new + def from_csv_io(io, tolerant_columns: []) + result = new(tolerant_columns: tolerant_columns) ::CSV.new(io, headers: true, header_converters: :symbol, skip_blanks: true).each do |row| result << row.to_h end result.normalize_boundaries @@ -281,11 +315,11 @@ # Form rows of table by reading the first table found in the org file. The # header row must be marked with an hline (i.e, a row that looks like # '|---+--...--|') and groups of rows may be marked with hlines to # indicate group boundaries. - def from_org_io(io) + def from_org_io(io, tolerant_columns: []) table_re = /\A\s*\|/ hrule_re = /\A\s*\|[-+]+/ rows = [] table_found = false header_found = false @@ -316,11 +350,11 @@ else line = line.sub(/\A\s*\|/, '').sub(/\|\s*\z/, '') rows << line.split('|').map(&:clean) end end - from_array_of_arrays(rows, hlines: true) + from_array_of_arrays(rows, hlines: true, tolerant_columns: tolerant_columns) end end ########################################################################### # Attributes @@ -410,10 +444,19 @@ columns.map(&:header) end # :category: Attributes + # Return whether the column with the given head should be made tolerant. + def tolerant_col?(h) + return true if tolerant_columns.include?(:'*') + + tolerant_columns.include?(h) + end + + # :category: Attributes + # Return the number of rows in the Table. def size return 0 if columns.empty? columns.first.size @@ -569,11 +612,12 @@ cols = [] (0..boundaries.size - 1).each do |k| range = group_row_range(k) tab_col = column(col) gitems = tab_col.items[range] - cols << Column.new(header: col, items: gitems, type: tab_col.type) + cols << Column.new(header: col, items: gitems, + type: tab_col.type, tolerant: tab_col.tolerant?) end cols end # :category: Operators @@ -939,11 +983,16 @@ # tab.where('@row.even? && shares > 500') => even rows with lots of shares def where(expr) expr = expr.to_s result = empty_dup headers.each do |h| - col = Column.new(header: h) + col = + if tolerant_col?(h) + Column.new(header: h, tolerant: true) + else + Column.new(header: h) + end result.add_column(col) end ev = Evaluator.new(ivars: { row: 0, group: 0 }) rows.each_with_index do |row, k| grp = row_index_to_group_index(k) @@ -1404,10 +1453,13 @@ result end private + # Collapse a group of rows to a single row by applying the aggregator from + # the +agg_cols+ to the items in that column and the presumably identical + # value in the +grp_cols to those columns. def row_from_group(rows, grp_cols, agg_cols) new_row = {} grp_cols.each do |h| new_row[h] = rows.first[h] end @@ -1438,10 +1490,10 @@ new_heads = row.keys - headers new_heads.each do |h| # This column is new, so it needs nil items for all prior rows lest # the value be added to a prior row. items = Array.new(size, nil) - columns << Column.new(header: h, items: items) + columns << Column.new(header: h, items: items, tolerant: tolerant_col?(h)) end headers.each do |h| # NB: This adds a nil if h is not in row. column(h) << row[h] end