lib/fat_table/table.rb in fat_table-0.7.0 vs lib/fat_table/table.rb in fat_table-0.8.0

- old
+ new

@@ -52,67 +52,74 @@ # 'Two Words' becomes the header +:two_words+. class Table # An Array of FatTable::Columns that constitute the table. attr_reader :columns + # Headers of columns that are to be tolerant when they are built. + attr_accessor :tolerant_cols + attr_reader :omni_typ, :omni_tol + # Record boundaries set explicitly with mark_boundaries or from reading # hlines from input. When we want to access boundaries, however, we want # to add an implict boundary at the last row of the table. Since, as the # table grows, the implict boundary changes index, we synthesize the # boundaries by dynamically adding the final boundary with the #boundaries # method call. attr_accessor :explicit_boundaries - # An Array of FatTable::Columns that should be tolerant. - attr_reader :tolerant_columns - ########################################################################### # Constructors - ########################################################################### - + # + # # :category: Constructors - + # # Return an empty FatTable::Table object. Specifying headers is optional. - # Any headers ending with a ! are marked as tolerant, in that, if an - # incompatible type is added to it, the column is re-typed as a String - # column, and construction proceeds. The ! is stripped from the header to - # form the column key, though. You can also provide the names of columns - # that should be tolerant by using the +tolerant_columns key-word to - # provide an array of headers that should be tolerant. The special string - # '*' or the symbol :* indicates that all columns should be created - # tolerant. - def initialize(*heads, tolerant_columns: []) + # By default, all columns start our as having an "open" type and get + # assigned a type based on their contents. For example, if a column + # contains items that can be interpreted as dates, the column gets + # assigned a DateTime type. Other types are Numeric, Boolean, and String. + # Once a type is assigned to a column, any non-conforming vaules in that + # column raise an IncompatibleType error. If a column is marked + # "tolerant", however, the incompatible item is converted to a string and + # allowed to remain in the column without raising an error. They count as + # nils when calculations are performed on the column and paricipate only + # in string formatting directives on output. + # + # Rather than have a column's type determined by content, you can also + # specify a column type by providing a type hash, where the key is the + # header's name and the value is the desired type. In that case, any + # incompatible type raises an an IncompatibleTypeError unless the column + # is also marked tolerant, in which case it gets converted to a string as + # discussed above. If the type name in the types hash ends in a '~', it + # is treated as a specifying the given type but marking it as tolerant as + # well. The values in the type hash can be any string or sybol that + # starts with 'num', 'dat', 'bool', or 'str' to specify Numeric, + # DateTime, Boolean, or String types respectively. + def initialize(*heads, **types) @columns = [] - @explicit_boundaries = [] - @tolerant_columns = - case tolerant_columns - when Array - tolerant_columns.map { |h| h.to_s.as_sym } - when String - if tolerant_columns.strip == '*' - ['*'.to_sym] - else - [tolerant_columns.as_sym] - end - when Symbol - if tolerant_columns.to_s.strip == '*' - ['*'.to_sym] - else - [tolerant_columns.to_s.as_sym] - end - else - raise ArgumentError, "set tolerant_columns to String, Symbol, or an Array of either" - end - unless heads.empty? - heads.each do |h| - if h.to_s.end_with?('!') || @tolerant_columns.include?(h) - @columns << Column.new(header: h.to_s.sub(/!\s*\z/, ''), type: 'String') - else - @columns << Column.new(header: h) - end - end + @tolerant_cols = [] + @headers = [] + # Check for the special 'omni' key + @omni_type = 'NilClass' + @omni_tol = false + if types.keys.map(&:to_s).include?('omni') + # All columns not otherwise included in types should have the type and + # tolerance of omni. + omni_val = (types['omni'] || types[:omni]) + @omni_type, @omni_tol = Table.typ_tol(omni_val) + # Remove omni from types. + types.delete(:omni) + types.delete('omni') end + heads += types.keys + heads.uniq.each do |h| + typ, tol = Table.typ_tol(types[h]) + @tolerant_cols << h.to_s.as_sym if tol + @columns << Column.new(header: h.to_s.sub(/~\s*\z/, ''), type: typ, + tolerant: tol) + end + @explicit_boundaries = [] end # :category: Constructors # Return an empty duplicate of self. This allows the library to create an @@ -131,42 +138,42 @@ # :category: Constructors # Construct a Table from the contents of a CSV file named +fname+. Headers # will be taken from the first CSV row and converted to symbols. - def self.from_csv_file(fname, tolerant_columns: []) + def self.from_csv_file(fname, **types) File.open(fname, 'r') do |io| - from_csv_io(io, tolerant_columns: tolerant_columns) + from_csv_io(io, **types) end end # :category: Constructors # Construct a Table from a CSV string +str+, treated in the same manner as # the input from a CSV file in ::from_org_file. - def self.from_csv_string(str, tolerant_columns: []) - from_csv_io(StringIO.new(str), tolerant_columns: tolerant_columns) + def self.from_csv_string(str, **types) + from_csv_io(StringIO.new(str), **types) end # :category: Constructors # Construct a Table from the first table found in the given Emacs org-mode # file named +fname+. Headers are taken from the first row if the second row # is an hrule. Otherwise, synthetic headers of the form +:col_1+, +:col_2+, # etc. are created. - def self.from_org_file(fname, tolerant_columns: []) + def self.from_org_file(fname, **types) File.open(fname, 'r') do |io| - from_org_io(io, tolerant_columns: tolerant_columns) + from_org_io(io, **types) end end # :category: Constructors # Construct a Table from a string +str+, treated in the same manner as the # contents of an org-mode file in ::from_org_file. - def self.from_org_string(str, tolerant_columns: []) - from_org_io(StringIO.new(str), tolerant_columns: tolerant_columns) + def self.from_org_string(str, **types) + from_org_io(StringIO.new(str), **types) end # :category: Constructors # Construct a new table from an Array of Arrays +aoa+. By default, with @@ -181,24 +188,24 @@ # users: In org mode code blocks when an org-mode table is passed in as a # variable it is passed in as an Array of Arrays. By default (+ HEADER: # :hlines no +) org-mode strips all hrules from the table; otherwise (+ # HEADER: :hlines yes +) they are indicated with nil elements in the outer # array. - def self.from_aoa(aoa, hlines: false, tolerant_columns: []) - from_array_of_arrays(aoa, hlines: hlines, tolerant_columns: tolerant_columns) + def self.from_aoa(aoa, hlines: false, **types) + from_array_of_arrays(aoa, hlines: hlines, **types) end # :category: Constructors # Construct a Table from +aoh+, an Array of Hashes or an Array of any # objects that respond to the #to_h method. All hashes must have the same # keys, which, when converted to symbols will become the headers for the # Table. If hlines is set true, mark a group boundary whenever a nil, rather # than a hash appears in the outer array. - def self.from_aoh(aoh, hlines: false, tolerant_columns: []) + def self.from_aoh(aoh, hlines: false, **types) if aoh.first.respond_to?(:to_h) - from_array_of_hashes(aoh, hlines: hlines, tolerant_columns: tolerant_columns) + from_array_of_hashes(aoh, hlines: hlines, **types) else raise UserError, "Cannot initialize Table with an array of #{input[0].class}" end end @@ -213,11 +220,11 @@ # :category: Constructors # Construct a Table by running a SQL +query+ against the database set up # with FatTable.connect, with the rows of the query result as rows. - def self.from_sql(query, tolerant_columns: []) + def self.from_sql(query, **types) msg = 'FatTable.db must be set with FatTable.connect' raise UserError, msg if FatTable.db.nil? result = Table.new rows = FatTable.db[query] @@ -230,17 +237,36 @@ ############################################################################ # Class-level constructor helpers ############################################################################ class << self + # Return [typ, tol] based on the type string, str. + def typ_tol(str) + tol = str ? str.match?(/~\s*\Z/) : false + typ = + case str + when /\A\s*num/i + 'Numeric' + when /\A\s*boo/i + 'Boolean' + when /\A\s*dat/i + 'DateTime' + when /\A\s*str/i + 'String' + else + 'NilClass' + end + [typ, tol] + end + private # Construct table from an array of hashes or an array of any object that # can respond to #to_h. If an array element is a nil, mark it as a group # boundary in the Table. - def from_array_of_hashes(hashes, hlines: false, tolerant_columns: []) - result = new(tolerant_columns: tolerant_columns) + def from_array_of_hashes(hashes, hlines: false, **types) + result = new(**types) hashes.each do |hsh| if hsh.nil? unless hlines msg = 'found an hline in input: try setting hlines true' raise UserError, msg @@ -264,12 +290,12 @@ # element of the outer array is a nil, mark the preceding row as a group # boundary. Note: In org mode code blocks, by default (:hlines no) all # hlines are stripped from the table, otherwise (:hlines yes) they are # indicated with nil elements in the outer array as expected by this # method when hlines is set true. - def from_array_of_arrays(rows, hlines: false, tolerant_columns: []) - result = new(tolerant_columns: tolerant_columns) + def from_array_of_arrays(rows, hlines: false, **types) + result = new(**types) headers = [] if !hlines # Take the first row as headers # Second row et seq as data headers = rows[0].map(&:to_s).map(&:as_sym) @@ -301,12 +327,12 @@ end result.normalize_boundaries result end - def from_csv_io(io, tolerant_columns: []) - result = new(tolerant_columns: tolerant_columns) + def from_csv_io(io, **types) + result = new(**types) ::CSV.new(io, headers: true, header_converters: :symbol, skip_blanks: true).each do |row| result << row.to_h end result.normalize_boundaries @@ -315,11 +341,11 @@ # Form rows of table by reading the first table found in the org file. The # header row must be marked with an hline (i.e, a row that looks like # '|---+--...--|') and groups of rows may be marked with hlines to # indicate group boundaries. - def from_org_io(io, tolerant_columns: []) + def from_org_io(io, **types) table_re = /\A\s*\|/ hrule_re = /\A\s*\|[-+]+/ rows = [] table_found = false header_found = false @@ -350,11 +376,11 @@ else line = line.sub(/\A\s*\|/, '').sub(/\|\s*\z/, '') rows << line.split('|').map(&:clean) end end - from_array_of_arrays(rows, hlines: true, tolerant_columns: tolerant_columns) + from_array_of_arrays(rows, hlines: true, **types) end end ########################################################################### # Attributes @@ -375,10 +401,20 @@ # header as a String. def type(key) column(key).type end + # Return the type of the Column with the given +key+ as its + # header as a String. + def types + result = {} + headers.each do |h| + result[h] = type(h) + end + result + end + # :category: Attributes # Set the column type for Column with the given +key+ as a String type. def force_string!(*keys) keys.each do |h| @@ -426,11 +462,11 @@ end # :category: Attributes # Return a Hash of the Table's Column header symbols to type strings. - def types + def col_types result = {} columns.each do |c| result[c.header] = c.type end result @@ -443,15 +479,15 @@ columns.map(&:header) end # :category: Attributes - # Return whether the column with the given head should be made tolerant. + # Return whether the column with the given head is supposed to be + # tolerant. We can't just look up the Column because it may not be build + # yet, as when we do a row-by-row add. def tolerant_col?(h) - return true if tolerant_columns.include?(:'*') - - tolerant_columns.include?(h) + tolerant_cols.include?(h.to_s.as_sym) || self.omni_tol end # :category: Attributes # Return the number of rows in the Table. @@ -992,14 +1028,14 @@ def where(expr) expr = expr.to_s result = empty_dup headers.each do |h| col = - if tolerant_col?(h) - Column.new(header: h, tolerant: true) - else - Column.new(header: h) - end + if tolerant_col?(h) + Column.new(header: h, tolerant: true) + else + Column.new(header: h) + end result.add_column(col) end ev = Evaluator.new(ivars: { row: 0, group: 0 }) rows.each_with_index do |row, k| grp = row_index_to_group_index(k)