module Eco
  class CSV
    # Extension of `::CSV::Table` with helpers to build a table out of
    # different input shapes and to manipulate rows and columns.
    # @note rows where every cell is `nil` or an empty string are removed on creation.
    class Table < ::CSV::Table
      # @param input [Array<::CSV::Row>, Array<Array>, Array<Hash>, Hash, ::CSV::Table]
      #   - when `Array<Array>` => all `rows` as arrays where first array is the **header**
      #   - when `Array<Hash>` => the keys of the first hash are used as the **header**
      #   - when `Hash` => keys are the headers and values are the column arrays
      def initialize(input)
        super(to_rows_array(input))

        # `nil.to_s` is empty as well, so one check covers both `nil` and blank cells
        delete_if do |row|
          row.fields.all? {|value| value.to_s.empty?}
        end
      end

      # It ensures blank strings are set to `nil`
      # @note every cell is converted to `String` and stripped; cells are
      #   assigned by position, so repeated header names are handled correctly.
      # @return [Eco::CSV::Table] `self`
      def nil_blank_cells!
        each do |row|
          row.fields.each_with_index do |value, idx|
            value    = value.to_s.strip
            row[idx] = value.empty? ? nil : value
          end
        end
        self
      end

      # A new table from `self` where blank strings have been set to `nil`
      # @return [Eco::CSV::Table]
      def nil_blank_cells
        self.class.new(self).nil_blank_cells!
      end

      # @yield [row] block that returns the group key of each row
      # @return [Hash] where keys are the groups and the values a `Eco::CSV::Table`
      def group_by(&block)
        rows.group_by(&block).transform_values do |grouped_rows|
          self.class.new(grouped_rows)
        end
      end

      # It allows to rename the header names
      # @yield [header] the current header name
      # @yieldreturn the new header name
      # @return [Eco::CSV::Table]
      def transform_headers
        cols = columns
        cols.each do |col|
          col[0] = yield(col.first)
        end
        columns_to_table(cols)
      end

      # When there are headers with the same name, it merges those columns
      # @note it also offers a way to resolve merge conflicts
      # @yield [value, previous, name] called when a repeated header is found again in a row
      # @yieldreturn the value to keep for that header
      # @return [Eco::CSV::Table]
      def merge_same_header_names
        dups = duplicated_header_names

        out_rows = map do |row|
          row.to_a.each_with_object({}) do |(name, value), out|
            out[name] =
              if dups.include?(name) && out.key?(name)
                # without a block the latest non-nil value wins
                block_given? ? yield(value, out[name], name) : (value || out[name])
              elsif out.key?(name)
                out[name]
              else
                value
              end
          end
        end

        self.class.new(out_rows)
      end

      # @return [Array] list of duplicated header names (in order of first appearance)
      def duplicated_header_names
        headers.group_by(&:itself).select {|_name, found| found.length > 1}.keys
      end

      # Builds a new table with the result of the block for each row.
      # @note rows for which the block returns neither an `Array` nor a
      #   `::CSV::Row` are dropped.
      # @yield [row] each `::CSV::Row` of the table
      # @return [Eco::CSV::Table]
      def transform_values
        transformed_rows = rows.map do |row|
          res = yield(row)
          case res
          when Array
            ::CSV::Row.new(row.headers, res)
          when ::CSV::Row
            res
          end
        end
        self.class.new(transformed_rows)
      end

      # Slices the selected rows
      # @note only the first argument is considered.
      # @param index [Range, Numeric] the row(s) to keep (`0` is the first data row)
      # @return [Eco::CSV::Table] `self` when the selector is not supported
      def slice(*index)
        selector = index.first
        case selector
        when Range
          self.class.new(rows.slice(selector) || [])
        when Numeric
          # a single index gives one row: wrap it so it is treated as a list of rows
          self.class.new([rows[selector]].compact)
        else
          self
        end
      end

      # Slices the selected columns
      # @note for `Range` and `Numeric` only the first argument is considered;
      #   for `String` all the given header names are used (unknown names are skipped).
      # @raise [ArgumentError] when no column is selected
      # @return [Eco::CSV::Table] `self` when the selector is not supported
      def slice_columns(*index)
        selector = index.first
        case selector
        when Range
          columns_to_table(columns.slice(selector) || [])
        when Numeric
          # a single index gives one column: wrap it so it is treated as a list of columns
          columns_to_table([columns[selector]].compact)
        when String
          all_cols = columns
          selected = index.map do |name|
            all_cols.find {|col| col.first == name}
          end.compact
          columns_to_table(selected)
        else
          self
        end
      end

      # @param i [Integer, String, Array] position of the column, its header
      #   name, or the column itself (header followed by its values)
      # @raise [ArgumentError] when no column is left
      # @return [Eco::CSV::Table] a new table without the column
      def delete_column(i)
        csv_cols = columns
        case i
        when Integer
          csv_cols.delete_at(i)
        when Array
          csv_cols.delete(i)
        else
          csv_cols.reject! {|col| col.first == i}
        end
        columns_to_table(csv_cols)
      end

      # Adds a new empty column
      # @note by default it adds it to the end.
      # @param header_name [String] header of the new column
      # @param pos [Integer] index where to add the column (i.e. `0` for first)
      # @raise [ArgumentError] when `header_name` is blank
      # @return [Eco::CSV::Table] with a new empty column
      def add_column(header_name, pos: -1)
        header_name = header_name.to_s.strip
        raise ArgumentError, "header_name can't be blank" if header_name.empty?

        new_col = Array.new(length).unshift(header_name)
        columns_to_table(columns.insert(pos, new_col))
      end

      # @note by default it adds as a first column
      # @param header_name [String] header of the new column
      # @param pos [Integer] index where to add the column (i.e. `-1` for last)
      # @return [Eco::CSV::Table] with a new column named `header_name` with the row number
      def add_index_column(header_name = 'idx', pos: 0)
        header_name = header_name.to_s.strip
        add_column(header_name, pos: pos).tap do |table|
          table.each.with_index do |row, idx|
            # the header counts as row 1, so the first data row is number 2
            row[header_name] = idx + 2
          end
        end
      end

      # @return [Array<::CSV::Row>]
      def rows
        each_with_object([]) {|row, out| out << row}
      end

      # It removes all rows where all columns' values are the same
      # @note it switches the table to row mode (`by_row!`).
      def delete_duplicates!
        unique_rows = []
        by_row!.delete_if do |row|
          found = unique_rows.any? {|done| equal_rows?(row, done)}
          unique_rows << row unless found
          found
        end
      end

      # @param row_1 [CSV::Row] row to be compared
      # @param row_2 [CSV::Row] row to be compared
      # @return [Boolean] `true` if all values of `row_1` are as of `row_2`
      def equal_rows?(row_1, row_2)
        row_1.fields.zip(row_2.fields).all? do |(v_1, v_2)|
          v_1 == v_2
        end
      end

      # @return [Integer] total number of rows not including the header
      def length
        to_a.length - 1
      end

      # @return [Boolean] `true` when there are no rows
      def empty?
        length < 1
      end

      # @return [Array<Array>] each array is the column header followed by its values
      def columns
        to_a.transpose
      end

      # Creates a single `Hash` where each key, value is a column (header + values)
      # @note it will override columns with same header name
      # @return [Hash] keys are headers, values are arrays
      def columns_hash
        columns.to_h do |col|
          [col.first, col[1..]]
        end
      end

      # Returns an array of row hashes
      # @note it will override columns with same header
      # @return [Array<Hash>]
      def to_a_h
        rows.map(&:to_h)
      end

      # @see #to_a_h
      def to_array_of_hashes
        to_a_h
      end

      private

      # Builds a new table out of a list of columns (header followed by values).
      # @raise [ArgumentError] when there is no column
      def columns_to_table(columns_array)
        rows_data = columns_array.transpose
        return self.class.new(rows_data) if rows_data.length.positive?

        raise ArgumentError, "Expecting 'columns_array' to at least have header. Given: #{columns_array}"
      end

      # Normalizes the supported inputs into `Array<::CSV::Row>`.
      def to_rows_array(data)
        case data
        when ::CSV::Table
          to_rows_array(data.to_a)
        when Hash
          # hash of columns header as key and column array as value
          to_rows_array(columns_hash_to_rows(data))
        when Enumerable
          items = data.to_a.compact
          return items if items.empty?

          sample = items.first
          case sample
          when ::CSV::Row
            items
          when Array
            rows_from_arrays(items)
          when Hash
            rows_from_hashes(items)
          else
            msg = 'Expected data that can be transformed into Array<::CSV::Row>. ' \
                  "Given 'Enumerable' of '#{sample.class}'"
            raise msg
          end
        else
          raise "Input type not supported. Given: #{data.class}"
        end
      end

      # Turns a hash of columns into rows arrays (header first).
      # Shorter columns are padded with `nil`.
      def columns_hash_to_rows(hash)
        cols   = hash.values.map {|col| Array(col)}
        height = cols.map(&:length).max || 0
        body   = Array.new(height) do |idx|
          cols.map {|col| col[idx]}
        end
        [hash.keys, *body]
      end

      # First array is the header; a header alone gives one blank row.
      def rows_from_arrays(arrays)
        headers, *body = arrays
        body = [Array.new(headers.length)] if body.empty?
        body.map do |arr_row|
          ::CSV::Row.new(headers, arr_row)
        end
      end

      # The keys of the first hash define the headers (as strings).
      def rows_from_hashes(hashes)
        headers     = hashes.first.keys
        headers_str = headers.map(&:to_s)
        hashes.map do |hash|
          values = hash.values_at(*headers).map do |value|
            if_array_to_pipe_string(value)
            end
          ::CSV::Row.new(headers_str, values)
        end
      end

      # Array values are serialized as a `|` separated string.
      def if_array_to_pipe_string(value)
        return value unless value.is_a?(Array)

        value.join('|')
      end
    end
  end
end