lib/io_streams/tabular/header.rb in iostreams-1.7.0 vs lib/io_streams/tabular/header.rb in iostreams-1.8.0

- old
+ new

@@ -1,9 +1,12 @@ module IOStreams class Tabular # Process files / streams that start with a header. class Header + # Column names that begin with this prefix have been rejected and should be ignored. + IGNORE_PREFIX = "__rejected__".freeze + attr_accessor :columns, :allowed_columns, :required_columns, :skip_unknown # Header # # Parameters @@ -15,12 +18,12 @@ # # allowed_columns [Array<String>] # List of columns to allow. # Default: nil ( Allow all columns ) # Note: - # When supplied any columns that are rejected will be returned in the cleansed columns - # as nil so that they can be ignored during processing. + # * So that rejected columns can be identified in subsequent steps, they will be prefixed with `__rejected__`. + # For example, `Unknown Column` would be cleansed as `__rejected__Unknown Column`. # # required_columns [Array<String>] # List of columns that must be present, otherwise an Exception is raised. # # skip_unknown [true|false] @@ -42,23 +45,25 @@ # - Leading and trailing whitespace is stripped. # - All characters converted to lower case. # - Spaces and '-' are converted to '_'. # - All characters except for letters, digits, and '_' are stripped. # - # Notes - # * Raises Tabular::InvalidHeader when there are no non-nil columns left after cleansing. + # Notes: + # * So that rejected columns can be identified in subsequent steps, they will be prefixed with `__rejected__`. + # For example, `Unknown Column` would be cleansed as `__rejected__Unknown Column`. + # * Raises Tabular::InvalidHeader when there are no non-rejected columns left after cleansing. def cleanse! return [] if columns.nil? || columns.empty? ignored_columns = [] self.columns = columns.collect do |column| cleansed = cleanse_column(column) if allowed_columns.nil? || allowed_columns.include?(cleansed) cleansed else ignored_columns << column - nil + "#{IGNORE_PREFIX}#{column}" end end if !skip_unknown && !ignored_columns.empty? 
raise(IOStreams::Errors::InvalidHeader, "Unknown columns after cleansing: #{ignored_columns.join(',')}") @@ -120,11 +125,11 @@ private def array_to_hash(row) h = {} - columns.each_with_index { |col, i| h[col] = row[i] unless IOStreams::Utils.blank?(col) } + columns.each_with_index { |col, i| h[col] = row[i] unless IOStreams::Utils.blank?(col) || col.start_with?(IGNORE_PREFIX) } h end # Perform cleansing on returned Hash keys during the narrowing process. # For example, avoids issues with case etc. @@ -132,15 +137,10 @@ unmatched = columns - hash.keys unless unmatched.empty? hash = hash.dup unmatched.each { |name| hash[cleanse_column(name)] = hash.delete(name) } end - # Hash#slice as of Ruby 2.5 - if hash.respond_to?(:slice) - hash.slice(*columns) - else - columns.each_with_object({}) { |column, new_hash| new_hash[column] = hash[column] } - end + hash.slice(*columns) end def cleanse_column(name) cleansed = name.to_s.strip.downcase cleansed.gsub!(/\s+/, "_")