lib/io_streams/tabular/header.rb in iostreams-1.7.0 vs lib/io_streams/tabular/header.rb in iostreams-1.8.0
- old
+ new
@@ -1,9 +1,12 @@
module IOStreams
class Tabular
# Process files / streams that start with a header.
class Header
+ # Column names that begin with this prefix have been rejected and should be ignored.
+ IGNORE_PREFIX = "__rejected__".freeze
+
attr_accessor :columns, :allowed_columns, :required_columns, :skip_unknown
# Header
#
# Parameters
@@ -15,12 +18,12 @@
#
# allowed_columns [Array<String>]
# List of columns to allow.
# Default: nil ( Allow all columns )
# Note:
- # When supplied any columns that are rejected will be returned in the cleansed columns
- # as nil so that they can be ignored during processing.
+ # * So that rejected columns can be identified in subsequent steps, they will be prefixed with `__rejected__`.
+ # For example, `Unknown Column` would be cleansed as `__rejected__Unknown Column`.
#
# required_columns [Array<String>]
# List of columns that must be present, otherwise an Exception is raised.
#
# skip_unknown [true|false]
@@ -42,23 +45,25 @@
# - Leading and trailing whitespace is stripped.
# - All characters converted to lower case.
# - Spaces and '-' are converted to '_'.
# - All characters except for letters, digits, and '_' are stripped.
#
- # Notes
- # * Raises Tabular::InvalidHeader when there are no non-nil columns left after cleansing.
+ # Notes:
+ # * So that rejected columns can be identified in subsequent steps, they will be prefixed with `__rejected__`.
+ # For example, `Unknown Column` would be cleansed as `__rejected__Unknown Column`.
+ # * Raises IOStreams::Errors::InvalidHeader when every column has been rejected, i.e. there are no non-rejected columns left after cleansing.
def cleanse!
return [] if columns.nil? || columns.empty?
ignored_columns = []
self.columns = columns.collect do |column|
cleansed = cleanse_column(column)
if allowed_columns.nil? || allowed_columns.include?(cleansed)
cleansed
else
ignored_columns << column
- nil
+ "#{IGNORE_PREFIX}#{column}"
end
end
if !skip_unknown && !ignored_columns.empty?
raise(IOStreams::Errors::InvalidHeader, "Unknown columns after cleansing: #{ignored_columns.join(',')}")
@@ -120,11 +125,11 @@
private
def array_to_hash(row)
h = {}
- columns.each_with_index { |col, i| h[col] = row[i] unless IOStreams::Utils.blank?(col) }
+ columns.each_with_index { |col, i| h[col] = row[i] unless IOStreams::Utils.blank?(col) || col.start_with?(IGNORE_PREFIX) }
h
end
# Perform cleansing on returned Hash keys during the narrowing process.
# For example, avoids issues with case etc.
@@ -132,15 +137,10 @@
unmatched = columns - hash.keys
unless unmatched.empty?
hash = hash.dup
unmatched.each { |name| hash[cleanse_column(name)] = hash.delete(name) }
end
- # Hash#slice as of Ruby 2.5
- if hash.respond_to?(:slice)
- hash.slice(*columns)
- else
- columns.each_with_object({}) { |column, new_hash| new_hash[column] = hash[column] }
- end
+ hash.slice(*columns)
end
def cleanse_column(name)
cleansed = name.to_s.strip.downcase
cleansed.gsub!(/\s+/, "_")