lib/smarter_csv.rb in smarter_csv-1.8.0 vs lib/smarter_csv.rb in smarter_csv-1.8.1
- old
+ new
@@ -1,28 +1,29 @@
# frozen_string_literal: true
require_relative "extensions/hash"
require_relative "smarter_csv/version"
-# require_relative "smarter_csv/smarter_csv" unless ENV['CI'] # does not compile/link in CI?
-require 'smarter_csv.bundle' unless ENV['CI'] # does not compile/link in CI?
+require_relative "smarter_csv/smarter_csv" unless ENV['CI'] # does not compile/link in CI?
+# require 'smarter_csv.bundle' unless ENV['CI'] # local testing
module SmarterCSV
class SmarterCSVException < StandardError; end
class HeaderSizeMismatch < SmarterCSVException; end
class IncorrectOption < SmarterCSVException; end
+ class ValidationError < SmarterCSVException; end
class DuplicateHeaders < SmarterCSVException; end
class MissingHeaders < SmarterCSVException; end
class NoColSepDetected < SmarterCSVException; end
- class KeyMappingError < SmarterCSVException; end
- class MalformedCSVError < SmarterCSVException; end
+ class KeyMappingError < SmarterCSVException; end # CURRENTLY UNUSED -> version 1.9.0
# first parameter: filename or input object which responds to readline method
def SmarterCSV.process(input, options = {}, &block)
options = default_options.merge(options)
options[:invalid_byte_sequence] = '' if options[:invalid_byte_sequence].nil?
puts "SmarterCSV OPTIONS: #{options.inspect}" if options[:verbose]
+ validate_options!(options)
headerA = []
result = []
@file_line_count = 0
@csv_line_count = 0
@@ -212,18 +213,19 @@
force_simple_split: false,
force_utf8: false,
headers_in_file: true,
invalid_byte_sequence: '',
keep_original_headers: false,
- key_mapping_hash: nil,
+ key_mapping: nil,
quote_char: '"',
remove_empty_hashes: true,
remove_empty_values: true,
remove_unmapped_keys: false,
remove_values_matching: nil,
remove_zero_values: false,
required_headers: nil,
+ required_keys: nil,
row_sep: :auto, # was: $/,
silence_missing_keys: false,
skip_lines: nil,
strings_as_keys: false,
strip_chars_from_headers: nil,
@@ -484,17 +486,17 @@
headerA.map!{|x| x.to_sym } unless options[:strings_as_keys] || options[:keep_original_headers]
unless options[:user_provided_headers] # wouldn't make sense to re-map user provided headers
key_mappingH = options[:key_mapping]
+
# do some key mapping on the keys in the file header
# if you want to completely delete a key, then map it to nil or to ''
if !key_mappingH.nil? && key_mappingH.class == Hash && key_mappingH.keys.size > 0
unless options[:silence_missing_keys]
# if silence_missing_keys are not set, raise error if missing header
missing_keys = key_mappingH.keys - headerA
-
puts "WARNING: missing header(s): #{missing_keys.join(",")}" unless missing_keys.empty?
end
headerA.map!{|x| key_mappingH.has_key?(x) ? (key_mappingH[x].nil? ? nil : key_mappingH[x]) : (options[:remove_unmapped_keys] ? nil : x)}
end
@@ -508,18 +510,27 @@
unless options[:user_provided_headers] || duplicate_headers.empty?
raise SmarterCSV::DuplicateHeaders, "ERROR: duplicate headers: #{duplicate_headers.join(',')}"
end
- if options[:required_headers] && options[:required_headers].is_a?(Array)
- missing_headers = []
- options[:required_headers].each do |k|
- missing_headers << k unless headerA.include?(k)
+ # deprecate required_headers
+ if !options[:required_headers].nil?
+ puts "DEPRECATION WARNING: please use 'required_keys' instead of 'required headers'"
+ if options[:required_keys].nil?
+ options[:required_keys] = options[:required_headers]
+ options[:required_headers] = nil
end
- raise SmarterCSV::MissingHeaders, "ERROR: missing headers: #{missing_headers.join(',')}" unless missing_headers.empty?
end
+ if options[:required_keys] && options[:required_keys].is_a?(Array)
+ missing_keys = []
+ options[:required_keys].each do |k|
+ missing_keys << k unless headerA.include?(k)
+ end
+ raise SmarterCSV::MissingHeaders, "ERROR: missing attributes: #{missing_keys.join(',')}" unless missing_keys.empty?
+ end
+
@headers = headerA
[headerA, header_size]
end
def process_duplicate_headers(headers, options)
@@ -544,18 +555,33 @@
UTF_16_BOM = %w[fe ff].freeze
UTF_16LE_BOM = %w[ff fe].freeze
def remove_bom(str)
str_as_hex = str.bytes.map{|x| x.to_s(16)}
- # if string does not start with one of the bytes above, there is no BOM
+ # if string does not start with one of the bytes, there is no BOM
return str unless %w[ef fe ff 0].include?(str_as_hex[0])
return str.byteslice(4..-1) if [UTF_32_BOM, UTF_32LE_BOM].include?(str_as_hex[0..3])
return str.byteslice(3..-1) if str_as_hex[0..2] == UTF_8_BOM
return str.byteslice(2..-1) if [UTF_16_BOM, UTF_16LE_BOM].include?(str_as_hex[0..1])
puts "SmarterCSV found unhandled BOM! #{str.chars[0..7].inspect}"
str
+ end
+
+ def validate_options!(options)
+ keys = options.keys
+ errors = []
+ errors << "invalid row_sep" if keys.include?(:row_sep) && !option_valid?(options[:row_sep])
+ errors << "invalid col_sep" if keys.include?(:col_sep) && !option_valid?(options[:col_sep])
+ errors << "invalid quote_char" if keys.include?(:quote_char) && !option_valid?(options[:quote_char])
+ raise SmarterCSV::ValidationError, errors.inspect if errors.any?
+ end
+
+ def option_valid?(str)
+ return true if str.is_a?(Symbol) && str == :auto
+ return true if str.is_a?(String) && !str.empty?
+ false
end
def candidated_column_separators_from_headers(filehandle, options, delimiters)
candidates = Hash.new(0)
line = readline_with_counts(filehandle, options.slice(:row_sep))