lib/smarter_csv/headers.rb in smarter_csv-1.9.3 vs lib/smarter_csv/headers.rb in smarter_csv-1.10.0
- old
+ new
@@ -12,11 +12,15 @@
if options[:headers_in_file] # extract the header line
# process the header line in the CSV file
# the first line of a CSV file contains the header; it might be commented out, so we need to read it anyway
header_line = @raw_header = readline_with_counts(filehandle, options)
header_line = preprocess_header_line(header_line, options)
- file_header_array, file_header_size = parse_and_modify_headers(header_line, options)
+
+ file_header_array, file_header_size = parse(header_line, options)
+
+ file_header_array = header_transformations(file_header_array, options)
+
else
unless options[:user_provided_headers]
raise SmarterCSV::IncorrectOption, "ERROR: If :headers_in_file is set to false, you have to provide :user_provided_headers"
end
end
@@ -34,26 +38,16 @@
raise SmarterCSV::HeaderSizeMismatch, "ERROR: :user_provided_headers defines #{user_header_array.size} headers != CSV-file has #{file_header_size} headers"
else
# we could print out the mapping of file_header_array to header_array here
end
end
+
header_array = user_header_array
else
header_array = file_header_array
end
- # detect duplicate headers and disambiguate
- header_array = disambiguate_headers(header_array, options) if options[:duplicate_header_suffix]
-
- # symbolize headers
- header_array.map!{|x| x.to_sym } unless options[:strings_as_keys] || options[:keep_original_headers]
-
- # wouldn't make sense to re-map user provided headers
- header_array = remap_headers(header_array, options) if options[:key_mapping] && !options[:user_provided_headers]
-
- validate_and_deprecate_headers(header_array, options)
-
[header_array, header_array.size]
end
private
@@ -61,95 +55,9 @@
header_line = enforce_utf8_encoding(header_line, options)
header_line = remove_comments_from_header(header_line, options)
header_line = header_line.chomp(options[:row_sep])
header_line.gsub!(options[:strip_chars_from_headers], '') if options[:strip_chars_from_headers]
header_line
- end
-
- def parse_and_modify_headers(header_line, options) # parses the header line, then cleans up each header string in place
- file_header_array, file_header_size = parse(header_line, options) # parse is defined outside this view; its two results are unpacked here
-
- file_header_array.map!{|x| x.gsub(%r/#{options[:quote_char]}/, '')} # removes every occurrence of the quote character; NOTE(review): quote_char is interpolated into the regexp unescaped
- file_header_array.map!{|x| x.strip} if options[:strip_whitespace]
-
- unless options[:keep_original_headers] # the normalization below is skipped when the original headers are to be kept
- file_header_array.map!{|x| x.gsub(/\s+|-+/, '_')} # each run of whitespace, and each run of dashes, becomes a single underscore
- file_header_array.map!{|x| x.downcase} if options[:downcase_header]
- end
- [file_header_array, file_header_size] # the size is passed through as reported by parse, not recomputed
- end
-
- def disambiguate_headers(headers, options) # returns a new array in which repeated headers get a suffix plus their occurrence count
- counts = Hash.new(0) # number of occurrences seen so far, per header
- headers.map do |header|
- counts[header] += 1
- counts[header] > 1 ? "#{header}#{options[:duplicate_header_suffix]}#{counts[header]}" : header # the first occurrence is kept as-is; the second gets suffix + 2, the third suffix + 3, ...
- end
- end
-
- # Renames the headers found in the file according to options[:key_mapping]; mutates the given headers array and returns it.
- # If you want to completely delete a key, map it to nil or to '' (this method itself only substitutes the mapped value).
- def remap_headers(headers, options)
- key_mapping = options[:key_mapping]
- if key_mapping.empty? || !key_mapping.is_a?(Hash) || key_mapping.keys.empty? # NOTE(review): empty? is called before the Hash check, so a value without #empty? raises NoMethodError instead of IncorrectOption
- raise(SmarterCSV::IncorrectOption, "ERROR: incorrect format for key_mapping! Expecting hash with from -> to mappings")
- end
-
- key_mapping = options[:key_mapping] # redundant re-assignment; same value as above
- # mapping keys that are absent from the headers raise an error below, unless silence_missing_keys says otherwise
- missing_keys = key_mapping.keys - headers
- # if the user passes a list of specific mapped keys that are optional, those are not reported as missing
- missing_keys -= options[:silence_missing_keys] if options[:silence_missing_keys].is_a?(Array)
-
- unless missing_keys.empty? || options[:silence_missing_keys] == true # a literal true silences all missing keys
- raise SmarterCSV::KeyMappingError, "ERROR: can not map headers: #{missing_keys.join(', ')}"
- end
-
- headers.map! do |header|
- if key_mapping.has_key?(header)
- key_mapping[header].nil? ? nil : key_mapping[header] # equivalent to key_mapping[header]
- elsif options[:remove_unmapped_keys]
- nil # unmapped headers become nil when :remove_unmapped_keys is set
- else
- header
- end
- end
- headers
- end
-
- # Header validations: raises on duplicate headers (unless headers were user-provided) and on missing required keys; also migrates the deprecated :required_headers option.
- def validate_and_deprecate_headers(headers, options)
- duplicate_headers = []
- headers.compact.each do |k| # nil headers are ignored when looking for duplicates
- duplicate_headers << k if headers.select{|x| x == k}.size > 1 # appended once per occurrence, so a duplicated header repeats in the error message
- end
-
- unless options[:user_provided_headers] || duplicate_headers.empty?
- raise SmarterCSV::DuplicateHeaders, "ERROR: duplicate headers: #{duplicate_headers.join(',')}"
- end
-
- # :required_headers is deprecated: print a warning, and carry its value over to :required_keys if that is not already set
- unless options[:required_headers].nil?
- puts "DEPRECATION WARNING: please use 'required_keys' instead of 'required_headers'"
- if options[:required_keys].nil?
- options[:required_keys] = options[:required_headers] # mutates the options hash that was passed in
- options[:required_headers] = nil
- end
- end
-
- if options[:required_keys] && options[:required_keys].is_a?(Array) # a non-Array :required_keys is silently ignored
- missing_keys = []
- options[:required_keys].each do |k|
- missing_keys << k unless headers.include?(k)
- end
- raise SmarterCSV::MissingKeys, "ERROR: missing attributes: #{missing_keys.join(',')}" unless missing_keys.empty?
- end
- end
-
- def enforce_utf8_encoding(header, options) # returns the header line re-tagged and re-encoded as UTF-8, or unchanged when no re-encoding is needed
- return header unless options[:force_utf8] || options[:file_encoding] !~ /utf-8/i # left untouched only when :force_utf8 is falsy and :file_encoding matches /utf-8/i
-
- header.force_encoding('utf-8').encode('utf-8', invalid: :replace, undef: :replace, replace: options[:invalid_byte_sequence]) # force_encoding re-tags the receiver in place; invalid/undefined bytes are replaced with options[:invalid_byte_sequence]
end
def remove_comments_from_header(header, options)
return header unless options[:comment_regexp]