lib/smarter_csv/headers.rb in smarter_csv-1.9.3 vs lib/smarter_csv/headers.rb in smarter_csv-1.10.0

- old
+ new

@@ -12,11 +12,15 @@ if options[:headers_in_file] # extract the header line # process the header line in the CSV file.. # the first line of a CSV file contains the header .. it might be commented out, so we need to read it anyhow header_line = @raw_header = readline_with_counts(filehandle, options) header_line = preprocess_header_line(header_line, options) - file_header_array, file_header_size = parse_and_modify_headers(header_line, options) + + file_header_array, file_header_size = parse(header_line, options) + + file_header_array = header_transformations(file_header_array, options) + else unless options[:user_provided_headers] raise SmarterCSV::IncorrectOption, "ERROR: If :headers_in_file is set to false, you have to provide :user_provided_headers" end end @@ -34,26 +38,16 @@ raise SmarterCSV::HeaderSizeMismatch, "ERROR: :user_provided_headers defines #{user_header_array.size} headers != CSV-file has #{file_header_size} headers" else # we could print out the mapping of file_header_array to header_array here end end + header_array = user_header_array else header_array = file_header_array end - # detect duplicate headers and disambiguate - header_array = disambiguate_headers(header_array, options) if options[:duplicate_header_suffix] - - # symbolize headers - header_array.map!{|x| x.to_sym } unless options[:strings_as_keys] || options[:keep_original_headers] - - # wouldn't make sense to re-map user provided headers - header_array = remap_headers(header_array, options) if options[:key_mapping] && !options[:user_provided_headers] - - validate_and_deprecate_headers(header_array, options) - [header_array, header_array.size] end private @@ -61,95 +55,9 @@ header_line = enforce_utf8_encoding(header_line, options) header_line = remove_comments_from_header(header_line, options) header_line = header_line.chomp(options[:row_sep]) header_line.gsub!(options[:strip_chars_from_headers], '') if options[:strip_chars_from_headers] header_line - end - - def parse_and_modify_headers(header_line, options) - file_header_array, file_header_size = parse(header_line, options) - - file_header_array.map!{|x| x.gsub(%r/#{options[:quote_char]}/, '')} - file_header_array.map!{|x| x.strip} if options[:strip_whitespace] - - unless options[:keep_original_headers] - file_header_array.map!{|x| x.gsub(/\s+|-+/, '_')} - file_header_array.map!{|x| x.downcase} if options[:downcase_header] - end - [file_header_array, file_header_size] - end - - def disambiguate_headers(headers, options) - counts = Hash.new(0) - headers.map do |header| - counts[header] += 1 - counts[header] > 1 ? "#{header}#{options[:duplicate_header_suffix]}#{counts[header]}" : header - end - end - - # do some key mapping on the keys in the file header - # if you want to completely delete a key, then map it to nil or to '' - def remap_headers(headers, options) - key_mapping = options[:key_mapping] - if key_mapping.empty? || !key_mapping.is_a?(Hash) || key_mapping.keys.empty? - raise(SmarterCSV::IncorrectOption, "ERROR: incorrect format for key_mapping! Expecting hash with from -> to mappings") - end - - key_mapping = options[:key_mapping] - # if silence_missing_keys are not set, raise error if missing header - missing_keys = key_mapping.keys - headers - # if the user passes a list of speciffic mapped keys that are optional - missing_keys -= options[:silence_missing_keys] if options[:silence_missing_keys].is_a?(Array) - - unless missing_keys.empty? || options[:silence_missing_keys] == true - raise SmarterCSV::KeyMappingError, "ERROR: can not map headers: #{missing_keys.join(', ')}" - end - - headers.map! do |header| - if key_mapping.has_key?(header) - key_mapping[header].nil? ? nil : key_mapping[header] - elsif options[:remove_unmapped_keys] - nil - else - header - end - end - headers - end - - # header_validations - def validate_and_deprecate_headers(headers, options) - duplicate_headers = [] - headers.compact.each do |k| - duplicate_headers << k if headers.select{|x| x == k}.size > 1 - end - - unless options[:user_provided_headers] || duplicate_headers.empty? - raise SmarterCSV::DuplicateHeaders, "ERROR: duplicate headers: #{duplicate_headers.join(',')}" - end - - # deprecate required_headers - unless options[:required_headers].nil? - puts "DEPRECATION WARNING: please use 'required_keys' instead of 'required_headers'" - if options[:required_keys].nil? - options[:required_keys] = options[:required_headers] - options[:required_headers] = nil - end - end - - if options[:required_keys] && options[:required_keys].is_a?(Array) - missing_keys = [] - options[:required_keys].each do |k| - missing_keys << k unless headers.include?(k) - end - raise SmarterCSV::MissingKeys, "ERROR: missing attributes: #{missing_keys.join(',')}" unless missing_keys.empty? - end - end - - def enforce_utf8_encoding(header, options) - return header unless options[:force_utf8] || options[:file_encoding] !~ /utf-8/i - - header.force_encoding('utf-8').encode('utf-8', invalid: :replace, undef: :replace, replace: options[:invalid_byte_sequence]) end def remove_comments_from_header(header, options) return header unless options[:comment_regexp]