# CSV parser/serializer for people input files.
#
# Parsing turns CSV text into an Array of Hashes (one per row, keyed by the
# stripped header name). Before rows are converted the headers are validated,
# and optionally checked against the headers known to the session's fields
# mapper. Serializing does the reverse: an Array of Hashes becomes CSV text.
#
# NOTE(review): `logger`, `session` and `ASSETS` are not defined in this file;
# they are presumably provided by the parent class / the environment.
class Eco::API::Common::People::DefaultParsers::CSVParser < Eco::API::Common::Loaders::Parser
  # NOTE(review): presumably registers this parser under the `:csv` attribute
  # (macro defined by the parent class) -- confirm.
  attribute :csv

  # Parses CSV text into an Array of row Hashes.
  #
  # @param data [String] the CSV content (first line is the header).
  # @param deps [Hash] when `deps[:check_headers]` is truthy (and the header
  #   check has not been skipped via the CLI options) the headers are compared
  #   with the expected ones.
  # @return [Array<Hash>] one Hash per row; keys are the stripped header names,
  #   values are the cell values, with empty and "NULL" cells turned into `nil`.
  def parser(data, deps)
    table = Eco::CSV.parse(data, headers: true, skip_blanks: true)
    require_headers!(table)
    check_headers(table) if deps[:check_headers] && check_headers?

    table.map do |row|
      row.headers.uniq.each_with_object({}) do |attr, hash|
        # Columns with a blank header cannot be referred to: skip them.
        next if attr.to_s.strip.empty?

        hash[attr.strip] = parse_string(row[attr])
      end
    end
  end

  # Serializes an Array of Hashes into CSV text.
  #
  # The keys of the first Hash define the header and the column order.
  #
  # @param array_hash [Array<Hash>] the rows to serialize.
  # @param deps [Hash] not used by this serializer.
  # @return [String] the CSV content.
  def serializer(array_hash, deps)
    rows = []
    unless array_hash.empty?
      header = array_hash.first.keys
      rows   = array_hash.map do |csv_row|
        CSV::Row.new(header, csv_row.values_at(*header))
      end
    end
    CSV::Table.new(rows).to_csv
  end

  private

  # Logs `message` as an error and terminates the process with status 1.
  # Intentionally overrides `Kernel#abort` within this class.
  def abort(message)
    logger.error(message)
    exit(1)
  end

  # Aborts unless the table has headers and none of them is blank.
  #
  # @param table [#headers] the parsed CSV table.
  # @return [true] when the headers are acceptable.
  def require_headers!(table)
    headers = table.headers
    abort("Missing headers in CSV") if headers.nil? || headers.empty?

    # 0-based positions of the columns whose header is blank.
    empty = headers.each_index.select {|idx| headers[idx].to_s.strip.empty?}
    abort("Empty headers in column(s): #{empty.join(', ')}") unless empty.empty?
    true
  end

  # @return [Boolean] `false` when the CLI options ask to skip the header check.
  def check_headers?
    !options.dig(:input, :header_check, :skip)
  end

  # @return [Hash] the CLI options.
  def options
    ASSETS.cli.options
  end

  # @return [Object, nil] `nil` for empty or "NULL" values; otherwise `value`
  #   itself, untouched.
  def parse_string(value)
    return nil if value.to_s.empty?
    return nil if null?(value)

    value
  end

  # @return [Boolean] `true` when `value` is falsey or, ignoring surrounding
  #   whitespace and letter case, reads "NULL".
  def null?(value)
    return true unless value

    # `to_s` so non-String values do not raise on `strip`.
    %w[NULL].include?(value.to_s.strip.upcase)
  end

  # Compares the CSV headers with the expected ones and logs a warning
  # describing every issue found. When the `must_be_valid` option is set the
  # process is aborted; otherwise it pauses for 2 seconds after warning.
  #
  # @param table [#headers] the parsed CSV table.
  # @return [void]
  def check_headers(table)
    headers = table.headers
    unmatch = []
    # The positional comparison only applies when the order must be honoured.
    unmatch = unmatched_headers(headers) if options.dig(:input, :header_check, :order)
    missing = missing_headers(headers)
    unknown = unknown_headers(headers)

    criteria = [unknown, missing[:direct], missing[:indirect], unmatch]
    return if criteria.all?(&:empty?)

    parts = ["Detected possible HEADER ISSUES !!!\n"]
    # requires exact match
    parts << header_order_report(unmatch, headers) unless unmatch.empty?

    parts << "Missing or Wrong HEADER names in the CSV file:\n"
    parts << " * UNKNOWN (or not used?): #{unknown}\n" unless unknown.empty?
    parts << " * MISSING HEADER: #{missing[:direct]}\n" unless missing[:direct].empty?
    parts << indirect_missing_report(missing[:indirect]) unless missing[:indirect].empty?

    logger.warn(parts.join)

    if options.dig(:input, :header_check, :must_be_valid)
      abort("There were issues identified on the CSV header names. Aborting...")
    end

    sleep(2)
  end

  # Describes the first positional mismatch against the expected headers,
  # plus the expected headers that are absent.
  #
  # @param unmatch [Array<Array>] non-empty list of `[expected, given]` pairs.
  # @param headers [Array<String>] the headers of the CSV.
  # @return [String]
  def header_order_report(unmatch, headers)
    expected, given = unmatch.first
    lines = [
      "CSV headers do NOT exactly match the expected:\n",
      " * Expected: #{known_headers}\n",
      " * First unmatch => Given: '#{given}' where expected '#{expected}'\n"
    ]

    missed = known_headers - headers
    unless missed.empty?
      lines << " * Missing headers:\n"
      lines << " - #{missed.join("\n - ")}\n"
    end
    lines.join
  end

  # Describes the missing headers that affect other (required) attributes.
  #
  # @param indirect [Hash] the `:indirect` part of `missing_headers`.
  # @return [String]
  def indirect_missing_report(indirect)
    lines = [" * MISSING INDIRECTLY:\n"]
    indirect.each do |ext, info|
      effects = (info[:attrs] || {}).map do |status, attrs|
        case status
        when :inactive then "makes inactive: #{attrs}"
        when :active   then "there could be missing info in: #{attrs}"
        end
      end
      lines << " - '#{ext}' => #{effects.compact.join("; ")}\n"
    end
    lines.join
  end

  # @return [Array<Array>] the `[expected, given]` pairs, compared by position,
  #   that differ. Given headers beyond the number of expected ones are ignored.
  def unmatched_headers(headers)
    known_headers.zip(headers).reject do |(expected, given)|
      expected == given
    end
  end

  # @return [Array<String>] headers that are neither an expected header nor
  #   an internal attribute name.
  def unknown_headers(headers)
    (headers - known_headers) - all_internal_attrs
  end

  # Works out which expected headers are missing.
  #
  # @param headers [Array<String>] the headers of the CSV.
  # @return [Hash] with keys
  #   - `:direct` [Array<String>]: missing headers whose internal name is a
  #     known internal attribute.
  #   - `:indirect` [Hash]: per missing header, `{int:, attrs:}` where `attrs`
  #     groups under `:active` / `:inactive` the required attributes that
  #     depend on it.
  def missing_headers(headers)
    int_head  = internal_present_or_active(headers)
    ext_given = headers.select do |e|
      int_head.include?(fields_mapper.to_internal(e))
    end

    ext_present = known_headers_present(int_head) | ext_given
    ext_miss    = known_headers - ext_present

    {direct: [], indirect: {}}.tap do |missing|
      ext_miss.each do |ext|
        int = fields_mapper.to_internal(ext)
        next unless int

        missing[:direct] << ext if all_internal_attrs.include?(int)

        # Requirements that depend on `int` while either `int` or the
        # requirement's own attribute is absent from the header.
        related = required_attrs.values.select do |req|
          next false unless req.dependant?(int)

          !int_head.include?(int) || !int_head.include?(req.attr)
        end
        next if related.empty?

        attrs = related.each_with_object({}) do |req, out|
          status = req.active?(*int_head) ? :active : :inactive
          (out[status] ||= []) << req.attr
        end
        missing[:indirect][ext] = {int: int, attrs: attrs}
      end
    end
  end

  # Not memoized: the result depends on `headers_internal`, which differs
  # from one CSV to the next.
  #
  # @param headers_internal [Array<String>] internal attribute names.
  # @return [Array<String>] the expected headers whose internal name is
  #   included in `headers_internal`.
  def known_headers_present(headers_internal)
    known_headers.select do |ext|
      headers_internal.include?(fields_mapper.to_internal(ext))
    end
  end

  # Scopes what internal attrs appear in headers as they are, adding the
  # attributes of the requirements that are active given what is present.
  #
  # @param headers [Array<String>] the headers of the CSV.
  # @param inactive_requirements [Hash] filled in (mutated) with the
  #   requirements that remain inactive, keyed by attribute.
  # @return [Array<String>] the internal attribute names considered present.
  def internal_present_or_active(headers, inactive_requirements = {})
    # internal attrs that are not being mapped
    int_all     = all_internal_attrs.reject {|i| fields_mapper.external?(i)}
    hint        = headers & int_all
    hext        = headers - hint
    int_present = hint + hext.map {|e| fields_mapper.to_internal(e)}.compact

    # Each newly present attribute may activate requirements that were
    # inactive so far, so re-evaluate them recursively. Iterates a copy
    # because entries are deleted on the way.
    update_inactive = lambda do
      inactive_requirements.dup.each do |attr, req|
        next unless req.active?(*int_present)

        inactive_requirements.delete(attr)
        int_present << attr
        update_inactive.call
      end
    end

    required_attrs.each_value do |req|
      # `int_present` holds attribute names: compare the name, not the
      # requirement object.
      next if int_present.include?(req.attr)

      if req.active?(*int_present)
        inactive_requirements.delete(req.attr)
        int_present << req.attr
        update_inactive.call
      else
        inactive_requirements[req.attr] = req
      end
    end

    int_present
  end

  # The csv header names as expected
  #
  # @return [Array<String>]
  def known_headers
    @known_headers ||= fields_mapper.list(:external).compact.uniq
  end

  # @return [Object] the session's fields mapper (external <-> internal names).
  def fields_mapper
    session.fields_mapper
  end

  # @return [Hash] the person parser's required attributes, keyed by `attr`.
  def required_attrs
    @required_attrs ||= person_parser.required_attrs.each_with_object({}) do |ra, out|
      out[ra.attr] = ra
    end
  end

  # @return [Array<String>] the attributes known to the person parser
  #   (including those with defined parsers) plus the internal names known to
  #   the fields mapper.
  def all_internal_attrs
    @all_internal_attrs ||=
      person_parser.all_attrs(include_defined_parsers: true) |
      fields_mapper.list(:internal).compact
  end

  # @return [Object] the session's person parser.
  def person_parser
    session.entry_factory.person_parser
  end
end