lib/eco/api/common/people/default_parsers/csv_parser.rb in eco-helpers-3.0.18 vs lib/eco/api/common/people/default_parsers/csv_parser.rb in eco-helpers-3.0.19

- old
+ new

@@ -1,235 +1,48 @@
 class Eco::API::Common::People::DefaultParsers::CSVParser < Eco::API::Common::Loaders::Parser
   attribute :csv
 
+  include Eco::API::Common::People::DefaultParsers::Helpers::ExpectedHeaders
+  include Eco::API::Common::People::DefaultParsers::Helpers::NullParsing
+
   def parser(data, deps)
     Eco::CSV.parse(data, headers: true, skip_blanks: true).tap do |table|
-      require_headers!(table)
-      check_headers(table) if deps[:check_headers] && check_headers?
-    end.each_with_object([]) do |row, arr_hash|
-      row_hash = row.headers.uniq.each_with_object({}) do |attr, hash|
+      require_headers!(table.headers)
+
+      next unless deps[:check_headers]
+      next unless check_headers?
+
+      check_headers!(
+        table.headers,
+        order_check: options.dig(:input, :header_check, :order)
+      )
+    end.each_with_object([]) do |item, arr_hash|
+      item_hash = item.headers.uniq.each_with_object({}) do |attr, hash|
         next if attr.to_s.strip.empty?
-        hash[attr.strip] = parse_string(row[attr])
+
+        hash[attr.strip] = parse_null(item[attr])
       end
-      arr_hash.push(row_hash)
+
+      arr_hash.push(item_hash)
     end
   end
 
   def serializer(array_hash, _deps)
     arr_rows = []
+
     unless array_hash.empty?
       header = array_hash.first.keys
+
       arr_rows = array_hash.map do |csv_row|
         CSV::Row.new(header, csv_row.values_at(*header))
       end
     end
+
     CSV::Table.new(arr_rows).to_csv
   end
 
   private
 
-  def abort(msg)
-    super(msg, raising: false)
-  end
-
-  def require_headers!(table)
-    headers = table.headers
-
-    abort("Missing headers in CSV") unless headers&.any?
-
-    empty = []
-    headers.each_with_index do |header, idx|
-      empty << idx if header.to_s.strip.empty?
-    end
-
-    abort("Empty headers in column(s): #{empty.join(', ')}") if empty.any?
-
-    true
-  end
-
   def check_headers?
     !options.dig(:input, :header_check, :skip)
-  end
-
-  def options
-    ASSETS.cli.options
-  end
-
-  def parse_string(value)
-    return nil if value.to_s.empty?
-    return nil if null?(value)
-    value
-  end
-
-  def null?(value)
-    return true unless value
-
-    str = value.strip.upcase
-    ["NULL"].any? {|token| str == token}
-  end
-
-  def check_headers(table) # rubocop:disable Metrics/AbcSize
-    headers = table.headers
-    unmatch = []
-    unmatch = unmatched_headers(headers) if options.dig(:input, :header_check, :order)
-    missing = missing_headers(headers)
-    unknown = unknown_headers(headers)
-    criteria = [unknown, missing[:direct], missing[:indirect], unmatch]
-    return if criteria.all?(&:empty?)
-
-    msg = "Detected possible HEADER ISSUES !!!\n"
-
-    # requires exact match
-    unless unmatch.empty?
-      msg << "CSV headers do NOT exactly match the expected:\n"
-      msg << " * Expected: #{known_headers}\n"
-      expected, given = unmatch.first
-      msg << " * First unmatch => Given: '#{given}' where expected '#{expected}'\n"
-      missed = known_headers - headers
-      unless missed.empty?
-        msg << " * Missing headers:\n"
-        msg << " - #{missed.join("\n - ")}\n"
-      end
-    end
-
-    msg << "Missing or Wrong HEADER names in the CSV file:\n"
-    msg << " * UNKNOWN (or not used?): #{unknown}\n" unless unknown.empty?
-    msg << " * MISSING HEADER: #{missing[:direct]}\n" unless missing[:direct].empty?
-
-    unless (data = missing[:indirect]).empty?
-      msg << " * MISSING INDIRECTLY:\n"
-      data.each do |ext, info|
-        msg << " - '#{ext}' => "
-        msg << (info[:attrs] || {}).map do |status, attrs|
-          if status == :inactive
-            "makes inactive: #{attrs}"
-          elsif status == :active
-            "there could be missing info in: #{attrs}"
-          end
-        end.compact.join("; ")
-        msg << "\n"
-      end
-    end
-
-    log(:warn) { msg }
-
-    msg = "There were issues identified on the CSV header names. Aborting..."
-    abort(msg) if options.dig(:input, :header_check, :must_be_valid)
-
-    sleep(2)
-  end
-
-  def unmatched_headers(headers)
-    known_headers.zip(headers).reject do |(expected, given)|
-      expected == given
-    end
-  end
-
-  def unknown_headers(headers)
-    (headers - known_headers) - all_internal_attrs
-  end
-
-  def missing_headers(headers) # rubocop:disable Metrics/AbcSize
-    int_head = internal_present_or_active(headers)
-    external = headers.select do |e|
-      i = fields_mapper.to_internal(e)
-      int_head.include?(i)
-    end
-
-    ext_present = known_headers_present(int_head) | external
-    ext_miss = known_headers - ext_present
-
-    {
-      direct: [],
-      indirect: {}
-    }.tap do |missing|
-      ext_miss.each do |ext|
-        next unless (int = fields_mapper.to_internal(ext))
-
-        missing[:direct] << ext if all_internal_attrs.include?(int)
-        related_attrs_requirements = required_attrs.values.select do |req|
-          dep = req.dependant?(int)
-          affects = dep && !int_head.include?(int)
-          in_header = int_head.include?(req.attr)
-          affects || (dep && !in_header)
-        end
-
-        next if related_attrs_requirements.empty?
-
-        data = missing[:indirect][ext] = {}
-        data[:int] = int
-        data[:attrs] = {}
-
-        related_attrs_requirements.each_with_object(data[:attrs]) do |req, attrs|
-          status = req.active?(*int_head) ? :active : :inactive
-          attrs[status] ||= []
-          attrs[status] << req.attr
-        end
-      end
-    end
-  end
-
-  def known_headers_present(headers_internal)
-    known_headers.select do |ext|
-      int = fields_mapper.to_internal(ext)
-      headers_internal.include?(int)
-    end
-  end
-
-  # Scopes what internal attrs appear in headers as they are
-  def internal_present_or_active(headers, inactive_requirements = {}) # rubocop:disable Metrics/AbcSize
-    # internal attrs that are not being mapped
-    int_all = all_internal_attrs.reject {|i| fields_mapper.external?(i)}
-    hint = headers & int_all
-    hext = headers - hint
-    int_present = hint + hext.map {|e| fields_mapper.to_internal(e)}.compact
-
-    update_inactive = proc do
-      inactive_requirements.dup.each do |attr, req|
-        next unless req.active?(*int_present)
-
-        inactive_requirements.delete(attr)
-        int_present << attr
-        update_inactive.call
-      end
-    end
-
-    required_attrs.each_value do |req|
-      next if int_present.include?(req)
-
-      if req.active?(*int_present)
-        inactive_requirements.delete(req.attr)
-        int_present << req.attr
-        update_inactive.call
-      else
-        inactive_requirements[req.attr] = req
-      end
-    end
-
-    int_present
-  end
-
-  # The csv header names as expected
-  def known_headers
-    @known_headers ||= fields_mapper.list(:external).compact.uniq
-  end
-
-  def fields_mapper
-    session.fields_mapper
-  end
-
-  def required_attrs
-    @required_attrs ||= person_parser.required_attrs.to_h {|ra| [ra.attr, ra]}
-  end
-
-  def all_internal_attrs
-    @all_internal_attrs ||= [].tap do |int_attrs|
-      known_int_attrs = person_parser.all_attrs(include_defined_parsers: true)
-      known_int_attrs |= fields_mapper.list(:internal).compact
-      int_attrs.concat(known_int_attrs)
-    end
-  end
-
-  def person_parser
-    session.entry_factory.person_parser
   end
 end