lib/eco/api/common/people/default_parsers/csv_parser.rb in eco-helpers-2.4.9 vs lib/eco/api/common/people/default_parsers/csv_parser.rb in eco-helpers-2.5.1

- old
+ new

@@ -1,11 +1,12 @@
 class Eco::API::Common::People::DefaultParsers::CSVParser < Eco::API::Common::Loaders::Parser
   attribute :csv

   def parser(data, deps)
     Eco::CSV.parse(data, headers: true, skip_blanks: true).tap do |table|
-      check_headers(table) if deps[:check_headers] && !options.dig(:input, :skip_header_check)
+      require_headers!(table)
+      check_headers(table) if deps[:check_headers] && check_headers?
     end.each_with_object([]) do |row, arr_hash|
       row_hash = row.headers.uniq.each_with_object({}) do |attr, hash|
         next if attr.to_s.strip.empty?
         hash[attr.strip] = parse_string(row[attr])
       end
@@ -24,10 +25,30 @@
     CSV::Table.new(arr_rows).to_csv
   end

   private

+  def abort(message)
+    logger.error(message)
+    exit(1)
+  end
+
+  def require_headers!(table)
+    headers = table.headers
+    abort("Missing headers in CSV") unless headers && !headers.empty?
+    empty = []
+    with_value = headers.each_with_index do |header, idx|
+      empty << idx if header.to_s.strip.empty?
+    end
+    abort("Empty headers in column(s): #{empty.join(', ')}") unless empty.empty?
+    true
+  end
+
+  def check_headers?
+    !options.dig(:input, :header_check, :skip)
+  end
+
   def options
     ASSETS.cli.options
   end

   def parse_string(value)
@@ -41,36 +62,61 @@
     str = value.strip.upcase
     ["NULL"].any? {|token| str == token}
   end

   def check_headers(table)
-    headers = table.headers
-    missing = missing_headers(headers)
-    unknown = unknown_headers(headers)
-    unless missing.empty? && unknown.empty?
-      msg = "Detected possible HEADER ISSUES !!!\n"
-      msg << "There might be Missing or Wrong HEADER names in the CSV file:\n"
-      msg << " * UNKNOWN (or not used?): #{unknown}\n" unless unknown.empty?
-      msg << " * MISSING DIRECT: #{missing[:direct]}\n" unless (missing[:direct] || []).empty?
-      unless (data = missing[:indirect] || []).empty?
-        msg << " * MISSING INDIRECT:\n"
-        data.each do |ext, info|
-          msg << " - '#{ext}' => "
-          msg << (info[:attrs] || {}).map do |status, attrs|
-            if status == :inactive
-              "makes inactive: #{attrs}"
-            elsif status == :active
-              "there could be missing info in: #{attrs}"
-            end
-          end.compact.join("; ") + "\n"
-        end
+    headers = table.headers
+    unmatch = []
+    unmatch = unmatched_headers(headers) if options.dig(:input, :header_check, :order)
+    missing = missing_headers(headers)
+    unknown = unknown_headers(headers)
+    criteria = [unknown, missing[:direct], missing[:indirect], unmatch]
+    return if criteria.all?(&:empty?)
+
+    msg = "Detected possible HEADER ISSUES !!!\n"
+
+    # requires exact match
+    unless unmatch.empty?
+      msg << "CSV headers do NOT exactly match the expected:\n"
+      msg << " * Expected: #{known_headers}\n"
+      expected, given = unmatch.first
+      msg << " * First unmatch => Given: '#{given}' where expected '#{expected}'\n"
+      missed = known_headers - headers
+      unless missed.empty?
+        msg << " * Missing headers:\n"
+        msg << " - #{missed.join("\n - ")}\n"
       end
-      logger.warn(msg)
-      sleep(2)
     end
+    msg << "Missing or Wrong HEADER names in the CSV file:\n"
+    msg << " * UNKNOWN (or not used?): #{unknown}\n" unless unknown.empty?
+    msg << " * MISSING HEADER: #{missing[:direct]}\n" unless missing[:direct].empty?
+    unless (data = missing[:indirect]).empty?
+      msg << " * MISSING INDIRECTLY:\n"
+      data.each do |ext, info|
+        msg << " - '#{ext}' => "
+        msg << (info[:attrs] || {}).map do |status, attrs|
+          if status == :inactive
+            "makes inactive: #{attrs}"
+          elsif status == :active
+            "there could be missing info in: #{attrs}"
+          end
+        end.compact.join("; ") + "\n"
+      end
+    end
+    logger.warn(msg)
+    if options.dig(:input, :header_check, :must_be_valid)
+      abort("There were issues identified on the CSV header names. Aborting...")
+    end
+    sleep(2)
   end

+  def unmatched_headers(headers)
+    known_headers.zip(headers).reject do |(expected, given)|
+      expected == given
+    end
+  end
+
   def unknown_headers(headers)
     (headers - known_headers) - all_internal_attrs
   end

   def missing_headers(headers)
@@ -80,44 +126,48 @@
       int_head.include?(i)
     end
     ext_present = known_headers_present(int_head) | ext
     ext_miss = known_headers - ext_present

-    ext_miss.each_with_object({}) do |ext, missing|
-      next unless int = fields_mapper.to_internal(ext)
-      if all_internal_attrs.include?(int)
-        missing[:direct] ||= []
-        missing[:direct] << ext
+    {
+      direct: [],
+      indirect: {}
+    }.tap do |missing|
+      ext_miss.each do |ext|
+        next unless int = fields_mapper.to_internal(ext)
+        missing[:direct] << ext if all_internal_attrs.include?(int)
+        related_attrs_requirements = required_attrs.values.select do |req|
+          dep = req.dependant?(int)
+          affects = dep && !int_head.include?(int)
+          in_header = int_head.include?(req.attr)
+          affects || (dep && !in_header)
+        end
+        next if related_attrs_requirements.empty?
+        data = missing[:indirect][ext] = {}
+        data[:int] = int
+        data[:attrs] = {}
+        related_attrs_requirements.each_with_object(data[:attrs]) do |req, attrs|
+          status = req.active?(*int_head) ? :active : :inactive
+          attrs[status] ||= []
+          attrs[status] << req.attr
+        end
       end
-      related_attrs_requirements = required_attrs.values.select do |req|
-        dep = req.dependant?(int)
-        affects = dep && !int_head.include?(int)
-        in_header = int_head.include?(req.attr)
-        affects || (dep && !in_header)
-      end
-      next if related_attrs_requirements.empty?
-      missing[:indirect] ||= {}
-      data = missing[:indirect][ext] = {}
-      data[:int] = int
-      data[:attrs] = {}
-      related_attrs_requirements.each_with_object(data[:attrs]) do |req, attrs|
-        status = req.active?(*int_head) ? :active : :inactive
-        attrs[status] ||= []
-        attrs[status] << req.attr
-      end
     end
   end

   def known_headers_present(headers_internal)
     @known_internal ||= known_headers.select do |ext|
       int = fields_mapper.to_internal(ext)
       headers_internal.include?(int)
     end
   end

+  # Scopes what internal attrs appear in headers as they are
   def internal_present_or_active(headers, inactive_requirements = {})
-    hint = headers & all_internal_attrs
+    # internal attrs that are not being mapped
+    int_all = all_internal_attrs.reject {|i| fields_mapper.external?(i)}
+    hint = headers & int_all
     hext = headers - hint
     int_present = hint + hext.map {|e| fields_mapper.to_internal(e)}.compact

     update_inactive = Proc.new do
       inactive_requirements.dup.each do |attr, req|
@@ -140,12 +190,13 @@
       end
     end
     int_present
   end

+  # The csv header names as expected
   def known_headers
-    @known_headers ||= fields_mapper.list(:external).compact
+    @known_headers ||= fields_mapper.list(:external).compact.uniq
   end

   def fields_mapper
     session.fields_mapper
   end
@@ -165,7 +216,6 @@
   end

   def person_parser
     session.entry_factory.person_parser
   end
-
 end