lib/eco/api/common/people/default_parsers/csv_parser.rb in eco-helpers-3.0.18 vs lib/eco/api/common/people/default_parsers/csv_parser.rb in eco-helpers-3.0.19
- old
+ new
@@ -1,235 +1,48 @@
# CSV parser for people data: `parser` turns CSV content into an array of row
# hashes, and `serializer` turns an array of hashes back into CSV text.
#
# NOTE(review): this listing is a diff between eco-helpers 3.0.18 and 3.0.19.
# Lines marked `-` exist only in the old version, lines marked `+` only in the
# new version, and unmarked lines are common to both.
class Eco::API::Common::People::DefaultParsers::CSVParser < Eco::API::Common::Loaders::Parser
# NOTE(review): presumably registers this parser for the `csv` attribute/format;
# confirm against `Eco::API::Common::Loaders::Parser`.
attribute :csv
# NOTE(review): presumably these mixins supply `require_headers!`,
# `check_headers!`, `options` and `parse_null`, which 3.0.19 calls without
# defining them in this class — confirm in the Helpers modules.
+ include Eco::API::Common::People::DefaultParsers::Helpers::ExpectedHeaders
+ include Eco::API::Common::People::DefaultParsers::Helpers::NullParsing
+
# Parses CSV content into an array of hashes, one hash per CSV row.
#
# NOTE(review): this method is shown as a diff hunk: `-` lines are the 3.0.18
# code, `+` lines the 3.0.19 code, and unmarked lines are shared by both.
#
# @param data the CSV content handed to `Eco::CSV.parse`
#   (presumably a String — confirm against callers).
# @param deps hash-like object; only `deps[:check_headers]` is read here.
# @return [Array<Hash>] one hash per row, keyed by the stripped header name.
def parser(data, deps)
# The header validation runs inside `tap`, so the parsed table itself is what
# flows on to the row conversion below.
Eco::CSV.parse(data, headers: true, skip_blanks: true).tap do |table|
# 3.0.18: validators receive the whole table.
- require_headers!(table)
- check_headers(table) if deps[:check_headers] && check_headers?
- end.each_with_object([]) do |row, arr_hash|
- row_hash = row.headers.uniq.each_with_object({}) do |attr, hash|
# 3.0.19: validators receive only the header list.
+ require_headers!(table.headers)
+
# `next` leaves the `tap` block early; `tap` still returns the table.
+ next unless deps[:check_headers]
+ next unless check_headers?
+
# The order option is read from `options` and passed in as `order_check`.
+ check_headers!(
+ table.headers,
+ order_check: options.dig(:input, :header_check, :order)
+ )
+ end.each_with_object([]) do |item, arr_hash|
# `uniq` means a header name repeated in the file is only visited once.
+ item_hash = item.headers.uniq.each_with_object({}) do |attr, hash|
# Columns whose header is blank (after `to_s.strip`) are skipped entirely.
next if attr.to_s.strip.empty?
- hash[attr.strip] = parse_string(row[attr])
+
# The key is the stripped header; the value is looked up with the raw header.
+ hash[attr.strip] = parse_null(item[attr])
end
- arr_hash.push(row_hash)
+
+ arr_hash.push(item_hash)
end
end
# Serializes an array of hashes into CSV text.
#
# The column set and order come from the keys of the FIRST hash only. Each hash
# contributes `values_at(*header)`, so a key missing from a later hash yields
# `nil` and keys that are not in the first hash are ignored.
#
# NOTE(review): the `+` lines below are blank lines added in 3.0.19; the code
# itself is unchanged between versions.
#
# @param array_hash [Array<Hash>] the rows to serialize; may be empty.
# @param _deps unused.
# @return the result of `CSV::Table#to_csv` (CSV text).
def serializer(array_hash, _deps)
arr_rows = []
+
# With no input rows the table is built from an empty row list.
unless array_hash.empty?
header = array_hash.first.keys
+
arr_rows = array_hash.map do |csv_row|
CSV::Row.new(header, csv_row.values_at(*header))
end
end
+
CSV::Table.new(arr_rows).to_csv
end
private
# Removed in 3.0.19 (every line carries the `-` marker).
# Forwards to the inherited `abort`, always adding `raising: false`.
# NOTE(review): the effect of `raising: false` is defined by the parent
# implementation, which is not visible here — confirm.
- def abort(msg)
- super(msg, raising: false)
- end
-
# Removed in 3.0.19 (every line carries the `-` marker); the new `parser` still
# calls `require_headers!`, but with the header list instead of the table.
#
# Checks that the table has headers and that none of them is blank.
# Calls `abort` when there are no headers at all, and again when any header is
# empty after `to_s.strip`, listing the zero-based column indexes.
#
# @param table object responding to `headers`.
# @return [true] when reached (i.e. if the `abort` calls did not stop execution).
- def require_headers!(table)
- headers = table.headers
-
# `&.` also covers a nil header list.
- abort("Missing headers in CSV") unless headers&.any?
-
# Collect the positions of blank headers.
- empty = []
- headers.each_with_index do |header, idx|
- empty << idx if header.to_s.strip.empty?
- end
-
- abort("Empty headers in column(s): #{empty.join(', ')}") if empty.any?
-
- true
- end
-
# Tells whether the header check should run.
# It is disabled when `options[:input][:header_check][:skip]` is truthy.
#
# NOTE(review): the `def` and body are unmarked (kept in 3.0.19) while this
# `end` is marked `-`. Presumably a diff-alignment artifact, with the method
# closed in 3.0.19 by the unmarked `end` further down — confirm against the
# real file.
#
# @return [Boolean]
def check_headers?
!options.dig(:input, :header_check, :skip)
- end
-
# Removed in 3.0.19 (every line carries the `-` marker).
# Shortcut to the CLI options object, `ASSETS.cli.options`.
# NOTE(review): the new code still calls `options`, so it is presumably now
# provided by a mixin or the parent class — confirm.
- def options
- ASSETS.cli.options
- end
-
# Removed in 3.0.19 (every line carries the `-` marker); the new `parser` calls
# `parse_null` instead.
#
# Normalizes a cell value: returns `nil` when the value is empty (after `to_s`)
# or when `null?` treats it as a null token; otherwise returns it unchanged.
- def parse_string(value)
- return nil if value.to_s.empty?
- return nil if null?(value)
- value
- end
-
# Removed in 3.0.19 (every line carries the `-` marker).
#
# Tells whether a value stands for "no value": true for a falsy value, or for
# text that equals "NULL" once stripped and upcased (so the match ignores case
# and surrounding whitespace).
# NOTE(review): `value.strip` assumes a String-like value — confirm callers.
- def null?(value)
- return true unless value
-
- str = value.strip.upcase
- ["NULL"].any? {|token| str == token}
- end
-
# Removed in 3.0.19 (every line carries the `-` marker); the new `parser` calls
# `check_headers!` with the header list and an `order_check:` argument instead.
#
# Compares the table headers with the expected ones and, when anything looks
# off, logs a warning describing the issues. It then calls `abort` if
# `options[:input][:header_check][:must_be_valid]` is set, and finally sleeps
# for 2 seconds.
#
# Issues considered:
# * unknown headers (`unknown_headers`),
# * directly and indirectly missing headers (`missing_headers`),
# * positional mismatches (`unmatched_headers`), only when
#   `options[:input][:header_check][:order]` is set.
#
# @param table object responding to `headers`.
# @return [nil] early, when no issue was detected.
- def check_headers(table) # rubocop:disable Metrics/AbcSize
- headers = table.headers
- unmatch = []
- unmatch = unmatched_headers(headers) if options.dig(:input, :header_check, :order)
- missing = missing_headers(headers)
- unknown = unknown_headers(headers)
- criteria = [unknown, missing[:direct], missing[:indirect], unmatch]
# Nothing to report when every issue list is empty.
- return if criteria.all?(&:empty?)
-
- msg = "Detected possible HEADER ISSUES !!!\n"
-
- # requires exact match
- unless unmatch.empty?
- msg << "CSV headers do NOT exactly match the expected:\n"
- msg << " * Expected: #{known_headers}\n"
# Only the first mismatching (expected, given) pair is reported.
- expected, given = unmatch.first
- msg << " * First unmatch => Given: '#{given}' where expected '#{expected}'\n"
- missed = known_headers - headers
- unless missed.empty?
- msg << " * Missing headers:\n"
- msg << " - #{missed.join("\n - ")}\n"
- end
- end
-
# This heading is appended whenever any issue exists, even if the unknown and
# missing lists below turn out to be empty.
- msg << "Missing or Wrong HEADER names in the CSV file:\n"
- msg << " * UNKNOWN (or not used?): #{unknown}\n" unless unknown.empty?
- msg << " * MISSING HEADER: #{missing[:direct]}\n" unless missing[:direct].empty?
-
- unless (data = missing[:indirect]).empty?
- msg << " * MISSING INDIRECTLY:\n"
- data.each do |ext, info|
- msg << " - '#{ext}' => "
# Statuses other than :inactive / :active map to nil and are dropped by `compact`.
- msg << (info[:attrs] || {}).map do |status, attrs|
- if status == :inactive
- "makes inactive: #{attrs}"
- elsif status == :active
- "there could be missing info in: #{attrs}"
- end
- end.compact.join("; ")
- msg << "\n"
- end
- end
-
- log(:warn) { msg }
-
- msg = "There were issues identified on the CSV header names. Aborting..."
- abort(msg) if options.dig(:input, :header_check, :must_be_valid)
-
# NOTE(review): presumably a pause so the warning can be noticed — confirm intent.
- sleep(2)
- end
-
# Removed in 3.0.19 (every line carries the `-` marker).
#
# Position-by-position comparison of the expected headers with the given ones.
# Because the `zip` is driven by `known_headers`, given headers beyond its
# length are not compared.
#
# @param headers [Array] the headers found in the CSV.
# @return [Array<Array>] the `[expected, given]` pairs that differ.
- def unmatched_headers(headers)
- known_headers.zip(headers).reject do |(expected, given)|
- expected == given
- end
- end
-
# Removed in 3.0.19 (every line carries the `-` marker).
#
# Headers that are neither an expected (external) header nor an internal
# attribute name.
#
# @param headers [Array] the headers found in the CSV.
# @return [Array]
- def unknown_headers(headers)
- (headers - known_headers) - all_internal_attrs
- end
-
# Removed in 3.0.19 (every line carries the `-` marker).
#
# Works out which expected headers are absent from the CSV.
#
# @param headers [Array] the headers found in the CSV.
# @return [Hash] with two keys:
#   * `:direct` [Array] missing external headers whose internal name is in
#     `all_internal_attrs`;
#   * `:indirect` [Hash] missing external header => `{int:, attrs:}`, where
#     `attrs` groups the affected required attributes under `:active` or
#     `:inactive`.
- def missing_headers(headers) # rubocop:disable Metrics/AbcSize
# Internal attrs regarded as present, as computed by `internal_present_or_active`.
- int_head = internal_present_or_active(headers)
# Given headers whose internal name is among them.
- external = headers.select do |e|
- i = fields_mapper.to_internal(e)
- int_head.include?(i)
- end
-
- ext_present = known_headers_present(int_head) | external
- ext_miss = known_headers - ext_present
-
- {
- direct: [],
- indirect: {}
- }.tap do |missing|
- ext_miss.each do |ext|
# Missing headers with no internal mapping are ignored.
- next unless (int = fields_mapper.to_internal(ext))
-
- missing[:direct] << ext if all_internal_attrs.include?(int)
# Keep requirements that depend on `int` when either `int` itself or the
# requirement's own attribute is not among the present internal attrs.
- related_attrs_requirements = required_attrs.values.select do |req|
- dep = req.dependant?(int)
- affects = dep && !int_head.include?(int)
- in_header = int_head.include?(req.attr)
- affects || (dep && !in_header)
- end
-
- next if related_attrs_requirements.empty?
-
- data = missing[:indirect][ext] = {}
- data[:int] = int
- data[:attrs] = {}
-
# Group each affected attribute by whether its requirement is still active
# with the present internal attrs.
- related_attrs_requirements.each_with_object(data[:attrs]) do |req, attrs|
- status = req.active?(*int_head) ? :active : :inactive
- attrs[status] ||= []
- attrs[status] << req.attr
- end
- end
- end
- end
-
# Removed in 3.0.19 (every line carries the `-` marker).
#
# Selects the expected (external) headers whose internal name is in the given
# list.
#
# @param headers_internal [Array] internal attribute names.
# @return [Array] the matching external headers.
- def known_headers_present(headers_internal)
- known_headers.select do |ext|
- int = fields_mapper.to_internal(ext)
- headers_internal.include?(int)
- end
- end
-
# Removed in 3.0.19 (every line carries the `-` marker).
#
# Builds the list of internal attributes regarded as present for the given
# headers: headers that are unmapped internal names, headers that map to an
# internal name, and required attributes whose requirement reports `active?`
# for what is already present.
#
# @param headers [Array] the headers found in the CSV.
# @param inactive_requirements [Hash] attr => requirement; MUTATED in place to
#   hold the requirements that stayed inactive.
# @return [Array] the internal attribute names regarded as present.
- # Scopes what internal attrs appear in headers as they are
- def internal_present_or_active(headers, inactive_requirements = {}) # rubocop:disable Metrics/AbcSize
- # internal attrs that are not being mapped
- int_all = all_internal_attrs.reject {|i| fields_mapper.external?(i)}
# hint: headers that are already internal names; hext: the remaining headers.
- hint = headers & int_all
- hext = headers - hint
- int_present = hint + hext.map {|e| fields_mapper.to_internal(e)}.compact
-
# Re-evaluates the inactive requirements. Each one that becomes active is
# promoted to present, and the proc calls itself again because that promotion
# may activate others. Iterating over a `dup` keeps the deletion safe.
- update_inactive = proc do
- inactive_requirements.dup.each do |attr, req|
- next unless req.active?(*int_present)
-
- inactive_requirements.delete(attr)
- int_present << attr
- update_inactive.call
- end
- end
-
- required_attrs.each_value do |req|
# NOTE(review): this compares the requirement object itself, while
# `int_present` is filled with attribute names elsewhere in this method;
# `req.attr` was presumably intended — confirm.
- next if int_present.include?(req)
-
- if req.active?(*int_present)
- inactive_requirements.delete(req.attr)
- int_present << req.attr
- update_inactive.call
- else
- inactive_requirements[req.attr] = req
- end
- end
-
- int_present
- end
-
# Removed in 3.0.19 (every line carries the `-` marker).
#
# Memoized list of external names from `fields_mapper.list(:external)`, with
# nils and duplicates removed.
#
# @return [Array]
- # The csv header names as expected
- def known_headers
- @known_headers ||= fields_mapper.list(:external).compact.uniq
- end
-
# Removed in 3.0.19 (every line carries the `-` marker).
# Shortcut to the session's fields mapper, which the other helpers use to
# translate between external headers and internal attribute names.
- def fields_mapper
- session.fields_mapper
- end
-
# Removed in 3.0.19 (every line carries the `-` marker).
#
# Memoized lookup of the person parser's required-attribute objects, keyed by
# each object's `attr`.
#
# @return [Hash] attr => required-attribute object.
- def required_attrs
- @required_attrs ||= person_parser.required_attrs.to_h {|ra| [ra.attr, ra]}
- end
-
# Removed in 3.0.19 (every line carries the `-` marker).
#
# Memoized union of the attributes reported by the person parser
# (`all_attrs(include_defined_parsers: true)`) and the internal names listed by
# the fields mapper, with nils removed from the latter.
#
# @return [Array]
- def all_internal_attrs
- @all_internal_attrs ||= [].tap do |int_attrs|
- known_int_attrs = person_parser.all_attrs(include_defined_parsers: true)
- known_int_attrs |= fields_mapper.list(:internal).compact
- int_attrs.concat(known_int_attrs)
- end
- end
-
# Removed in 3.0.19 (the `def` and body carry the `-` marker).
# Shortcut to the person parser held by the session's entry factory.
#
# NOTE(review): the `end` below is unmarked, i.e. kept in 3.0.19. Presumably a
# diff-alignment artifact, with that `end` closing `check_headers?` in the new
# version — confirm against the real file.
- def person_parser
- session.entry_factory.person_parser
end
end