class Eco::API::UseCases::DefaultCases::CsvToTree
  # A tree node described by a row number (`row_num`) plus one tag slot per
  # level (`l1` .. `l11`). The deepest filled slot is the node's own tag; the
  # slots before it are its ancestors.
  #
  # NOTE(review): presumably each instance maps to one row of the input CSV
  # (the warnings printed by {#tag} refer to "Row") — confirm against caller.
  class Node < Struct.new(:row_num, :l1, :l2, :l3, :l4, :l5, :l6, :l7, :l8, :l9, :l10, :l11)
    TAGS_ATTRS         = [:l1, :l2, :l3, :l4, :l5, :l6, :l7, :l8, :l9, :l10, :l11].freeze
    ADDITIONAL_ATTRS   = [:row_num].freeze
    ALL_ATTRS          = (ADDITIONAL_ATTRS + TAGS_ATTRS).freeze
    ALLOWED_CHARACTERS = "A-Za-z0-9 &_'\/.-".freeze
    # Anchored with \A / \z (whole string). With ^ / $ a multi-line value was
    # accepted as soon as ONE of its lines was valid, skipping sanitization.
    VALID_TAG_REGEX    = /\A[#{ALLOWED_CHARACTERS}]+\z/
    INVALID_TAG_REGEX  = /[^#{ALLOWED_CHARACTERS}]+/
    VALID_TAG_CHARS    = /[#{ALLOWED_CHARACTERS}]+/
    DOUBLE_BLANKS      = /\s\s+/

    attr_accessor :parentId

    # @return [String] same value as {#id}.
    def nodeId
      id
    end

    # @return [String] the sanitized tag, upcased.
    def id
      tag.upcase
    end

    # @return [String, nil] same value as {#tag}.
    def name
      tag
    end

    # Sanitized version of {#raw_tag}: runs of characters that are not allowed
    # become a single blank, runs of whitespace are squeezed into one blank and
    # the result is stripped.
    #
    # The first call on a node prints a warning to stdout when the raw value had
    # to be altered; later calls stay silent.
    #
    # @return [String, nil] `nil` when no level is filled.
    def tag
      original      = raw_tag
      double_blanks = has_double_blanks?(original)
      partial       = replace_not_allowed(original)
      result        = remove_double_blanks(partial)

      warn_tag_cleanup(original, partial, result, double_blanks)
      result
    end

    # @return [Boolean] whether the one-shot warning of {#tag} was already handled.
    def invalid_warned?
      @invalid_warned ||= false
    end

    # Flags that the one-shot warning of {#tag} has been handled.
    # @return [true]
    def invalid_warned!
      @invalid_warned = true
    end

    # @return [Object, nil] the value of the deepest non-nil level, unsanitized.
    def raw_tag
      values_at(*TAGS_ATTRS.reverse).compact.first
    end

    # @return [Integer] see {#actual_level}.
    def level
      actual_level
    end

    # @return [Integer] number of levels that hold a value.
    def actual_level
      tags_array.compact.length
    end

    # @return [Integer] 1-based position of {#raw_tag} among the level slots.
    def raw_level
      idx_to_level(tag_idx)
    end

    # NOTE(review): relies on `Array#index`, so when the same value also sits
    # in an earlier slot the FIRST occurrence is returned; {#decouple} raises
    # when that mismatch happens.
    #
    # @return [Integer] 0-based position of {#raw_tag} among the level slots.
    def tag_idx
      tags_array.index(raw_tag)
    end

    # @return [Integer, nil] index right before {#tag_idx}; `nil` if there is none.
    def previous_idx
      idx = tag_idx - 1
      idx unless idx.negative?
    end

    # @return [Integer] index of the first empty slot, or the number of slots
    #   plus one when every slot is filled.
    def empty_idx
      tags = tags_array
      tags.index(nil) || tags.length + 1
    end

    # Builds a new node with the same `row_num` and level values. Only the
    # attributes in `ALL_ATTRS` are carried over (not `parentId`, nor the
    # warned flag).
    #
    # @return [Node]
    def copy
      self.class.new.set_attrs(**to_h)
    end

    # We got a missing level that is compacted in one row.
    # Here we get the missing intermediate levels, as copies of this node cut at
    # each of those levels. They are returned from upper to lower level to
    # ensure processing order. The last filled level is skipped, as that is
    # this object already.
    #
    # @param num [Integer] how many intermediate levels (counted back from the
    #   last filled one) to produce; when fewer are available, all of them are
    #   returned.
    # @return [Array<Node>]
    # @raise [RuntimeError] when {#tag_idx} is not the last filled index.
    def decouple(num = 1)
      with_info = filled_idxs

      # must be the last among filled_idxs, so let's use it to verify
      unless with_info.last == tag_idx
        raise "Review this (row #{row_num}; '#{raw_tag}'): tag_idx is #{tag_idx}, while last filled idx is #{with_info.last}"
      end

      # Clamp at 0: a negative start would wrap around to the tail of the array
      # (wrong subset) or fall out of range (nil) when num >= filled levels.
      first       = [with_info.length - (num + 1), 0].max
      target_idxs = with_info[first..-2] || []

      target_idxs.map do |idx|
        copy.tap { |node| node.clear_level(idx_to_level(idx + 1)) }
      end
    end

    # Copies onto this node the levels of `node` that come after this node's
    # own level.
    #
    # @param node [Node]
    # @return [Node] self
    def merge!(node)
      override_upper_levels(node.tags_array)
    end

    # Copies onto this node the levels of `node` that come before this node's
    # own level.
    #
    # @param node [Node]
    # @return [Node] self
    def set_high_levels(node)
      override_lower_levels(node.tags_array)
    end

    # Sets level slots to `nil`.
    #
    # @param i [Enumerable<Integer>, Integer] an enumerable of level numbers to
    #   clear exactly those, or a single level number to clear that level and
    #   every level after it.
    # @return [Boolean] `false` when `i` is of another type, is an Integer out
    #   of `1..tag_attrs_count`, or resolves to no levels; `true` otherwise.
    def clear_level(i)
      target =
        case i
        when Enumerable
          i.to_a
        when Integer
          return false unless i.between?(1, tag_attrs_count)

          Array(i..tag_attrs_count)
        else
          return false
        end

      return false if target.empty?

      target.each { |n| set_attr("l#{n}", nil) }
      true
    end

    # Overwrites the levels `from_level..tag_attrs_count` with the values found
    # at the same positions of `src_tags_array`.
    #
    # @param src_tags_array [Array] values ordered by level (index 0 is `l1`).
    # @param from_level [Integer] first level to overwrite.
    # @return [Node] self
    def override_upper_levels(src_tags_array, from_level: raw_level + 1)
      levels = Array(from_level..tag_attrs_count)
      values = src_tags_array[level_to_idx(from_level)..level_to_idx(tag_attrs_count)]

      levels.zip(values).each { |(n, value)| set_attr("l#{n}", value) }
      self
    end

    # Overwrites the levels `1..to_level` with the values found at the same
    # positions of `src_tags_array`.
    #
    # @param src_tags_array [Array] values ordered by level (index 0 is `l1`).
    # @param to_level [Integer] last level to overwrite.
    # @return [Node] self
    def override_lower_levels(src_tags_array, to_level: raw_level - 1)
      levels = Array(1..to_level)
      values = src_tags_array[level_to_idx(1)..level_to_idx(to_level)]

      levels.zip(values).each { |(n, value)| set_attr("l#{n}", value) }
      self
    end

    # @param x [Integer] 0-based index.
    # @return [Integer] 1-based level.
    def idx_to_level(x)
      x + 1
    end

    # @param x [Integer] 1-based level.
    # @return [Integer] 0-based index.
    def level_to_idx(x)
      x - 1
    end

    # @return [Array<Integer>] indexes of the level slots holding a truthy value.
    def filled_idxs
      tags = tags_array
      tags.each_index.select { |i| tags[i] }
    end

    # @return [Boolean] whether an empty slot precedes some filled slot.
    def blanks_between?
      actual_level > empty_idx
    end

    # @return [Array] the values of `l1` .. `l11`, in that order.
    def tags_array
      values_at(*TAGS_ATTRS)
    end

    # Reads attributes by NAME (note that this replaces the index-based
    # `Struct#values_at`).
    #
    # @param attrs [Array<Symbol, String>] attribute names.
    # @return [Array] the value of each attribute, in the given order.
    def values_at(*attrs)
      attrs.map { |a| attr(a) }
    end

    # @param attrs [Array<Symbol, String>] attribute names; defaults to
    #   `ALL_ATTRS` when none is given.
    # @return [Hash] attribute name => value.
    def to_h(*attrs)
      attrs = ALL_ATTRS if attrs.empty?
      attrs.zip(values_at(*attrs)).to_h
    end

    # Same as {#to_h}, except that no arguments yields an empty Hash.
    #
    # @param attrs [Array<Symbol, String>] attribute names.
    # @return [Hash]
    def slice(*attrs)
      return {} if attrs.empty?

      to_h(*attrs)
    end

    # Assigns several attributes at once.
    #
    # @param kargs [Hash] attribute name => value.
    # @return [Node] self
    def set_attrs(**kargs)
      kargs.each { |attr, value| set_attr(attr, value) }
      self
    end

    # Assigns `value` through the `<attr>=` writer.
    #
    # @param attr [Symbol, String] attribute name.
    # @param value [Object]
    def set_attr(attr, value)
      send("#{attr}=", value)
    end

    # Reads the attribute named `sym`.
    #
    # @param sym [Symbol, String] attribute name.
    # @return [Object]
    def attr(sym)
      send(sym.to_sym)
    end

    # @return [Integer] number of level slots.
    def tag_attrs_count
      TAGS_ATTRS.length
    end

    # @param str [String, nil]
    # @return [Boolean] whether `str` holds two or more consecutive whitespace
    #   characters (`false` for `nil`).
    def has_double_blanks?(str)
      return false if str.nil?

      str.match?(DOUBLE_BLANKS)
    end

    # @param str [String, nil]
    # @return [String, nil] `str` with every whitespace run of two or more
    #   squeezed into one blank, and stripped (`nil` for `nil`).
    def remove_double_blanks(str)
      return nil if str.nil?

      str.gsub(DOUBLE_BLANKS, ' ').strip
    end

    # @param str [String, nil]
    # @return [String, nil] `str` itself when it only holds allowed characters;
    #   otherwise a copy where each run of not allowed characters is replaced
    #   by one blank (`nil` for `nil`).
    def replace_not_allowed(str)
      return nil if str.nil?
      return str if str.match?(VALID_TAG_REGEX)

      str.gsub(INVALID_TAG_REGEX, ' ')
    end

    # @param str [String]
    # @return [String] what is left of `str` after removing allowed characters.
    def identify_invalid_characters(str)
      str.gsub(VALID_TAG_CHARS, '')
    end

    private

    # Prints the clean-up warning of {#tag} and marks the node as warned, so
    # the message shows up at most once per node.
    def warn_tag_cleanup(original, partial, result, double_blanks)
      return if invalid_warned?

      if partial != original
        invalid_chars = identify_invalid_characters(original)
        puts "• (Row: #{row_num}) Invalid characters _#{invalid_chars}_ (removed): '#{original}' (converted to '#{result}')"
      elsif double_blanks
        puts "• (Row: #{row_num}) Double blanks (removed): '#{original}' (converted to '#{result}')"
      end

      invalid_warned!
    end
  end
end