class Eco::Data::Locations::NodeLevel
  # Helpers that build `NodeLevel` nodes out of a `CSV::Table`.
  module Parsing
    include Eco::Data::Locations::NodeLevel::Cleaner
    include Eco::Data::Locations::Convert

    attr_writer :node_class

    # Class used to instantiate each node in `nodes_from_csv`.
    # Defaults to `Eco::Data::Locations::NodeLevel` unless one was assigned
    # through `node_class=`.
    # @return [Class]
    def node_class
      @node_class ||= Eco::Data::Locations::NodeLevel
    end

    # It evaluates as the opposite of `NodePlain`.
    # @note there are only two accepted input csv formats.
    #   The expected header of `NodePlain` is predictable,
    #   but the header of `NodeLevel` is not.
    # @param csv [CSV::Table]
    # @return [Boolean] whether or not it is worth trying to parse with `NodeLevel`.
    #   Always `false` when `csv` is not a `CSV::Table`.
    def csv_matches_format?(csv)
      return false unless csv.is_a?(::CSV::Table)

      !Eco::Data::Locations::NodePlain.csv_matches_format?(csv)
    end

    # @note
    #   1. It ensures basic data integrity when building the nodes in the first screening
    #   2. It then delegates the tidy up to a cleaner function (see `tidy_nodes`)
    # @param csv [CSV::Table]
    # @raise [ArgumentError] when `csv` is not a `CSV::Table`.
    # @raise [RuntimeError] when a node of level other than 1 steps back up the
    #   hierarchy and no node has been tracked for the level right above it.
    # @return [Array] the result of `tidy_nodes` applied to the built nodes.
    def nodes_from_csv(csv)
      raise ArgumentError, "Expecting CSV::Table. Given: #{csv.class}" unless csv.is_a?(::CSV::Table)

      prev_node  = nil
      # NOTE(review): `empty_level_tracker_hash` is defined elsewhere; the `11`
      # is presumably the number of levels being tracked -- confirm.
      prev_nodes = empty_level_tracker_hash(11)

      # Convert to Eco::CSV::Table for a fresh start
      csv = Eco::CSV.parse(csv.to_csv).nil_blank_cells.add_index_column(:row_num)

      nodes = csv.each_with_object([]) do |row, out|
        row_num, *values = row.fields
        node = node_class.new(row_num, *values)
        # On the very first row there is no previous node: compare against itself
        prev_node ||= node

        if prev_node.raw_level <= node.raw_level
          # Node is nested in prev_node or is a sibling thereof:
          # make sure parent is among upper level tags
          node.set_high_levels(prev_node)
        elsif (parent_node = prev_nodes[node.raw_level - 1])
          # Node steps back up: use the last node seen one level above
          node.set_high_levels(parent_node)
        elsif node.raw_level != 1
          # Only a top level tag (level 1) is expected not to have a parent
          raise "Node '#{node.raw_tag}' (#{node.row_num} row) doesn't have parent"
        end

        out << node
        prev_nodes[node.raw_level] = node
        prev_node = node
      end

      tidy_nodes(nodes)
    end
  end
end