class Eco::API::UseCases::DefaultCases::CsvToTree
  # Helpers that load a CSV file and turn its rows into a list of `Node` objects.
  #
  # NOTE(review): `tidy_nodes` is not defined in this module; it is presumably
  # provided by one of the extended modules (`NodesCleaner` / `Treeify`) -- confirm.
  module Helper
    extend NodesCleaner
    extend Treeify

    class << self
      # Parses the CSV file at `filename`, treating its first row as headers.
      #
      # If the direct read fails with a malformed-CSV error that reports a line
      # number, that line is printed and the file is parsed again from its
      # scrubbed content (see `file_content`).
      #
      # @param filename [String] path to the CSV file.
      # @return [CSV::Table] the parsed rows.
      # @raise [RuntimeError] when `filename` does not exist.
      def csv_from(filename)
        # `File.exists?` was removed in Ruby 3.2; `File.exist?` works everywhere.
        raise "Missing #{filename}" unless File.exist?(filename)

        result = csv_from_file(filename)
        return result unless result.is_a?(Integer)

        puts "An encoding problem was found on line #{result}"
        csv_from_content(filename)
      end

      # Builds one `Node` per CSV row and links each node to a previous one via
      # `Node#set_high_levels`.
      #
      # @param csv [#each_with_index] parsed CSV whose rows respond to `fields`.
      # @return the result of `tidy_nodes` applied to the list of nodes built.
      # @raise [RuntimeError] when a node that is not at raw level 1 has no
      #   previously seen node one level above it.
      def nodes_from_csv(csv)
        prev_node = nil
        # Last node seen at each raw level; levels not seen yet read as nil.
        last_by_level = {}

        nodes = csv.each_with_index.map do |row, idx|
          # Blank cells become nil; everything else is the stripped text.
          values = row.fields.map do |value|
            text = value.to_s.strip
            text.empty? ? nil : text
          end

          # Row numbers start at 2 (presumably because the header line is row 1).
          node = Node.new(idx + 2, *values)
          prev_node ||= node

          if prev_node.raw_level <= node.raw_level
            # Node is nested in prev_node or is a sibling thereof:
            # make sure parent is among upper level tags.
            node.set_high_levels(prev_node)
          elsif (parent_node = last_by_level[node.raw_level - 1])
            node.set_high_levels(parent_node)
          elsif node.raw_level != 1
            # Only raw level 1 nodes are expected not to have a parent.
            raise "Node '#{node.raw_tag}' (#{node.row_num} row) doesn't have parent"
          end

          last_by_level[node.raw_level] = node
          prev_node = node
          node
        end

        tidy_nodes(nodes)
      end

      # Convenience wrapper: reads the CSV at `filename` and builds its nodes.
      #
      # @param filename [String] path to the CSV file.
      # @return see #nodes_from_csv
      def csv_nodes(filename)
        nodes_from_csv(csv_from(filename))
      end

      private

      # Parses the scrubbed file content instead of reading the file directly.
      #
      # @param filename [String] path to the CSV file.
      # @return [CSV::Table]
      def csv_from_content(filename)
        CSV.parse(file_content(filename), headers: true)
      end

      # Reads the whole file and replaces every invalid byte sequence with its
      # hexadecimal representation wrapped in angle brackets (e.g. `<e9>`).
      #
      # @param filename [String] path to the file.
      # @return [String] the scrubbed content.
      def file_content(filename)
        content = File.read(filename, encoding: encoding_option(filename))
        content.scrub { |bytes| "<#{bytes.unpack('H*').first}>" }
      end

      # Reads the file straight into a CSV table.
      #
      # @param filename [String] path to the CSV file.
      # @return [CSV::Table, Integer] the parsed table, or the line number
      #   mentioned in the message of a `CSV::MalformedCSVError`.
      # @raise [CSV::MalformedCSVError] when the error message has no line number.
      def csv_from_file(filename)
        CSV.read(filename, headers: true, encoding: encoding_option(filename))
      rescue CSV::MalformedCSVError => e
        # `String#[]` with a named capture yields nil when nothing matches, so
        # the original error is re-raised instead of failing on nil.
        line = e.message[/line (?<line>\d+)/i, "line"]
        raise unless line

        line.to_i
      end

      # Encoding string handed to `File.read` / `CSV.read`: plain "utf-8", or
      # "bom|utf-8" when the file starts with a byte order mark.
      #
      # @param filename [String] path to the file.
      # @return [String]
      def encoding_option(filename)
        coding = encoding(filename)
        coding == "utf-8" ? coding : "#{coding}|utf-8"
      end

      # @param path [String, nil] path to the file.
      # @return [Boolean] whether the file starts with the bytes EF BB BF.
      def has_bom?(path)
        return false if !path || file_empty?(path)

        File.open(path, "rb") do |file|
          file.read(3).unpack("C*") == [239, 187, 191]
        end
      end

      # @param path [String, nil] path to the file.
      # @return [String] "bom" when the file has a byte order mark, else "utf-8".
      def encoding(path)
        has_bom?(path) ? "bom" : "utf-8"
      end

      # @param path [String] path to the file.
      # @return [Boolean] true when `path` is not a regular file or has zero size.
      def file_empty?(path)
        !File.file?(path) || File.zero?(path)
      end
    end
  end
end