lib/rbbt/util/tsv/parse.rb in rbbt-util-2.1.0 vs lib/rbbt/util/tsv/parse.rb in rbbt-util-3.0.2

- old
+ new

@@ -5,10 +5,11 @@ return [] if io.nil? ## split with delimiter, do not remove empty fields = io.split(delimiter, -1) + fields end def self.parse_header(stream, sep = nil, header_hash = nil) sep = /\t/ if sep.nil? @@ -114,10 +115,12 @@ case_insensitive, type, namespace, merge, keep_empty, cast = Misc.process_options options, :case_insensitive, :type, :namespace, :merge, :keep_empty, :cast fix, exclude, select, grep = Misc.process_options options, :fix, :exclude, :select, :grep + exclude ||= Misc.process_options options, :reject if options.include? :reject + #{{{ Process rest data = {} single = type.to_sym != :double max_cols = 0 while line do @@ -150,13 +153,14 @@ next if parts[key_pos].nil? || parts[key_pos].empty? if single ids = parse_fields(parts[key_pos], sep2) ids.collect!{|id| id.downcase} if case_insensitive + ids = ids.reject{|_id| _id.empty?}.uniq id = ids.shift - ids.each do |id2| data[id2] = "__Ref:#{id}" end + ids.each do |id2| data[id2] = "__Ref:#{id}" unless data.include? id2 end next if data.include?(id) and type != :flat if other_pos.nil? or (fields == nil and type == :flat) other_pos = (0..(parts.length - 1)).to_a @@ -169,12 +173,12 @@ extra = parts.values_at(*other_pos).collect{|f| parse_fields(f, sep2).first} end extra.collect! do |elem| case - when String === cast - elem.send(cast) + when (String === cast or Symbol === cast) + elem.send(cast.to_s) when Proc === cast cast.call elem end end if cast @@ -193,47 +197,67 @@ max_cols = extra.size if extra.size > (max_cols || 0) unless type == :flat else ids = parse_fields(parts[key_pos], sep2) ids.collect!{|id| id.downcase} if case_insensitive + ids = ids.reject{|_id| _id.empty?}.uniq + next if ids.empty? + id = ids.shift - ids.each do |id2| data[id2] = "__Ref:#{id}" end + while data.include? id and data[id] =~ /__Ref:(.*)/ + data[id] = data[$1].collect{|e| e.dup} + end + all_ids = [id] + ids.each do |id2| + if data.include? id2 + while data[id2] =~ /__Ref:(.*)/ + data[id2] = data[$1].collect{|e| e.dup} + end + all_ids << id2 + else + data[id2] = "__Ref:#{id}" + end + end + if other_pos.nil? or (fields == nil and type == :flat) other_pos = (0..(parts.length - 1)).to_a other_pos.delete key_pos end extra = parts.values_at(*other_pos).collect{|f| parse_fields(f, sep2)} extra.collect! do |list| case - when String === cast - list.collect{|elem| elem.send(cast)} + when (String === cast or Symbol === cast) + list.collect{|elem| elem.send(cast.to_s)} when Proc === cast list.collect{|elem| cast.call elem} end end if cast max_cols = extra.size if extra.size > (max_cols || 0) - if not merge - data[id] = extra unless data.include? id - else - if not data.include? id - data[id] = extra + + all_ids.each do |id| + if not merge + data[id] = extra unless data.include? id else - entry = data[id] - while entry =~ /__Ref:(.*)/ do entry = data[$1] end - extra.each_with_index do |f, i| - if f.empty? - next unless keep_empty - f= [""] + if not data.include? id + data[id] = extra + else + entry = data[id] + while entry =~ /__Ref:(.*)/ do entry = data[$1] end + extra.each_with_index do |f, i| + if f.empty? + next unless keep_empty + f= [""] + end + entry[i] ||= [] + entry[i] = entry[i].concat f end - entry[i] ||= [] - entry[i] = entry[i].concat f + data[id] = entry end - data[id] = entry end end end end @@ -252,9 +276,9 @@ end end fields = nil if Fixnum === fields or (Array === fields and fields.select{|f| Fixnum === f}.any?) fields ||= other_fields - [data, {:key_field => key_field, :fields => fields, :type => type, :case_insensitive => case_insensitive, :namespace => namespace, :datadir => options[:datadir], :identifiers => options[:identifiers], :cast => !!cast}] + [data, {:key_field => key_field, :fields => fields, :type => type, :case_insensitive => case_insensitive, :namespace => namespace, :datadir => options[:datadir], :identifiers => options[:identifiers], :cast => (cast.nil? ? false : cast)}] end end