lib/rbbt/util/tsv/parse.rb in rbbt-util-2.1.0 vs lib/rbbt/util/tsv/parse.rb in rbbt-util-3.0.2
- old
+ new
@@ -5,10 +5,11 @@
return [] if io.nil?
## Split on the delimiter; the -1 limit keeps empty fields (including trailing ones)
fields = io.split(delimiter, -1)
+
fields
end
def self.parse_header(stream, sep = nil, header_hash = nil)
sep = /\t/ if sep.nil?
@@ -114,10 +115,12 @@
case_insensitive, type, namespace, merge, keep_empty, cast =
Misc.process_options options, :case_insensitive, :type, :namespace, :merge, :keep_empty, :cast
fix, exclude, select, grep =
Misc.process_options options, :fix, :exclude, :select, :grep
+ exclude ||= Misc.process_options options, :reject if options.include? :reject
+
#{{{ Process rest
data = {}
single = type.to_sym != :double
max_cols = 0
while line do
@@ -150,13 +153,14 @@
next if parts[key_pos].nil? || parts[key_pos].empty?
if single
ids = parse_fields(parts[key_pos], sep2)
ids.collect!{|id| id.downcase} if case_insensitive
+ ids = ids.reject{|_id| _id.empty?}.uniq
id = ids.shift
- ids.each do |id2| data[id2] = "__Ref:#{id}" end
+ ids.each do |id2| data[id2] = "__Ref:#{id}" unless data.include? id2 end
next if data.include?(id) and type != :flat
if other_pos.nil? or (fields == nil and type == :flat)
other_pos = (0..(parts.length - 1)).to_a
@@ -169,12 +173,12 @@
extra = parts.values_at(*other_pos).collect{|f| parse_fields(f, sep2).first}
end
extra.collect! do |elem|
case
- when String === cast
- elem.send(cast)
+ when (String === cast or Symbol === cast)
+ elem.send(cast.to_s)
when Proc === cast
cast.call elem
end
end if cast
@@ -193,47 +197,67 @@
max_cols = extra.size if extra.size > (max_cols || 0) unless type == :flat
else
ids = parse_fields(parts[key_pos], sep2)
ids.collect!{|id| id.downcase} if case_insensitive
+ ids = ids.reject{|_id| _id.empty?}.uniq
+ next if ids.empty?
+
id = ids.shift
- ids.each do |id2| data[id2] = "__Ref:#{id}" end
+ while data.include? id and data[id] =~ /__Ref:(.*)/
+ data[id] = data[$1].collect{|e| e.dup}
+ end
+ all_ids = [id]
+ ids.each do |id2|
+ if data.include? id2
+ while data[id2] =~ /__Ref:(.*)/
+ data[id2] = data[$1].collect{|e| e.dup}
+ end
+ all_ids << id2
+ else
+ data[id2] = "__Ref:#{id}"
+ end
+ end
+
if other_pos.nil? or (fields == nil and type == :flat)
other_pos = (0..(parts.length - 1)).to_a
other_pos.delete key_pos
end
extra = parts.values_at(*other_pos).collect{|f| parse_fields(f, sep2)}
extra.collect! do |list|
case
- when String === cast
- list.collect{|elem| elem.send(cast)}
+ when (String === cast or Symbol === cast)
+ list.collect{|elem| elem.send(cast.to_s)}
when Proc === cast
list.collect{|elem| cast.call elem}
end
end if cast
max_cols = extra.size if extra.size > (max_cols || 0)
- if not merge
- data[id] = extra unless data.include? id
- else
- if not data.include? id
- data[id] = extra
+
+ all_ids.each do |id|
+ if not merge
+ data[id] = extra unless data.include? id
else
- entry = data[id]
- while entry =~ /__Ref:(.*)/ do entry = data[$1] end
- extra.each_with_index do |f, i|
- if f.empty?
- next unless keep_empty
- f= [""]
+ if not data.include? id
+ data[id] = extra
+ else
+ entry = data[id]
+ while entry =~ /__Ref:(.*)/ do entry = data[$1] end
+ extra.each_with_index do |f, i|
+ if f.empty?
+ next unless keep_empty
+ f= [""]
+ end
+ entry[i] ||= []
+ entry[i] = entry[i].concat f
end
- entry[i] ||= []
- entry[i] = entry[i].concat f
+ data[id] = entry
end
- data[id] = entry
end
end
end
end
@@ -252,9 +276,9 @@
end
end
fields = nil if Fixnum === fields or (Array === fields and fields.select{|f| Fixnum === f}.any?)
fields ||= other_fields
- [data, {:key_field => key_field, :fields => fields, :type => type, :case_insensitive => case_insensitive, :namespace => namespace, :datadir => options[:datadir], :identifiers => options[:identifiers], :cast => !!cast}]
+ [data, {:key_field => key_field, :fields => fields, :type => type, :case_insensitive => case_insensitive, :namespace => namespace, :datadir => options[:datadir], :identifiers => options[:identifiers], :cast => (cast.nil? ? false : cast)}]
end
end