lib/rbbt/util/tsv/attach.rb in rbbt-util-3.0.3 vs lib/rbbt/util/tsv/attach.rb in rbbt-util-3.1.0
- old
+ new
@@ -45,17 +45,21 @@
case
when (String === file1 and not file1.index("\n") and file1.length < 250 and File.exists?(file1))
file1 = CMD.cmd("sort -k1,1 -t'#{sep}' #{ file1 } | grep -v '^#{sep}' ", :pipe => true)
when (String === file1 or StringIO === file1)
file1 = CMD.cmd("sort -k1,1 -t'#{sep}' | grep -v '^#{sep}'", :in => file1, :pipe => true)
+ when TSV === file1
+ file1 = CMD.cmd("sort -k1,1 -t'#{sep}' | grep -v '^#{sep}'", :in => file1.to_s(:sort, true), :pipe => true)
end
case
when (String === file2 and not file2.index("\n") and file2.length < 250 and File.exists?(file2))
file2 = CMD.cmd("sort -k1,1 -t'#{sep}' #{ file2 } | grep -v '^#{sep}' ", :pipe => true)
when (String === file2 or StringIO === file2)
file2 = CMD.cmd("sort -k1,1 -t'#{sep}' | grep -v '^#{sep}'", :in => file2, :pipe => true)
+ when TSV === file2
+ file2 = CMD.cmd("sort -k1,1 -t'#{sep}' | grep -v '^#{sep}'", :in => file2.to_s(:sort, true), :pipe => true)
end
output = File.open(output, 'w') if String === output
cols1 = nil
@@ -113,114 +117,10 @@
end
end
output.close
end
-
# NOTE: this whole definition is on the old (`-`) side of the diff, i.e. it is
# the 3.0.3 implementation that is no longer present in 3.1.0.
#
# Merge-joins two inputs on their first column and writes one row per key to
# +output+: the key, then the remaining fields of file1's row, then the
# remaining fields of file2's row. A key present on only one side gets empty
# strings for the other side's columns.
#
# file1, file2 - A String with no newline, shorter than 250 characters, that
#                names an existing file is sorted from disk with `sort`.
#                Any other String is handed to `sort` through the :in option
#                (presumably as its standard input -- confirm in CMD.cmd).
#                Any other object is used untouched and must respond to
#                gets and eof?.
# output       - A String is opened for writing as a path; any other object
#                must respond to puts and close. It is closed before
#                returning in both cases.
# sep          - Field separator, used for `sort -t`, for splitting input
#                lines and for joining output rows (default: tab).
- def self.paste(file1, file2, output, sep = "\t")
- case
- when (String === file1 and not file1.index("\n") and file1.length < 250 and File.exists?(file1))
- file1 = CMD.cmd("sort -k1,1 -t'#{sep}' #{ file1 } ", :pipe => true)
- when String === file1
- file1 = CMD.cmd("sort -k1,1 -t'#{sep}'", :in => file1, :pipe => true)
- end
-
- case
- when (String === file2 and not file2.index("\n") and file2.length < 250 and File.exists?(file2))
- file2 = CMD.cmd("sort -k1,1 -t'#{sep}' #{ file2 } ", :pipe => true)
- when String === file2
- file2 = CMD.cmd("sort -k1,1 -t'#{sep}'", :in => file2, :pipe => true)
- end
-
- output = File.open(output, 'w') if String === output
-
- cols1 = nil
- cols2 = nil
-
- done1 = false
- done2 = false
-
# Skip leading lines that contain a '#' anywhere; the first line without one
# is parsed as the first data row. If the input has no such line, gets
# returns nil and the strip! below raises NoMethodError.
- while (line1 = file1.gets) =~ /#/; end
- line1.strip!
- parts1 = line1.split(sep)
- key1 = parts1.shift
# The column count is taken from the first data row only and is reused as the
# padding width for every key that is missing on this side.
- cols1 = parts1.length
-
- while (line2 = file2.gets) =~ /#/; end
- line2.strip!
- parts2 = line2.split(sep)
- key2 = parts2.shift
- cols2 = parts2.length
# Lock-step merge. Keys are compared with Ruby's String ordering, so both
# streams must be sorted consistently with it.
# NOTE(review): `sort` may collate by locale -- confirm the two orders agree.
# The loop stops as soon as either input is exhausted.
- while not (done1 or done2)
- case
- when key1 < key2
# `Array * String` is a join, which also flattens the nested arrays.
- output.puts [key1, parts1, [""] * cols2] * sep
- if file1.eof?
- done1 = true
- else
- line1 = file1.gets
- line1.strip!
- parts1 = line1.split(sep)
- key1 = parts1.shift
- end
- when key2 < key1
- output.puts [key2, [""] * cols1, parts2] * sep
- if file2.eof?
- done2 = true
- else
- line2 = file2.gets
- line2.strip!
- parts2 = line2.split(sep)
- key2 = parts2.shift
- end
- when key1 == key2
- output.puts [key1, parts1, parts2] * sep
- if file1.eof?
- done1 = true
- else
- line1 = file1.gets
- line1.strip!
- parts1 = line1.split(sep)
- key1 = parts1.shift
- end
- if file2.eof?
- done2 = true
- else
- line2 = file2.gets
- line2.strip!
- parts2 = line2.split(sep)
- key2 = parts2.shift
- end
- end
- end
-
# Flush whatever is left of file1, padding file2's columns with empty strings.
- while not done1
- output.puts [key1, parts1, [""] * cols2] * sep
- if file1.eof?
- done1 = true
- else
- line1 = file1.gets
- line1.strip!
- parts1 = line1.split(sep)
- key1 = parts1.shift
- end
- end
-
# Flush whatever is left of file2, padding file1's columns with empty strings.
- while not done2
- output.puts [key2, [""] * cols1, parts2] * sep
- if file2.eof?
- done2 = true
- else
- line2 = file2.gets
- line2.strip!
- parts2 = line2.split(sep)
- key2 = parts2.shift
- end
- end
-
- output.close
- end
#{{{ Attach Methods
def attach_same_key(other, fields = nil)
fields = other.fields - [key_field].concat(self.fields) if fields.nil?
@@ -294,10 +194,12 @@
other = other.tsv unless TSV === other
field_positions = fields.collect{|field| other.identify_field field}
field_names = field_positions.collect{|pos| pos == :key ? other.key_field : other.fields[pos] }
+
+ length = self.fields.length
through do |key, values|
source_keys = index[key]
if source_keys.nil? or source_keys.empty?
all_new_values = []
else
@@ -313,29 +215,40 @@
end
else
other[source_key][pos]
end
end
- new_values.collect!{|v| [v]} if type == :double and not other.type == :double
+ new_values.collect!{|v| v.nil? ? [[]] : [v]} if type == :double and not other.type == :double
new_values.collect!{|v| v.nil? ? nil : v.first} if not type == :double and other.type == :double
all_new_values << new_values
end
end
if all_new_values.empty?
if type == :double
- self[key] = self[key].concat [[]] * field_positions.length
+ all_new_values = [[[]] * field_positions.length]
else
- self[key] = self[key].concat [""] * field_positions.length
+ all_new_values = [[""] * field_positions.length]
end
+ end
+
+ current = self[key]
+
+ if current.length > length
+ all_new_values << current.slice!(length..current.length - 1)
+ end
+
+ if type == :double
+ all_new_values = TSV.zip_fields(all_new_values).collect{|l| l.flatten}
else
- if type == :double
- self[key] = self[key].concat TSV.zip_fields(all_new_values).collect{|l| l.flatten}
- else
- self[key] = self[key].concat all_new_values.first
- end
+ all_new_values = all_new_values.first
end
+
+ current += all_new_values
+
+ self[key] = current
+
end
self.fields = self.fields.concat field_names
end
@@ -383,36 +296,37 @@
traversal_ids = path.collect{|p| p.first}
Log.medium "Found Traversal: #{traversal_ids * " => "}"
data_key, data_file = path.shift
- if data_key == data_file.key_field
- Log.debug "Data index not required '#{data_file.key_field}' => '#{data_key}'"
- data_index = nil
- else
- Log.debug "Data index required"
- data_index = data_file.index :target => data_key, :fields => data_file.key_field, :persistence => false
- end
+ data_index = if data_key == data_file.key_field
+ Log.debug "Data index not required '#{data_file.key_field}' => '#{data_key}'"
+ nil
+ else
+ Log.debug "Data index required"
+ data_file.index :target => data_key, :fields => data_file.key_field, :persistence => false
+ end
current_index = data_index
current_key = data_key
while not path.empty?
next_key, next_file = path.shift
if current_index.nil?
current_index = next_file.index :target => next_key, :fields => current_key, :persistence => persist_input
else
next_index = next_file.index :target => next_key, :fields => current_key, :persistence => persist_input
- current_index.process current_index.fields.first do |key, values, values|
+ current_index.process current_index.fields.first do |values|
if values.nil?
nil
else
next_index.values_at(*values).flatten.collect
end
end
current_index.fields = [next_key]
end
+ current_key = next_key
end
current_index
end
@@ -468,10 +382,12 @@
index = TSV.find_traversal(self, other, options)
raise "Cannot traverse identifiers" if index.nil?
attach_index other, index, fields
end
Log.medium("Attachment of fields:#{fields.inspect} from #{other.filename.inspect} finished.")
+
+ self
end
def detach(file)
file_fields = file.fields.collect{|field| field.fullname}
detached_fields = []
@@ -487,12 +403,23 @@
new.key_field = self.key_field unless self.key_field.nil?
if self.fields and other.fields
new.fields = self.fields + other.fields
end
-
+
FileUtils.rm tmpfile if File.exists? tmpfile
new
end
+
# Pastes +other+ onto this TSV by running TSV.paste_merge into a temporary
# file and loading that file as a new TSV.
#
# other   - The second input handed to TSV.paste_merge.
# options - Hash passed unchanged to TSV.new; its :sep entry, when present,
#           is also used as the field separator for paste_merge
#           (default: tab).
#
# Returns the value of TmpFile.with_file; presumably that is the block's
# result, i.e. the new TSV -- confirm in TmpFile.
+ def paste(other, options = {})
+ TmpFile.with_file do |output|
+ TSV.paste_merge(self, other, output, options[:sep] || "\t")
# The TSV is built inside the block, while the temporary path is still in use.
+ TSV.new output, options
+ end
+ end
+
# Joins +files+ side by side with the `paste` command, using +delim+ between
# the files, then pipes the result through `sed` to delete every occurrence
# of +delim+ together with the non-tab characters that follow it. Presumably
# this drops the leading (key) column of every file after the first -- confirm
# with the callers; `paste` pairs lines by position, so rows must already be
# aligned across files.
#
# files - Enumerable of file paths. Each one is wrapped in single quotes
#         without escaping, so a path containing a single quote breaks the
#         command line.
# delim - Delimiter given to `paste -d` and interpolated unescaped into the
#         sed pattern (default: "$"); characters that are special to sed are
#         interpreted as such. It should not occur in the data itself.
#
# Returns whatever CMD.cmd returns when called with :pipe => true.
+ def self.fast_paste(files, delim = "$")
# "\\t" reaches sed as a backslash followed by `t`.
# NOTE(review): relies on sed reading that as a tab inside a bracket
# expression -- confirm for the sed in use.
+ CMD.cmd("paste #{ files.collect{|f| "'#{f}'"} * " "} -d'#{delim}' |sed 's/#{delim}[^\\t]*//g'", :pipe => true)
+ end
end