lib/rbbt/tsv/change_id.rb in rbbt-util-5.14.33 vs lib/rbbt/tsv/change_id.rb in rbbt-util-5.14.34
- old
+ new
@@ -1,7 +1,9 @@
require 'rbbt/tsv'
+require 'rbbt/persist'
+
module TSV
def self.change_key(tsv, format, options = {}, &block)
options = Misc.add_defaults options, :persist => false, :identifiers => tsv.identifiers
identifiers, persist_input = Misc.process_options options, :identifiers, :persist_input
@@ -82,7 +84,124 @@
# Instance-level shortcut: forwards to TSV.swap_id with the receiver as the
# first argument, followed by whatever arguments were given.
def swap_id(*args)
  TSV.swap_id self, *args
end
+ def self.translation_index(files, target = nil, source = nil, options = {})
+ return nil if source == target
+ options = Misc.add_defaults options.dup, :persist => true
+ fields = source ? [source] : nil
+ files.each do |file|
+ if TSV === file
+ all_fields = file.all_fields
+ target = file.fields.first if target.nil?
+ return file.index(options.merge(:target => target, :fields => fields, :order => true)) if (source.nil? or all_fields.include? source) and all_fields.include? target
+ else
+ all_fields = TSV.parse_header(file).all_fields
+ target = all_fields[1] if target.nil?
+ return TSV.index(file, options.merge(:target => target, :fields => fields, :order => true)) if (source.nil? or all_fields.include? source) and all_fields.include? target
+ end
+ end
+ files.each do |file|
+ all_fields = TSV === file ? file.all_fields : TSV.parse_header(file).all_fields
+
+ files.each do |other_file|
+ next if file == other_file
+
+ other_all_fields = TSV === other_file ? other_file.all_fields : TSV.parse_header(other_file).all_fields
+
+ common_field = (all_fields & other_all_fields).first
+
+ if common_field and (source.nil? or fields.include? source) and all_fields.include? common_field and
+ other_all_fields.include? common_field and other_all_fields.include? target
+
+ return Persist.persist_tsv(nil, Misc.fingerprint(files), {:files => files, :source => source, :target => target}, :prefix => "Translation index", :persist => options[:persist]) do |data|
+ index = TSV === file ?
+ file.index(options.merge(:target => common_field, :fields => fields)) :
+ TSV.index(file, options.merge(:target => common_field, :fields => fields))
+
+ other_index = TSV === other_file ?
+ other_file.index(options.merge(:target => target, :fields => [common_field])) :
+ TSV.index(other_file, options.merge(:target => target, :fields => [common_field]))
+
+ data.serializer = :clean
+ data.merge! index.to_list.attach(other_index.to_list).slice([target]).to_single
+
+ data
+ end
+ end
+ end
+ end
+ return nil
+ end
+
+ def self.translate(tsv, *args)
+ new = TSV.open translate_stream(tsv, *args)
+ new.identifiers = tsv.identifiers
+ new
+ end
+
+ def self.translate_stream(tsv, field, format, options = {}, &block)
+ options = Misc.add_defaults options, :persist => false, :identifier_files => tsv.identifier_files, :compact => true
+
+ identifier_files, identifiers, persist_input, compact = Misc.process_options options, :identifier_files, :identifiers, :persist, :compact
+ identifier_files = [tsv, identifiers].compact if identifier_files.nil? or identifier_files.empty?
+
+ identifier_files.uniq!
+
+ index = translation_index identifier_files, format, field, options.dup
+ raise "No index: #{Misc.fingerprint([identifier_files, field, format])}" if index.nil?
+
+ orig_type = tsv.type
+ tsv = tsv.to_double if orig_type != :double
+
+ pos = tsv.identify_field field
+
+ new_options = tsv.options
+ new_options[:identifiers] = tsv.identifiers.find if tsv.identifiers
+
+ case pos
+ when :key
+ new_options[:key_field] = format if tsv.key_field == field
+ dumper = TSV::Dumper.new new_options
+ dumper.init
+ TSV.traverse tsv, :into => dumper do |key,values|
+ new_key = index[key]
+ [new_key, values]
+ end
+ else
+ new_options[:fields] = tsv.fields.collect{|f| f == field ? format : f }
+ dumper = TSV::Dumper.new new_options
+ dumper.init
+
+ case tsv.type
+ when :double
+ TSV.traverse tsv, :into => dumper do |key,values|
+ original = values[pos]
+ new = index.values_at *original
+ values[pos] = new
+ [key, values]
+ end
+ when :list
+ TSV.traverse tsv, :into => dumper do |key,values|
+ original = values[pos]
+ new = index[original]
+ values[pos] = new
+ [key, values]
+ end
+ when :flat
+ TSV.traverse tsv, :into => dumper do |key,values|
+ new = index.values_at *values
+ [key, new]
+ end
+ when :single
+ TSV.traverse tsv, :into => dumper do |key,original|
+ new = index[original]
+ [key, new]
+ end
+ end
+ end
+
+ dumper.stream
+ end
end