lib/rbbt/util/tsv/index.rb in rbbt-util-3.2.0 vs lib/rbbt/util/tsv/index.rb in rbbt-util-3.2.1
- old
+ new
@@ -1,6 +1,7 @@
require 'rbbt/util/tsv/manipulate'
+require 'rbbt/util/tsv/filters'
require 'rbbt/util/fix_width_table'
class TSV
def index(options = {})
@@ -279,118 +280,140 @@
def guess_field(values)
field_matches(values).sort_by{|field, matches| matches.uniq.length}.last
end
- def sorted_index(pos_start = nil, pos_end = nil)
- raise "Please specify indexing fields" if (pos_start.nil? and fields.length > 2)
+ def pos_index(pos_field = nil, options = {})
+ pos_field ||= "Position"
- case
- when (pos_start.nil? and pos_end.nil? and fields.length == 2)
- pos_start = fields.first
- pos_end = fields.last
- when (pos_start.nil? and pos_end.nil? and fields.length == 1)
- pos_start = fields.first
- end
+ options = Misc.add_defaults options,
+ :persistence => true, :persistence_file => nil, :persistence_update => false
- range = ! pos_end.nil?
+ prefix = "Pos[#{pos_field}]"
- index = Persistence.persist(filename, "SortedIndex[#{range ? pos_start + ":" + pos_end : pos_start}]", :fwt, :start => pos_start, :end => pos_end, :range => range) do |filename, options|
- pos_start, pos_end, range = Misc.process_options options, :start, :end, :range
- data = case
- when (type == :double and range)
- collect do |key, values|
- p_start, p_end = values.values_at pos_start, pos_end
- next if p_start.nil? or p_end.nil? or p_start.empty? or p_end.empty?
- [[p_start.first, p_end.first], key]
- end
- when (type == :double and not range)
- collect do |key, values|
- p_start = values.values_at pos_start
- next if p_start.nil? or p_start.empty?
- [p_start.first, key]
- end
- when range
- slice [pos_start, pos_end]
- else
- slice pos_start
- end
- data
- end
+ Persistence.persist(filename, prefix, :fwt, options.merge({
+ :pos_field => pos_field,
+ :filters => (self.respond_to?(:filters)? filters.collect{|f| [f.match, f.value]} : [])
+ })) do |file, options, filename|
+ pos_field = options[:pos_field]
+ value_size = 0
+ index_data = []
- index
+ through :key, pos_field do |key, values|
+ value_size = key.length if key.length > value_size
+
+ pos = values.first
+ if Array === pos
+ pos.each do |p|
+ index_data << [key, p.to_i]
+ end
+ else
+ index_data << [key, pos.to_i]
+ end
+ end
+
+ index = FixWidthTable.get(:memory, value_size, false)
+ index.add_point index_data
+ index.read
+ index
+ end
end
- def pos_index(pos_field, file = nil, update = false)
- value_size = 0
- index_data = []
+ def self.pos_index(file, pos_field = nil, options = {})
+ options = Misc.add_defaults options,
+ :persistence => true, :persistence_file => nil, :persistence_update => false, :persistence_source => file, :tsv_serializer => :list,
+ :data_persistence => false, :data_persistence_file => nil, :data_persistence_update => false, :data_persistence_source => file
- file ||= filename + "-PosIndex[#{ pos_field }]" if filename
-
- through :key, pos_field do |key, values|
- value_size = key.length if key.length > value_size
+ options_data = {
+ :persistence => Misc.process_options(options, :data_persistence),
+ :persistence_file => Misc.process_options(options, :data_persistence_file),
+ :persistence_update => Misc.process_options(options, :data_persistence_update),
+ :persistence_source => Misc.process_options(options, :data_persistence_source),
+ }
- pos = values.first
- if Array === pos
- pos.each do |p|
- index_data << [key, p.to_i]
+
+ prefix = "Pos[#{pos_field}]"
+
+ new = Persistence.persist(file, prefix, :fwt, options.merge({:pos_field => pos_field})) do |file, options, filename|
+ tsv = TSV.new(file, :list, options_data)
+
+ if options.include?(:filters) and Array === options[:filters] and not options[:filters].empty?
+ tsv.filter
+ options[:filters].each do |match, value, persistence|
+ tsv.add_filter(match, value, persistence)
end
- else
- index_data << [key, pos.to_i]
end
- end
-
- pos_index = case
- when file == :memory
- index = FixWidthTable.new(file, value_size, false)
- index.add_point index_data
- index
- when (update or not File.exists? file)
- index = FixWidthTable.new(file, value_size, false, true)
- index.add_point index_data
- index
- else
- FixWidthTable.new(file, value_size, false)
- end
- pos_index
+ tsv.pos_index options[:pos_field], options.merge(:persistence => false, :persistence_file => nil)
+ end
end
- def range_index(start_field, end_field, file = nil, update = false)
- value_size = 0
- index_data = []
+ def range_index(start_field = nil, end_field = nil, options = {})
+ start_field ||= "Start"
+ end_field ||= "End"
+ options = Misc.add_defaults options,
+ :persistence => true, :persistence_file => nil, :persistence_update => false
- file ||= filename + "-PosIndex[#{ start_field }-#{end_field}]" if filename
-
- through :key, [start_field, end_field] do |key, values|
- value_size = key.length if key.length > value_size
+ prefix = "Range[#{start_field}-#{end_field}]"
- start_pos, end_pos = values
-
- if Array === start_pos
- start_pos.zip(end_pos).each do |s,e|
- index_data << [key, [s.to_i, e.to_i]]
+ Persistence.persist(filename, prefix, :fwt, options.merge({
+ :start_field => start_field, :end_field => end_field,
+ :filters => (self.respond_to?(:filters)? filters.collect{|f| [f.match, f.value]} : [])
+ })) do |file, options, filename|
+ start_field, end_field = options.values_at :start_field, :end_field
+
+ value_size = 0
+ index_data = []
+
+ through :key, [start_field, end_field] do |key, values|
+ value_size = key.length if key.length > value_size
+
+ start_pos, end_pos = values
+
+ if Array === start_pos
+ start_pos.zip(end_pos).each do |s,e|
+ index_data << [key, [s.to_i, e.to_i]]
+ end
+ else
+ index_data << [key, [start_pos.to_i, end_pos.to_i]]
end
- else
- index_data << [key, [start_pos.to_i, end_pos.to_i]]
end
+
+ index = FixWidthTable.get(:memory, value_size, true)
+ index.add_range index_data
+ index.read
+ index
end
-
- pos_index = case
- when file == :memory
- index = FixWidthTable.get(file, value_size, true)
- index.add_range index_data
- index.read
- index
- when (update or not File.exists?(file))
- index = FixWidthTable.get(file, value_size, true, true)
- index.add_range index_data
- index.read
- index
- else
- FixWidthTable.get(file, value_size, true)
- end
+ end
- pos_index
+ def self.range_index(file, start_field = nil, end_field = nil, options = {})
+ options = Misc.add_defaults options,
+ :persistence => true, :persistence_file => nil, :persistence_update => false, :persistence_source => file, :tsv_serializer => :list,
+ :data_persistence => false, :data_persistence_file => nil, :data_persistence_update => false, :data_persistence_source => file
+
+ options_data = {
+ :persistence => Misc.process_options(options, :data_persistence),
+ :persistence_file => Misc.process_options(options, :data_persistence_file),
+ :persistence_update => Misc.process_options(options, :data_persistence_update),
+ :persistence_source => Misc.process_options(options, :data_persistence_source),
+ }
+
+ prefix = "Range[#{start_field}-#{end_field}]"
+
+ options_data[:type] = :flat if options[:order] == false
+
+ Persistence.persist(file, prefix, :fwt, options.merge({:start_field => start_field, :end_field => end_field})) do |file, options, filename|
+ tsv = TSV.new(file, :list, options_data)
+
+ if options.include?(:filters) and Array === options[:filters] and not options[:filters].empty?
+ tsv.filter
+ options[:filters].each do |match, value, persistence|
+ tsv.add_filter(match, value, persistence)
+ end
+ end
+
+ tsv.range_index options[:start_field], options[:end_field], options.merge(:persistence => false, :persistence_file => nil)
+ end
end
end
+