lib/rbbt/util/tsv/index.rb in rbbt-util-3.1.0 vs lib/rbbt/util/tsv/index.rb in rbbt-util-3.2.0

- old
+ new

@@ -11,11 +11,11 @@ "Index[#{options[:target]}]" else "Index[:key]" end - new = Persistence.persist(self, prefix, :tsv, options) do |tsv, options, filename| + Persistence.persist(self, prefix, :tsv, options) do |tsv, options, filename| order, target, fields, case_insensitive = Misc.process_options options, :order, :target, :fields, :case_insensitive new = {} ## Ordered @@ -32,11 +32,11 @@ values.each_with_index do |list,i| list = [list] unless Array === list i += 1 if fields.nil? list.each do |elem| - next if elem.empty? + next if elem.nil? or elem.empty? elem.downcase if case_insensitive new[elem] ||= [] new[elem][i] ||= [] new[elem][i].concat keys end @@ -54,14 +54,13 @@ end # flatten new.each do |key, values| - values.flatten! - values.compact! + new[key] = values.flatten.compact end - + ## Not ordered else double_keys = true unless type != :double or identify_field(target) == :key new.each do |key, fields| fields.flatten! end @@ -72,11 +71,11 @@ else list = values.flatten unless type == :flat end list.collect!{|e| e.downcase} if case_insensitive list.each do |elem| - next if elem.empty? + next if elem.nil? or elem.empty? new[elem] ||= [] if double_keys new[elem].concat key else new[elem] << key @@ -101,11 +100,13 @@ when new_key_field.nil? nil else [new_key_field] end + new = TSV.new([new, {:namespace => namespace, :key_field => key_field, :fields => fields, :type => :flat, :filename => (filename.nil? ? nil : "Index:" + filename), :case_insensitive => case_insensitive}]) + new end end def self.index(file, options = {}) @@ -291,11 +292,11 @@ pos_start = fields.first end range = ! pos_end.nil? - index = Persistence.persist(filename, "SortedIndex[#{range ? pos_start + ":" + pos_end: pos_start}]", :fwt, :start => pos_start, :end => pos_end, :range => range) do |filename, options| + index = Persistence.persist(filename, "SortedIndex[#{range ? 
# NOTE: the text below is the tail of a truncated diff hunk that shared this
# physical line with the method definitions. Its enclosing method is not fully
# visible in this excerpt, so it is preserved (line breaks added) as a comment:
#
#   pos_start + ":" + pos_end : pos_start}]", :fwt, :start => pos_start, :end => pos_end, :range => range) do |filename, options|
#   pos_start, pos_end, range = Misc.process_options options, :start, :end, :range
#   data = case
#   when (type == :double and range)
#   collect do |key, values|
#   p_start, p_end = values.values_at pos_start, pos_end
#   @@ -317,8 +318,79 @@
#   end
#   index
#   end

# Builds (or reopens) a FixWidthTable holding one [key, position] point per
# value found in +pos_field+.
#
# Every entry is visited with `through :key, pos_field`; the first value of
# each entry is taken as the position (or list of positions) and converted
# with `to_i`. The longest key length seen is passed to FixWidthTable as the
# value size.
#
# @param pos_field field whose values hold the positions
# @param file      target of the table: a path, or :memory. When omitted it
#                  defaults to "<filename>-PosIndex[<pos_field>]", or to
#                  :memory when this object has no filename (previously that
#                  case crashed on the file-existence check with a nil path).
# @param update    when true, the table is re-populated even if +file+
#                  already exists
# @return [FixWidthTable]
#
# NOTE(review): range_index obtains its table through FixWidthTable.get and
# calls `read` after adding data, while this method uses FixWidthTable.new and
# never calls `read`. Left unchanged here -- confirm against FixWidthTable
# whether the two should agree.
def pos_index(pos_field, file = nil, update = false)
  value_size = 0
  index_data = []

  file ||= filename ? filename + "-PosIndex[#{ pos_field }]" : :memory

  through :key, pos_field do |key, values|
    value_size = [value_size, key.length].max

    pos = values.first
    if Array === pos
      pos.each { |p| index_data << [key, p.to_i] }
    else
      index_data << [key, pos.to_i]
    end
  end

  if file == :memory
    index = FixWidthTable.new(file, value_size, false)
    index.add_point index_data
    index
  elsif update || !File.exist?(file) # File.exists? was removed in Ruby 3.2
    # Fourth argument presumably asks FixWidthTable to (re)create the file -- TODO confirm.
    index = FixWidthTable.new(file, value_size, false, true)
    index.add_point index_data
    index
  else
    # Table file already present and no update requested: open it without adding data.
    FixWidthTable.new(file, value_size, false)
  end
end

# Builds (or reopens) a FixWidthTable holding one [key, [start, end]] range
# per pair of values found in +start_field+ and +end_field+.
#
# Every entry is visited with `through :key, [start_field, end_field]`. When
# the start value is an Array, it is zipped with the end value so that the
# i-th start is paired with the i-th end; all positions are converted with
# `to_i`. The longest key length seen is passed to FixWidthTable as the value
# size.
#
# @param start_field field holding the range starts
# @param end_field   field holding the range ends
# @param file        target of the table: a path, or :memory. When omitted it
#                    defaults to "<filename>-PosIndex[<start_field>-<end_field>]",
#                    or to :memory when this object has no filename
#                    (previously that case crashed on the file-existence check
#                    with a nil path).
# @param update      when true, the table is re-populated even if +file+
#                    already exists
# @return [FixWidthTable]
def range_index(start_field, end_field, file = nil, update = false)
  value_size = 0
  index_data = []

  file ||= filename ? filename + "-PosIndex[#{ start_field }-#{end_field}]" : :memory

  through :key, [start_field, end_field] do |key, values|
    value_size = [value_size, key.length].max

    start_pos, end_pos = values

    if Array === start_pos
      start_pos.zip(end_pos).each do |s, e|
        index_data << [key, [s.to_i, e.to_i]]
      end
    else
      index_data << [key, [start_pos.to_i, end_pos.to_i]]
    end
  end

  if file == :memory
    index = FixWidthTable.get(file, value_size, true)
    index.add_range index_data
    index.read
    index
  elsif update || !File.exist?(file) # File.exists? was removed in Ruby 3.2
    # Fourth argument presumably asks FixWidthTable to (re)create the file -- TODO confirm.
    index = FixWidthTable.get(file, value_size, true, true)
    index.add_range index_data
    index.read
    index
  else
    # Table file already present and no update requested: open it without adding data.
    FixWidthTable.get(file, value_size, true)
  end
end

# end  <- final `end` of the excerpt; it closes an enclosing definition whose
#         opening line is outside this view, so it is kept here as a comment.