lib/rbbt/util/tsv/index.rb in rbbt-util-3.1.0 vs lib/rbbt/util/tsv/index.rb in rbbt-util-3.2.0
- old
+ new
@@ -11,11 +11,11 @@
"Index[#{options[:target]}]"
else
"Index[:key]"
end
- new = Persistence.persist(self, prefix, :tsv, options) do |tsv, options, filename|
+ Persistence.persist(self, prefix, :tsv, options) do |tsv, options, filename|
order, target, fields, case_insensitive = Misc.process_options options, :order, :target, :fields, :case_insensitive
new = {}
## Ordered
@@ -32,11 +32,11 @@
values.each_with_index do |list,i|
list = [list] unless Array === list
i += 1 if fields.nil?
list.each do |elem|
- next if elem.empty?
+ next if elem.nil? or elem.empty?
elem.downcase if case_insensitive
new[elem] ||= []
new[elem][i] ||= []
new[elem][i].concat keys
end
@@ -54,14 +54,13 @@
end
# flatten
new.each do |key, values|
- values.flatten!
- values.compact!
+ new[key] = values.flatten.compact
end
-
+
## Not ordered
else
double_keys = true unless type != :double or identify_field(target) == :key
new.each do |key, fields| fields.flatten! end
@@ -72,11 +71,11 @@
else
list = values.flatten unless type == :flat
end
list.collect!{|e| e.downcase} if case_insensitive
list.each do |elem|
- next if elem.empty?
+ next if elem.nil? or elem.empty?
new[elem] ||= []
if double_keys
new[elem].concat key
else
new[elem] << key
@@ -101,11 +100,13 @@
when new_key_field.nil?
nil
else
[new_key_field]
end
+
new = TSV.new([new, {:namespace => namespace, :key_field => key_field, :fields => fields, :type => :flat, :filename => (filename.nil? ? nil : "Index:" + filename), :case_insensitive => case_insensitive}])
+
new
end
end
def self.index(file, options = {})
@@ -291,11 +292,11 @@
pos_start = fields.first
end
range = ! pos_end.nil?
- index = Persistence.persist(filename, "SortedIndex[#{range ? pos_start + ":" + pos_end: pos_start}]", :fwt, :start => pos_start, :end => pos_end, :range => range) do |filename, options|
+ index = Persistence.persist(filename, "SortedIndex[#{range ? pos_start + ":" + pos_end : pos_start}]", :fwt, :start => pos_start, :end => pos_end, :range => range) do |filename, options|
pos_start, pos_end, range = Misc.process_options options, :start, :end, :range
data = case
when (type == :double and range)
collect do |key, values|
p_start, p_end = values.values_at pos_start, pos_end
@@ -317,8 +318,79 @@
end
index
end
# Builds (or reopens) a FixWidthTable holding one [key, position] point per
# position found in +pos_field+.
#
# pos_field:: field whose values are the positions to index.
# file::      where the index lives. Defaults to
#             "<filename>-PosIndex[<pos_field>]" when this object has a
#             filename. The symbol :memory bypasses the on-disk existence
#             check and always fills the table.
# update::    when true the table is refilled even if +file+ already exists.
#
# Returns the FixWidthTable instance.
#
# NOTE(review): if neither +file+ nor +filename+ is set, +file+ stays nil and
# the existence check raises; confirm whether callers can reach that state.
def pos_index(pos_field, file = nil, update = false)
  value_size = 0
  index_data = []

  file ||= filename + "-PosIndex[#{ pos_field }]" if filename

  through :key, pos_field do |key, values|
    # The table is created with the length of the longest key seen.
    value_size = key.length if key.length > value_size

    pos = values.first
    if Array === pos
      # Several positions for one key: one point per position.
      pos.each { |position| index_data << [key, position.to_i] }
    else
      index_data << [key, pos.to_i]
    end
  end

  case
  when file == :memory
    index = FixWidthTable.new(file, value_size, false)
    index.add_point index_data
    index
  when update || !File.exist?(file)
    # File.exist? replaces the deprecated File.exists?, removed in Ruby 3.2.
    # The extra trailing `true` is presumably the rebuild flag of
    # FixWidthTable -- TODO confirm against its constructor.
    index = FixWidthTable.new(file, value_size, false, true)
    index.add_point index_data
    index
  else
    # Index file already present and no update requested: open it as is.
    FixWidthTable.new(file, value_size, false)
  end
end
+
# Builds (or reopens) a range FixWidthTable holding one [key, [start, end]]
# entry per range found in +start_field+ / +end_field+.
#
# start_field:: field with the range start position(s).
# end_field::   field with the range end position(s).
# file::        where the index lives. Defaults to
#               "<filename>-PosIndex[<start_field>-<end_field>]" when this
#               object has a filename. The symbol :memory bypasses the
#               on-disk existence check and always fills the table.
# update::      when true the table is refilled even if +file+ already exists.
#
# Returns the table obtained from FixWidthTable.get.
#
# NOTE(review): if neither +file+ nor +filename+ is set, +file+ stays nil and
# the existence check raises; confirm whether callers can reach that state.
def range_index(start_field, end_field, file = nil, update = false)
  value_size = 0
  index_data = []

  file ||= filename + "-PosIndex[#{ start_field }-#{end_field}]" if filename

  through :key, [start_field, end_field] do |key, values|
    # The table is created with the length of the longest key seen.
    value_size = key.length if key.length > value_size

    start_pos, end_pos = values

    if Array === start_pos
      # Several ranges for one key: pair starts with ends positionally.
      start_pos.zip(end_pos).each do |range_start, range_end|
        index_data << [key, [range_start.to_i, range_end.to_i]]
      end
    else
      index_data << [key, [start_pos.to_i, end_pos.to_i]]
    end
  end

  case
  when file == :memory
    index = FixWidthTable.get(file, value_size, true)
    index.add_range index_data
    index.read
    index
  when update || !File.exist?(file)
    # File.exist? replaces the deprecated File.exists?, removed in Ruby 3.2.
    # The extra trailing `true` is presumably the rebuild flag of
    # FixWidthTable.get -- TODO confirm against its definition.
    index = FixWidthTable.get(file, value_size, true, true)
    index.add_range index_data
    index.read
    index
  else
    # Index file already present and no update requested: open it as is.
    FixWidthTable.get(file, value_size, true)
  end
end
+
+end