require 'external/base'
require 'ext_ind'
require 'fileutils'
require 'yaml'

#--
# later separate out individual objects logically
# If writing, create new files:
# - base/object_id.aio (new file for receiving appends)
# - base/object_id.index (copy of existing index -- made on first insertion)
# - in index, -index indicates object_id.aio file whereas +index indicates original file
# - .consolidate(rename) resolves changes in index into the object_id file, renaming as needed
#   requires index rewrite as well, to remove negatives
#
# If appending, ONLY allow << and all changes get committed to the original file.
#
# This should allow returning of new arrayio objects under read/write conditions
# By default read-only.  No insertions.  New ExtArr objects inherit parent mode.
#
# Independent modes:
# - r
# - r+
# - For safety, w/w+ will by default act as r/r+, simply creating new .aio and .index files
#   changes to the originals will NOT be made unless .consolidate(rename) is used.  Allow option io_w => true
# - b ALWAYS on with Windows
#++

# An Array-like view over entries that are serialized (via to_yaml) into an
# io.  A companion ExtInd index stores, for every entry, the position and
# length of its serialized form within the io; reads seek to that position
# and deserialize, writes append to the end of the io and update the index.
class ExtArr < External::Base
  class << self
    # Creates a new ExtArr (with no io argument) and stores each of the
    # given args into it, in order.
    def [](*args)
      ab = new
      args.each { |arg| ab[ab.length] = arg }
      ab
    end

    # The options handed to ExtInd.open when the index is created.
    def default_index_options
      {:format => 'II', :nil_value => [0,0], :cached => true}
    end

    # Infers the index filepath for a data file: the extension of
    # filepath is replaced by '.index'.
    def default_index_filepath(filepath)
      filepath.chomp(File.extname(filepath)) + '.index'
    end
  end

  attr_reader :index, :pos_index, :length_index

  # Initializes a new ExtArr over io.
  #
  # Options read here:
  # :pos_index::    offset of the entry position within an index entry
  #                 (default 0)
  # :length_index:: offset of the entry length within an index entry
  #                 (default 1)
  # :index_file::   path of the index file; when the key is absent the path
  #                 is inferred from io if io is a File, otherwise nil
  def initialize(io=nil, options={})
    super(io)

    # NOTE(review): these two values are not read anywhere in this file;
    # presumably they are consumed by External::Base -- confirm.
    @max_gap = 10000
    @max_chunk_size = 1000000

    @pos_index = options[:pos_index].nil? ? 0 : options[:pos_index]
    @length_index = options[:length_index].nil? ? 1 : options[:length_index]

    # TODO -- merge in specified index options
    index_options = self.class.default_index_options

    # determine the index file.  if a file is specified,
    # use it, otherwise infer the index filepath from
    # the io.  Note a nil index_file is ok -- this simply
    # means the index file will be a Tempfile
    index_file =
      if options.has_key?(:index_file)
        options[:index_file]
      elsif io.kind_of?(File)
        self.class.default_index_filepath(io.path)
      end

    # ensure the file exists before trying to open it.  The nil guard
    # matters when :index_file => nil is passed explicitly with a File io;
    # File.exist? replaces File.exists?, which was removed in Ruby 3.2.
    if io.kind_of?(File) && !index_file.nil? && !File.exist?(index_file)
      FileUtils.touch(index_file)
    end

    @index = ExtInd.open(index_file, "r+", index_options)
  end

  # True when super reports closed and the index is either closed or
  # cannot be closed at all.
  def closed?
    super && (!index.respond_to?(:close) || index.closed?)
  end

  # Closes via super, then closes the index if it is closable and still open.
  def close
    super
    index.close if index.respond_to?(:close) && !index.closed?
  end

  # Clears and rebuilds the index.  With a block, the (cleared) index is
  # yielded and the block is responsible for filling it.  Without a block
  # the io is scanned using '---' as the line separator and one
  # [position, length] entry is written per non-empty segment.
  # Returns self.
  def reindex
    index.clear
    io.flush unless io.generic_mode == "r"
    io.rewind

    if block_given?
      yield(index)
    else
      current_pos = 0
      entry_begin = 0
      io_length = io.length

      io.each_line('---') do |line|
        # Note positions MUST be built up from the size of each line;
        # io.pos cannot return positions greater than ~2.1e9.
        # bytesize (not length) is used because index positions are io
        # offsets; the two only differ for multibyte, non-binary strings.
        current_pos += line.bytesize

        # every segment but the last ends with the 3-char '---' separator,
        # which belongs to the start of the next entry
        entry_end = current_pos - (current_pos == io_length ? 0 : 3)
        next if entry_begin == entry_end

        index.unframed_write [entry_begin, entry_end - entry_begin]
        entry_begin = entry_end
      end
    end

    self
  end

  # Deserializes an entry from its string form; nil is passed through.
  def str_to_entry(str)
    # NOTE(review): YAML.load on data read back from the io.  If the io can
    # hold untrusted content this allows arbitrary object deserialization
    # on older Psych versions -- consider YAML.safe_load after confirming
    # which entry classes must round-trip.
    str.nil? ? nil : YAML.load(str)
  end

  # Serializes an entry to the string written into the io.
  def entry_to_str(entry)
    # could chop the beginning and end to save space
    # (ie "--- blah\n" => "blah") but there would be
    # a tradeoff for time.  especially true for Numerics
    # which don't need anything to be retranslated properly
    entry.to_yaml
  end

  # The io position stored in an index entry.
  def entry_pos(index)
    index[pos_index]
  end

  # The serialized length stored in an index entry.
  def entry_length(index)
    index[length_index]
  end

  ###########################
  # Array methods
  ###########################

  # def &(another)
  #   not_implemented
  # end

  # def *(arg)
  #   not_implemented
  # end

  # Appends the items of another to self by delegating to concat.
  # Note that, unlike Array#+, this modifies and returns self.
  def +(another)
    concat(another)
  end

  # def -(another)
  #   not_implemented
  # end

  # Appends obj as a new last entry.  Returns self.
  def <<(obj)
    self[length] = obj
    self
  end

  # Compares self to an Array or another ExtArr.  Raises a TypeError for
  # any other type.
  def <=>(another)
    case another
    when Array
      if another.length < length
        # if another is equal to the matching subset of self,
        # then self is obviously the longer array and wins.
        result = (to_a(another.length) <=> another)
        result == 0 ? 1 : result
      else
        to_a <=> another
      end
    when ExtArr
      # if indexes are equal, additional 'quick' comparisons are
      # allowed: equal in comparison if the ios are equal
      return 0 if index == another.index && io.quick_compare(another.io)

      io.flush
      another.io.flush

      # should chunk compare
      if another.length > length
        result = (to_a <=> another.to_a(length))
        result == 0 ? -1 : result
      elsif another.length < length
        result = (to_a(another.length) <=> another.to_a)
        result == 0 ? 1 : result
      else
        to_a <=> another.to_a
      end
    else
      raise TypeError, "can't convert from #{another.class} to ExtArr or Array"
    end
  end

  # True if another is an Array or ExtArr with the same length and equal
  # entries.  Any other type is never equal.
  def ==(another)
    case another
    when Array
      # test simply based on length, then compare arrays
      length == another.length && to_a == another
    when ExtArr
      # test simply based on length
      return false unless length == another.length

      # if indexes are equal, additional 'quick' comparisons are
      # allowed: equal in comparison if the ios are equal
      if index == another.index
        return true if io.sort_compare(another.io, (index.buffer_size/2).ceil) == 0
      end

      # compare arrays
      to_a == another.to_a
    else
      false
    end
  end

  # Element reference.  With a single non-Range input the deserialized
  # entry is returned; with a Range, or an input and a length, an array of
  # deserialized entries is returned.  Whatever nil/empty result the index
  # lookup produces is returned as-is.
  def [](input, length=nil)
    # two call types are required because while ExtInd can take
    # a nil length, Array cannot and index can be either
    entries = (length.nil? ? index[input] : index[input, length])

    # for conformance with array range retrieval
    return entries if entries.nil? || entries.empty?

    if length.nil? && !input.kind_of?(Range)
      # single entry, just read it
      io.pos = entry_pos(entries)
      str_to_entry(io.read(entry_length(entries)))
    else
      pos = nil
      entries.collect do |entry|
        # only set io position if necessary, i.e. when the entry
        # does not start where the previous read left off
        epos = entry_pos(entry)
        unless pos == epos
          pos = epos
          io.pos = pos
        end

        elen = entry_length(entry)
        pos += elen

        # read entry
        str_to_entry(io.read(elen))
      end
    end
  end

  # Element assignment.  Accepts (index, value), (range, value) or
  # (index, length, value).  New data is always appended to the end of the
  # io and the index is updated to point at it.
  #
  # Raises ArgumentError with fewer than two arguments, TypeError when a
  # Range is combined with a length, and IndexError when a negative index
  # reaches before the start of self.
  def []=(*args)
    raise ArgumentError, "wrong number of arguments (1 for 2)" if args.length < 2

    position, count, value = args
    value = count if args.length == 2

    if position.kind_of?(Range)
      raise TypeError, "can't convert Range into Integer" if args.length == 3

      # for conformance with setting a range with nil (truncates)
      value = [] if value.nil?
      offset, span = split_range(position)
      return (self[offset, span + 1] = value)
    end

    position += length if position < 0
    raise IndexError, "index #{position} out of range" if position < 0

    write_pos = io.length

    if args.length == 2
      #value = self.to_a if value.kind_of?(ExtInd)

      # write entry to io first as a check
      # that io is open for writing.
      io.pos = write_pos
      written = io.write(entry_to_str(value))
      io.length += written

      index[position] = [write_pos, written]
    else
      values =
        case value
        when Array then value
        when ExtArr
          # special case, self will be reading and writing from the
          # same io, producing incorrect results; potential to load
          # a huge amount of data
          value.object_id == object_id ? to_a : value
        else
          [value]
        end

      # seek only after the values are resolved: to_a (above) reads from
      # the io and would otherwise leave the position somewhere other
      # than the offset recorded in the index
      io.pos = write_pos

      new_entries = []
      values.each do |item|
        written = io.write(entry_to_str(item))
        new_entries << [write_pos, written]

        io.length += written
        write_pos += written
      end

      index[position, count] = new_entries
    end
  end

  # def abbrev(pattern=nil)
  #   not_implemented
  # end

  # def assoc(obj)
  #   not_implemented
  # end

  # Returns entry at index
  def at(index)
    self[index]
  end

  # Removes all elements from _self_.
  def clear
    io.truncate(0)
    index.clear
    self
  end

  # def compact
  #   not_implemented
  # end

  # def compact!
  #   not_implemented
  # end

  # Appends every item of another (an Array or ExtArr) to self.  Raises a
  # TypeError for any other type.  Returns self.
  def concat(another)
    case another
    when Array, ExtArr
      # when concatenating self onto itself, snapshot the entries first;
      # otherwise each would be reading the same io and index that the
      # appends are modifying
      items = another.equal?(self) ? to_a : another
      items.each { |item| self[length] = item }
    else
      raise TypeError, "can't convert #{another.class} into ExtArr or Array"
    end
    self
  end

  # def dclone
  #   not_implemented
  # end

  # def delete(obj)
  #   not_implemented
  # end

  # def delete_at(index)
  #   not_implemented
  # end

  # def delete_if # :yield: item
  #   not_implemented
  # end

  # Yields each deserialized entry in index order.  Returns self.
  def each(&block) # :yield: item
    pos = nil
    index.each do |entry|
      # only set io position if necessary
      start = entry[pos_index]
      unless pos == start
        pos = start
        io.pos = pos
      end

      size = entry[length_index]
      pos += size

      # yield entry
      yield str_to_entry(io.read(size))
    end
    self
  end

  # Yields each index from 0 to length-1.  Returns self.
  def each_index(&block) # :yield: index
    0.upto(length-1, &block)
    self
  end

  # Returns true if _self_ contains no elements
  def empty?
    length == 0
  end

  # Same as ==.
  def eql?(another)
    self == another
  end

  # def fetch(index, default=nil, &block)
  #   index += index_length if index < 0
  #   val = (index >= length ? default : self[index])
  #   block_given? ? yield(val) : val
  # end
  #
  # def fill(*args)
  #   not_implemented
  # end

  # Returns the first n entries (default 1)
  def first(n=nil)
    n.nil? ? self[0] : self[0, n]
  end

  # def flatten
  #   not_implemented
  # end

  # def flatten!
  #   not_implemented
  # end

  # def frozen?
  #   not_implemented
  # end

  # def hash
  #   not_implemented
  # end

  # def include?(obj)
  #   not_implemented
  # end

  # def index(obj)
  #   not_implemented
  # end
  #
  # def indexes(*args)
  #   values_at(*args)
  # end
  #
  # def indicies(*args)
  #   values_at(*args)
  # end

  # def replace(other)
  #   not_implemented
  # end

  # def insert(index, *obj)
  #   self[index] = obj
  # end

  # def inspect
  #   not_implemented
  # end

  # def join(sep=$,)
  #   not_implemented
  # end

  # Returns the last n entries (default 1)
  def last(n=nil)
    return self[-1] if n.nil?

    start = [length - n, 0].max
    self[start, n]
  end

  # Returns the number of entries in self
  def length
    index.length
  end

  # def nitems
  #   not_implemented
  # end

  # def pack(aTemplateString)
  #   not_implemented
  # end

  # def pop
  #   not_implemented
  # end

  # def pretty_print(q)
  #   not_implemented
  # end

  # def pretty_print_cycle(q)
  #   not_implemented
  # end

  # def push(*obj)
  #   not_implemented
  # end

  # def quote
  #   not_implemented
  # end

  # def rassoc(key)
  #   not_implemented
  # end

  # def replace(another)
  #   not_implemented
  # end

  # def reverse
  #   not_implemented
  # end

  # def reverse!
  #   not_implemented
  # end

  # def reverse_each(&block)
  #   reverse_chunk do |offset, length|
  #     self[offset, length].reverse_each(&block)
  #   end
  # end

  # def rindex(obj)
  #   not_implemented
  # end

  # def select # :yield: item
  #   not_implemented
  # end

  # def shift
  #   not_implemented
  # end

  # Alias for length
  def size
    length
  end

  # def slice(*args)
  #   self.call(:[], *args)
  # end

  # def slice!(*args)
  #   not_implemented
  # end

  # Returns the first length entries (default all) as an Array; an empty
  # Array when length is 0.
  def to_a(length=self.length)
    length == 0 ? [] : self[0, length]
  end

  # def to_ary
  #   not_implemented
  # end

  # Returns _self_.join.
  # def to_s
  #   self.join
  # end

  # def to_yaml(opts={})
  #   self[0, self.length].to_yaml(opts)
  # end

  # def transpose
  #   not_implemented
  # end

  # def uniq
  #   not_implemented
  # end

  # def uniq!
  #   not_implemented
  # end

  # def unshift(*obj)
  #   not_implemented
  # end

  # Returns an array containing the chars in io corresponding to the given
  # selector(s). The selectors may be either integer indices or ranges
  # def values_at(*selectors)
  #   selectors.collect {|s| self[s]}.flatten
  # end

  # def yaml_initialize(tag, val)
  #   not_implemented
  # end

  # def |(another)
  #   not_implemented
  # end
end

# class Hold
#   # :nodoc:
#
#
#
#   # def each_with_slice(&block)
#   #   index.each do |slice|
#   #     yield( read(slice), slice )
#   #   end
#   # end
#
#
#   def scan_collect(indicies=index, &block)
#     return indicies if indicies.nil? || indicies.empty?
#
#     collection = []
#     span(*indicies) do |offset, length, spans|
#       io.pos = offset
#       scanner = StringScanner.new(io.read(length))
#
#       spans.collect {|i| indicies[i]}.each do |span|
#         soffset, slength = span
#         sbegin = soffset - offset
#         send = sbegin + slength
#
#         scanner.pos = sbegin
#         collection << yield(scanner, offset, span)
#         raise "Oops! Scanned beyond end [begin, end, index]: #{[sbegin, send, span]}" if scanner.pos > send
#       end
#     end
#     collection
#   end
#
#
#
#   # def set_modes(mode)
#   #   @uncached = parse_mode(mode, 'u')
#   # end
#   #
#   # def parse_mode(mode, let)
#   #   if mode =~ Regexp.new(let, Regexp::IGNORECASE)
#   #     mode.delete!(let)
#   #     true
#   #   else
#   #     false
#   #   end
#   # end
#
#   #def default_span
#   #  use this rather than io.length because for very large io, io.length is prone to errors
#   #  last = index.last
#   #  [0, last.first + last.last]
#   #end
#
#   # def io_fetch(index, length)
#   #   io.pos = index unless io.pos = index
#   #
#   #   if length.nil?
#   #     io.read(1)
#   #   else
#   #     results = []
#   #     str = io.read(length)
#   #     str.each_char {|c| results << c} if str
#   #     results
#   #   end
#   # end
#   #
#   # # MUST increment io length to the end position if it writes past io.length
#   # def io_store(index, value)
#   #   io.pos = index unless io.pos = index
#   #   end_pos = index + io.write(value)
#   #   io.length = end_pos if end_pos > io.length
#   # end
#
#   def index=(index)
#     @index.close if @index.respond_to?(:close)
#
#     # cache the index if specified
#     @index = if cached?
#       # if in cached mode, transform
#       #
#       case index
#       when Array then index
#       when ExtInd
#         cached_index = index.read(nil, 0)
#         index.close
#         cached_index
#       when nil
#         if io.respond_to?(:path)
#           index_file = self.class.default_index_filepath(io.path)
#           ExtInd.read(index_file, index_options)
#         else
#           []
#         end
#       else
#         raise "unknown index type: #{index}"
#       end
#     else
#       case index
#       when Array
#         uncached_index = ExtInd.new(nil, index_options)
#         uncached_index.write(index)
#         uncached_index
#       when ExtInd then index
#       when nil
#         if io.respond_to?(:path)
#           index_file = self.class.default_index_filepath(io.path)
#           ExtInd.open(index_file, index_options)
#         else
#           ExtInd.new(nil, index_options)
#         end
#       else
#         raise "unknown index type: #{index}"
#       end
#     end
#
#
#   end
# end