lib/external/base.rb in external-0.1.0 vs lib/external/base.rb in external-0.3.0

- old
+ new

# For some inexplicable reason yaml MUST be required before
# tempfile in order for ExtArrTest::test_LSHIFT to pass.
# Otherwise it fails with 'TypeError: allocator undefined for Proc'

require 'yaml'
require 'tempfile'
# StringIO and FileUtils are referenced directly below; require them
# explicitly (after tempfile, to keep the ordering noted above) instead
# of relying on other libraries loading them as a side effect.
require 'stringio'
require 'fileutils'

require 'external/enumerable'
require 'external/io'

module External

  # Base provides shared IO and Array-like methods used by ExternalArchive,
  # ExternalArray, and ExternalIndex.
  #
  # Subclasses are expected to supply at least +length+, <tt>[]</tt>,
  # +concat+ and +another+; they are called here but not defined here.
  class Base
    class << self

      # Initializes an instance of self with File.open(path, mode) as an io.
      # As with File.open, the instance will be passed to the block and
      # closed when the block returns; the block's value is returned.
      # If no block is given, open returns the new instance.
      #
      # Nil may be provided as the path, in which case the instance is
      # created with a nil io (Base#initialize substitutes a Tempfile) and
      # mode is ignored.
      #
      # Any additional arguments are passed through to +new+.
      def open(path=nil, mode="rb", *argv)
        io = path.nil? ? nil : File.open(path, mode)

        begin
          base = new(io, *argv)
        rescue StandardError
          # don't leak the file handle when initialization fails
          io.close if io && !io.closed?
          raise
        end

        return base unless block_given?

        begin
          yield(base)
        ensure
          base.close
        end
      end
    end

    include External::Enumerable
    include External::Chunkable

    # The underlying io for self.
    attr_reader :io

    # The default tempfile basename for Base instances
    # initialized without an io.
    TEMPFILE_BASENAME = "external_base"

    # Creates a new instance of self with the specified io.  A
    # nil io causes initialization with a Tempfile; a string
    # io will be converted into a StringIO.  Any other object
    # is used as the io directly.
    def initialize(io=nil)
      self.io = case io
      when nil    then Tempfile.new(TEMPFILE_BASENAME)
      when String then StringIO.new(io)
      else io
      end

      # NOTE(review): not read in this file; presumably consulted by
      # External::Enumerable -- confirm.
      @enumerate_to_a = true
    end

    # True if io is closed.
    def closed?
      io.closed?
    end

    # Closes io.  If a path is specified, io will be dumped to it.  If
    # io is a File or Tempfile, the existing file is moved (not dumped)
    # to path.  Raises an ArgumentError if path already exists and
    # overwrite is not specified.
    #
    # Returns true if io was open when close was called, false otherwise.
    def close(path=nil, overwrite=false)
      result = !io.closed?

      if path
        # File.exists? was removed in Ruby 3.2; File.exist? works everywhere.
        if File.exist?(path) && !overwrite
          raise ArgumentError, "already exists: #{path}"
        end

        case io
        when File, Tempfile
          # close first so buffered writes hit the disk before the move
          io.close unless io.closed?
          FileUtils.move(io.path, path)
        else
          io.flush
          io.rewind
          # Binary mode so the dump is byte-for-byte (the default read
          # mode for open is "rb").  default_blksize is expected to come
          # from the Io extension applied in io=.
          File.open(path, "wb") do |file|
            file << io.read(io.default_blksize) until io.eof?
          end
        end
      end

      io.close unless io.closed?
      result
    end

    # Flushes the io and calls io.reset_length (a method expected from
    # the Io extension).  Returns self.
    def flush
      io.flush
      io.reset_length
      self
    end

    # Returns a duplicate of self, built by concatenating self onto
    # another.  This can be a slow operation as it may involve copying
    # the full contents of one large file to another.
    def dup
      flush
      another.concat(self)
    end

    # Returns another instance of self.  Must be
    # implemented in a subclass.
    def another
      raise NotImplementedError
    end

    ###########################
    # Array methods
    ###########################

    # Returns true if _self_ contains no elements
    def empty?
      length == 0
    end

    # Delegates to ==.
    def eql?(another)
      self == another
    end

    # Returns the first entry, or the first n entries when
    # n is specified.
    def first(n=nil)
      n.nil? ? self[0] : self[0, n]
    end

    # Alias for []
    def slice(one, two = nil)
      self[one, two]
    end

    # Returns self.
    #--
    # Warning -- errors show up when this doesn't return
    # an Array... however to return an array with to_ary
    # may mean converting a Base into an Array for
    # insertions... see/modify convert_to_ary
    def to_ary
      self
    end

    # Returns a string of the form "#<Class:object_id [...]>" where the
    # bracketed portion is produced by ellipse_inspect.
    def inspect
      "#<#{self.class}:#{object_id} #{ellipse_inspect(self)}>"
    end

    protected

    # Sets io and extends the input io with Io.
    def io=(io) # :nodoc:
      io.extend Io unless io.kind_of?(Io)
      @io = io
    end

    # converts obj to an int using the <tt>to_int</tt>
    # method, if the object responds to <tt>to_int</tt>;
    # otherwise returns obj unchanged
    def convert_to_int(obj) # :nodoc:
      obj.respond_to?(:to_int) ? obj.to_int : obj
    end

    # converts obj to an array using the <tt>to_ary</tt>
    # method, if the object responds to <tt>to_ary</tt>.
    # nil becomes an empty array and anything else is
    # wrapped in a one-element array.
    def convert_to_ary(obj) # :nodoc:
      if obj.nil?
        []
      elsif obj.respond_to?(:to_ary)
        obj.to_ary
      else
        [obj]
      end
    end

    # a more array-compliant version of Chunkable#split_range
    #
    # Returns [start, finish - start], with negative bounds offset by
    # total and the second value reduced by one for exclusive ranges.
    # Raises a TypeError when either bound cannot be converted to an
    # Integer.
    #
    # NOTE(review): the second value is one less than the number of
    # elements the range covers (0..2 yields 2); presumably callers
    # account for this -- confirm against Chunkable#split_range.
    def split_range(range, total=length) # :nodoc:
      start = convert_to_int(range.begin)
      raise TypeError, "can't convert #{range.begin.class} into Integer" unless start.kind_of?(Integer)
      start += total if start < 0

      finish = convert_to_int(range.end)
      raise TypeError, "can't convert #{range.end.class} into Integer" unless finish.kind_of?(Integer)
      finish += total if finish < 0

      # named span rather than length to avoid shadowing the length method
      span = finish - start
      span -= 1 if range.exclude_end?

      [start, span]
    end

    # helper to inspect large arrays; arrays with more than ten
    # entries are abbreviated to their first and last five
    def ellipse_inspect(array) # :nodoc:
      if array.length > 10
        "[#{collect_join(array[0,5])} ... #{collect_join(array[-5,5])}] (length = #{array.length})"
      else
        "[#{collect_join(array.to_a)}]"
      end
    end

    # another helper to inspect large arrays; joins the inspect
    # strings of each entry with ', '
    def collect_join(array) # :nodoc:
      array.collect do |obj|
        obj.inspect
      end.join(', ')
    end

  end
end