lib/external/base.rb in external-0.1.0 vs lib/external/base.rb in external-0.3.0
- old
+ new
@@ -1,85 +1,212 @@
-require 'external/io'
-require 'external/chunkable'
-require 'external/enumerable'
+# For some inexplicable reason yaml MUST be required before
+# tempfile in order for ExtArrTest::test_LSHIFT to pass.
+# Otherwise it fails with 'TypeError: allocator undefined for Proc'
+
+require 'yaml'
require 'tempfile'
+require 'external/enumerable'
+require 'external/io'
+
module External
- #--
- # Base provides the basic array functionality shared by ExtArr and Index,
- # essentially wrapping the IO functions required to access and utilized external
- # array data with the standard array functions. Bases can be opened with
- # in any of the IO modes; the capabilities of Base will be reduced accordingly
- # (ie read-only Bases cannot write values using []=, for instance).
- #
- # It is VERY IMPORTANT to realize that the underlying IO will be opened using the
- # given mode. The 'w' mode will overwrite all existing data; 'r+' is a safer mode
- # for full read-write functionality. Note that since Base actively scans over
- # the IO, append modes essentially behaves like write, but does not overwrite existing
- # data.
- #
- # To work properly, Base must be subclassed with methods:
- # * length
- # * io_fetch
- #++
- #
- #
+ # Base provides shared IO and Array-like methods used by ExternalArchive,
+ # ExternalArray, and ExternalIndex.
class Base
class << self
- def open(fd=nil, mode="r", options={})
- fd = File.open(fd, mode) unless fd == nil
- ab = self.new(fd, options)
+
+ # Initializes an instance of self with File.open(path, mode) as an io.
+ # As with File.open, the instance will be passed to the block and
+ # closed when the block returns. If no block is given, open returns
+ # the new instance.
+ #
+ # Nil may be provided as the path, in which case a Tempfile will be
+ # used (and mode is ignored, as Tempfiles always open in 'r+'
+ # mode).
+ def open(path=nil, mode="rb", *argv)
+ path = File.open(path, mode) unless path == nil
+ base = new(path, *argv)
if block_given?
begin
- yield(ab)
+ yield(base)
ensure
- ab.close
+ base.close
end
else
- ab
+ base
end
end
end
include External::Enumerable
include External::Chunkable
-
+
+ # The underlying io for self.
attr_reader :io
- # Initializes a new Base given the file descriptor, mode and options.
- # (see open_io for details on what io is opened for a given file descriptor)
- #
- # If mode contains an 's', then the Base will be initialized in strio
- # mode where the underlying IO will be a StringIO. In this case the fd
- # will be used as the string to initialize the StringIO.
- #
- # Standard options for Base include:
- # nil_value:: the value written to file for nils, and converted to nil on read
- # (default ' ')
- # max_gap:: the maximum gap size used by Offset (default 10000)
- # max_chunk_size:: the chunk size used by Offset (default 1M)
+ # The default tempfile basename for Base instances
+ # initialized without an io.
+ TEMPFILE_BASENAME = "external_base"
+
+ # Creates a new instance of self with the specified io. A
+ # nil io causes initialization with a Tempfile; a string
+ # io will be converted into a StringIO.
def initialize(io=nil)
- self.io = (io.nil? ? Tempfile.new("array_base") : io)
+ self.io = case io
+ when nil then Tempfile.new(TEMPFILE_BASENAME)
+ when String then StringIO.new(io)
+ else io
+ end
+
+ @enumerate_to_a = true
end
# Returns true if the underlying io is closed (delegates to io.closed?).
def closed?
io.closed?
end
- # Closes io.
- def close
+ # Closes io. If a path is specified, io will be dumped to it. If
+ # io is a File or Tempfile, the existing file is moved (not dumped)
+ # to path. Raises an error if path already exists and overwrite is
+ # not specified.
+ def close(path=nil, overwrite=false)
+ result = !io.closed?
+
+ if path
+ if File.exists?(path) && !overwrite
+ raise ArgumentError, "already exists: #{path}"
+ end
+
+ case io
+ when File, Tempfile
+ io.close unless io.closed?
+ FileUtils.move(io.path, path)
+ else
+ io.flush
+ io.rewind
+ File.open(path, "w") do |file|
+ file << io.read(io.default_blksize) while !io.eof?
+ end
+ end
+ end
+
io.close unless io.closed?
+ result
end
+ # Flushes io, then calls io.reset_length. Returns self.
+ def flush
+ io.flush
+ io.reset_length # defined outside this file; presumably refreshes io's tracked length -- confirm in external/io
+ self
+ end
+
+ # Returns a duplicate of self, built by flushing self and concatenating
+ # it onto the instance returned by another. This can be a slow operation
+ # as it may involve copying the full contents of one large file to another.
+ def dup
+ flush
+ another.concat(self)
+ end
+
+ # Returns another instance of self. Base itself raises
+ # NotImplementedError; the method must be implemented in a subclass.
+ def another
+ raise NotImplementedError
+ end
+
+ ###########################
+ # Array methods
+ ###########################
+
+ # Returns true if _self_ contains no elements (i.e. length == 0).
+ def empty?
+ length == 0
+ end
+
+ def eql?(another) # true when self == another; simply delegates to ==
+ self == another
+ end
+
+ # Returns the first entry (self[0]) when n is nil, otherwise the first n entries (self[0,n]).
+ def first(n=nil)
+ n.nil? ? self[0] : self[0,n]
+ end
+
+ # Alias for []; forwards both arguments as self[one, two].
+ def slice(one, two = nil)
+ self[one, two]
+ end
+
+ # Returns self.
+ #--
+ # Warning -- errors show up when this doesn't return
+ # an Array. However, returning a true Array from to_ary
+ # may mean converting a whole Base into an Array for
+ # insertions; see (and modify) convert_to_ary.
+ def to_ary
+ self
+ end
+
+ # Returns a string with the class, object_id and the contents of self as formatted by ellipse_inspect.
+ def inspect
+ "#<#{self.class}:#{object_id} #{ellipse_inspect(self)}>"
+ end
+
protected
- # Sets io and extends the input io with External::Position.
- def io=(io)
- io.extend External::IO unless io.kind_of?(External::IO)
+ # Sets io, extending the input io with Io unless it already is an Io.
+ def io=(io) # :nodoc:
+ io.extend Io unless io.kind_of?(Io)
@io = io
end
+
+ # Converts obj to an integer using the <tt>to_int</tt> method if obj
+ # responds to <tt>to_int</tt>; otherwise returns obj unchanged.
+ def convert_to_int(obj) # :nodoc:
+ obj.respond_to?(:to_int) ? obj.to_int : obj
+ end
+ # Converts obj to an array: nil becomes [], an object responding to
+ # <tt>to_ary</tt> is converted with it, and anything else is wrapped as [obj].
+ def convert_to_ary(obj) # :nodoc:
+ obj == nil ? [] : obj.respond_to?(:to_ary) ? obj.to_ary : [obj]
+ end
+
+ # A more array-compliant version of Chunkable#split_range; returns [start, length], raising TypeError for non-integer endpoints.
+ def split_range(range, total=length) # :nodoc:
+ # normalize each endpoint: coerce with convert_to_int, then offset negative values by total
+ start = convert_to_int(range.begin)
+ raise TypeError, "can't convert #{range.begin.class} into Integer" unless start.kind_of?(Integer)
+ start += total if start < 0
+
+ finish = convert_to_int(range.end)
+ raise TypeError, "can't convert #{range.end.class} into Integer" unless finish.kind_of?(Integer)
+ finish += total if finish < 0
+
+ length = finish - start # local variable; shadows the length method from here on
+ length -= 1 if range.exclude_end?
+
+ [start, length] # NOTE(review): for an inclusive range this is one less than the element count -- presumably callers add 1; confirm
+ end
+
+ # Helper to inspect large arrays: beyond 10 elements only the first and last five are shown, followed by the length.
+ def ellipse_inspect(array) # :nodoc:
+ if array.length > 10
+ "[#{collect_join(array[0,5])} ... #{collect_join(array[-5,5])}] (length = #{array.length})"
+ else
+ "[#{collect_join(array.to_a)}]"
+ end
+ end
+
+ # Another helper to inspect large arrays: joins the inspect string of each element with ', '.
+ def collect_join(array) # :nodoc:
+ array.collect do |obj|
+ obj.inspect
+ end.join(', ')
+ end
+
end
end
\ No newline at end of file