lib/external/io.rb in external-0.1.0 vs lib/external/io.rb in external-0.3.0
- old
+ new
@@ -1,398 +1,163 @@
-require 'stringio'
-require 'tempfile'
-require 'external/chunkable'
-
-module External
- # Position gets IO objects to work properly for large files. Additionally,
- # IO adds a length accessor for getting the size of the IO contents. Note
- # that length is not automatically adjusted by write, for performance
- # reasons. length must be managed manually, or reset after writes using
- # reset_length.
- #
- # A variety of bugs needed to be addressed per-platform:
- #
- # == Mac OS X Tiger
- #
- # Using the default (broken) installation of Ruby, StringIO does not correctly
- # position itself when a pos= statement is issued.
- #
- # s = StringIO.new "abc"
- # s.read # => "abc"
- # s.pos = 0
- # s.read # => nil
- #
- # For regular IO objects, as expected, the second read statement returns
- # "abc". Install the a fixed version of Ruby, perhaps with the one-click
- # installer: http://rubyosx.rubyforge.org/
- #
- # == Windows
- #
- # Ruby on Windows has problems with files larger than ~2 gigabytes.
- # Sizes return as negative, and positions cannot be set beyond the max
- # size of a long (2147483647 ~ 2GB = 2475636895). IO corrects both of
- # these issues thanks in large part to a bit of code taken from
- # 'win32/file/stat' (http://rubyforge.org/projects/win32utils/).
- #
- # == Others
- #
- # I haven't found errors on Fedora and haven't tested on any other platforms.
- # If you find and solve some wierd positioning errors, please let me know.
- module IO
-
- # Determines the generic mode of the input io using the _mode
- # method for the input io class. By default IO provides _mode
- # methods for File, Tempfile, and StringIO. The return string
- # is determined as follows:
- #
- # readable & writable:: r+
- # readable:: r
- # writable:: w
- #
- # The _mode method takes the input io and should return an array
- # specifying whether or not io is readable and writable
- # (ie [readable, writable]).
- #
- # See try_handle for more details.
- def self.mode(io)
- readable, writable = try_handle(io, "mode")
-
- case
- when readable && writable then "r+"
- when readable then "r"
- when writable then "w"
- else
- # occurs for r+ mode, for some reason
- "r+"
- end
- end
-
- # Determines the length of the input io using the _length method
- # for the input io class. Non-External::IO inputs are extended
- # in this process.
- #
- # The _length method takes the input io, and should return the
- # current length of the input io (ie a flush operation may be
- # required).
- #
- # See try_handle for more details.
- def self.length(io)
- case io
- when External::IO
- try_handle(io, "length")
- else
- io.extend External::IO
- io.length
- end
- end
-
- # Returns an array of bools determining if the input File
- # is readable and writable.
- def self.file_mode(io)
- begin
- dup = io.dup
-
- # determine readable/writable by sending close methods
- # to the duplicated IO. If the io cannot be closed for
- # read/write then it will raise an error, indicating that
- # it was not open in the given mode.
- [:close_read, :close_write].collect do |method|
- begin
- dup.send(method)
- true
- rescue(IOError)
- false
- end
- end
- ensure
- # Be sure that the io is fully closed before proceeding!
- # (Otherwise Tempfiles will not be properly disposed of
- # ... at least on Windows, perhaps on others)
- dup.close if dup && !dup.closed?
- end
- end
-
- # Returns the length of the input File
- def self.file_length(io)
- io.fsync unless io.generic_mode == 'r'
- File.size(io.path)
- end
-
- # Returns an array of bools determining if the input Tempfile
- # is readable and writable.
- def self.tempfile_mode(io)
- file_mode(io.instance_variable_get("@tmpfile"))
- end
-
- # Returns the length of the input Tempfile
- def self.tempfile_length(io)
- file_length(io)
- end
-
- # Returns an array of bools determining if the input StringIO
- # is readable and writable.
- #
- # s = StringIO.new("abcde", "r+")
- # External::IO.stringio_mode(s) # => [true, true]
- #
- def self.stringio_mode(io)
- [!io.closed_read?, !io.closed_write?]
- end
-
- # Returns the length of the input StringIO
- #
- # s = StringIO.new("abcde", "r+")
- # External::IO.length(s) # => 5
- #
- def self.stringio_length(io)
- io.string.length
- end
-
- def self.extended(base) # :nodoc:
- base.instance_variable_set("@generic_mode", mode(base))
- base.reset_length
- base.default_blksize = 1024
- base.binmode
- end
-
- protected
-
- # try_handle is a forwarding method allowing External::IO to handle
- # non-File, non-Tempfile IO objects. try_handle infers a method
- # name based on the class of the input and trys to forward the
- # input io to that method within External::IO. For instance:
- #
- # * the _mode method for StringIO is 'stringio_mode'
- # * the _length method for StringIO is 'stringio_length'
- #
- # Nested classes have '::' replaced by '_'. Thus to add support
- # for Some::Unknown::IO, extend External::IO as below:
- #
- # module External::IO
- # def some_unknown_io_mode(io)
- # ...
- # end
- #
- # def some_unknown_io_length(io)
- # ...
- # end
- # end
- #
- # See stringio_mode and stringio_length for more details.
- def self.try_handle(io, method)
- method_name = io.class.to_s.downcase.gsub(/::/, "_") + "_#{method}"
- if self.respond_to?(method_name)
- External::IO.send(method_name, io)
- else
- raise "cannot determine #{method} for '%s'" % io.class
- end
- end
-
- public
-
- include Chunkable
- attr_reader :generic_mode
-
- # True if self is a File or Tempfile
- def file?
- self.kind_of?(File) || self.kind_of?(Tempfile)
- end
-
- # Modified truncate that adjusts length
- def truncate(n)
- super
- self.pos = n if self.pos > n
- self.length = n
- end
-
- # Resets length to the length returned by External::IO.length
- def reset_length
- self.length = External::IO.length(self)
- end
-
- #
- # comparison
- #
-
- # Quick comparision with another IO. Returns true if
- # another == self, or if both are file-type IOs and
- # their paths are equal.
- def quick_compare(another)
- self == another || (self.file? && another.file? && self.path == another.path)
- end
-
- # Sort compare with another IO, behaving like a comparison between
- # the full string contents of self and another. Can be a long
- # operation if it requires the full read of two large IO objects.
- def sort_compare(another, blksize=default_blksize)
- # equal in comparison if the ios are equal
- return 0 if quick_compare(another)
-
- if another.length > self.length
- return -1
- elsif self.length < another.length
- return 1
- else
- self.flush unless self.generic_mode == 'r'
- self.pos = 0
- another.flush unless another.generic_mode == 'r'
- another.pos = 0
-
- sa = sb = nil
- while sa == sb
- sa = self.read(blksize)
- sb = another.read(blksize)
- break if sa.nil? || sb.nil?
- end
-
- sa.to_s <=> sb.to_s
- end
- end
-
- # Sort compare with another IO, behaving like a comparison between
- # the full string contents of self and another. Can be a long
- # operation if it requires the full read of two large IO objects.
- def <=>(another)
- sort_compare(another)
- end
-
- #
- # reading
- #
-
- def scan(range_or_span=default_span, blksize=default_blksize, carryover_limit=default_blksize)
- carryover = 0
- chunk(range_or_span, blksize) do |offset, length|
- raise "carryover exceeds limit: #{carryover} (#{carryover_limit})" if carryover > carryover_limit
-
- scan_begin = offset - carryover
- self.pos = scan_begin
- string = self.read(length + carryover)
- carryover = yield(scan_begin, string)
- end
- carryover
- end
-
- #
- # writing
- #
-
- #
- def insert(src, range=0..src.length, pos=nil)
- self.pos = pos unless pos == nil
-
- start_pos = self.pos
- length_written = 0
-
- src.flush unless src.generic_mode == 'r'
- src.pos = range.begin
- src.chunk(range) do |offset, length|
- length_written += write(src.read(length))
- end
-
- end_pos = start_pos + length_written
- self.length = end_pos if end_pos > self.length
- length_written
- end
-
- #
- def concat(src, range=0..src.length)
- insert(src, range, length)
- end
-
- #--
- # it appears that as long as the io opening t.path closes,
- # the tempfile will be deleted at the exit of the ruby
- # instance... otherwise it WILL NOT BE DELETED
- # Make note of this in the documentation to be sure to close
- # files if you start inserting because it may make tempfiles
- #++
- def copy(mode="r", range=0..length)
- self.flush
-
- temp = Tempfile.new("copy")
- temp.extend IO
- temp.insert(self, range)
- temp.close
-
- cp = File.open(temp.path, mode)
- cp.extend IO
-
- if block_given?
- begin
- yield(cp)
- ensure
- cp.close unless cp.closed?
- FileUtils.rm(cp.path) if File.exists?(cp.path)
- end
- else
- cp
- end
- end
-
- end
-end
-
-# This code block modifies IO only if running on windows
-unless RUBY_PLATFORM.index('mswin').nil?
-require 'Win32API'
-
-module External
- module IO
- # Modfied to properly determine file lengths on Windows. Uses code
- # from 'win32/file/stat' (http://rubyforge.org/projects/win32utils/)
- def self.file_length(io) # :nodoc:
- io.fsync unless io.generic_mode == 'r'
-
- # I would have liked to use win32/file/stat to do this... however, some issue
- # arose involving FileUtils.cp, File.stat, and File::Stat.mode. cp raised an
- # error because the mode would be nil for files. I wasn't sure how to fix it,
- # so I've lifted the relevant code for pulling the large file size.
-
- # Note this is a simplified version... if you base.path point to a chardev,
- # this may need to be changed, because apparently the call to the Win32API
- # may fail
-
- stat_buf = [0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0].pack('ISSssssIILILILIL')
- Win32API.new('msvcrt', '_stat64', 'PP', 'I').call(io.path, stat_buf)
- stat_buf[24, 4].unpack('L').first # Size of file in bytes
- end
-
- POSITION_MAX = 2147483647 # maximum size of long
-
- # Modified to handle positions past the 2Gb limit
- def pos # :nodoc:
- @pos || super
- end
-
- # Positions larger than the max value of a long cannot be directly given
- # to the default +pos=+. This version incrementally seeks to positions
- # beyond the maximum, if necessary.
- #
- # Note: setting the position beyond the 2Gb limit requires the use of a
- # sysseek statement. As such, errors will arise if you try to position
- # an IO object that does not support this method (for example StringIO...
- # but then what are you doing with a 2Gb StringIO anyhow?)
- def pos=(pos)
- if pos < POSITION_MAX
- super(pos)
- @pos = nil
- elsif @pos != pos
- # note sysseek appears to be necessary here, rather than io.seek
- @pos = pos
-
- super(POSITION_MAX)
- pos -= POSITION_MAX
-
- while pos > POSITION_MAX
- pos -= POSITION_MAX
- self.sysseek(POSITION_MAX, Object::IO::SEEK_CUR)
- end
-
- self.sysseek(pos, Object::IO::SEEK_CUR)
- end
- end
-
- end
-end
-
-end # end the windows-specific code
+require 'external/chunkable'
+require 'external/utils'
+
+autoload(:StringIO, 'stringio')
+autoload(:Tempfile, 'tempfile')
+autoload(:FileUtils, 'fileutils')
+
+module External
+
+ # Adds functionality to an IO required by External.
+ #
+ # Io adds/overrides the length accessor for getting the size of the IO contents.
+ # Note that length is not automatically adjusted by write, for performance
+ # reasons. length must be managed manually, or reset after writes using
+ # reset_length.
+ #
+ module Io
+ include Chunkable
+
+ # Modules collected here are mixed into every object extended by Io
+ # (see Io.extended).  The array is created empty and left unfrozen;
+ # presumably the patch files required below append to it (TODO confirm).
+ PATCHES = []
+
+ # Add version-specific patches.  The dot is escaped and the pattern is
+ # anchored with \A so that only version strings that really begin with
+ # "1.8" match.
+ case RUBY_VERSION
+ when /\A1\.8/ then require "external/patches/ruby_1_8_io"
+ end
+
+ # Add platform-specific patches
+ # case RUBY_PLATFORM
+ # when 'java'
+ # end
+
+ # Hook invoked by Ruby when an object is extended with Io.  Extends
+ # +base+ with every module in PATCHES, then initializes it: the length
+ # is set through reset_length, default_blksize is set to 1024, and
+ # binmode is called on it.
+ def self.extended(base)
+   PATCHES.each do |patch|
+     base.extend(patch)
+   end
+
+   base.reset_length
+   base.default_blksize = 1024
+   base.binmode
+ end
+
+ # Recomputes the length of self with Utils.length and stores it through
+ # the length= writer.  Returns the value that was assigned.
+ def reset_length
+   current_length = Utils.length(self)
+   self.length = current_length
+ end
+
+ # Truncates self to +n+ through the underlying truncate (super), then
+ # keeps the position and the stored length in step with the new size:
+ # the position is moved back to +n+ if it was beyond it, and length is
+ # set to +n+.
+ def truncate(n)
+   super(n)
+   self.pos = n unless self.pos <= n
+   self.length = n
+ end
+
+ # Walks over self chunk by chunk, yielding each piece to the block.
+ #
+ # The chunks come from chunk(range_or_span, blksize).  For each chunk the
+ # position is set to the chunk offset minus the current carryover, and
+ # (chunk length + carryover) bytes are requested from read.  The block
+ # receives the position the read started at and the string that was read;
+ # its return value becomes the carryover for the next chunk (presumably
+ # the number of trailing bytes the block could not consume -- confirm
+ # against callers).
+ #
+ # Raises a RuntimeError if the carryover is larger than carryover_limit
+ # at the start of a chunk.  Returns the carryover left by the last chunk
+ # (0 if no chunk was yielded).
+ def scan(range_or_span=default_span, blksize=default_blksize, carryover_limit=default_blksize)
+   pending = 0
+   chunk(range_or_span, blksize) do |chunk_offset, chunk_length|
+     if pending > carryover_limit
+       raise "carryover exceeds limit: #{pending} (#{carryover_limit})"
+     end
+
+     read_from = chunk_offset - pending
+     self.pos = read_from
+     piece = self.read(chunk_length + pending)
+     pending = yield(read_from, piece)
+   end
+   pending
+ end
+
+ # Writes the section of +src+ specified by +range+ into self, starting
+ # at +pos+ (or at the current position when +pos+ is nil).
+ #
+ # +src+ is flushed and positioned at range.begin, then read sequentially
+ # in the chunks produced by src.chunk(range); each piece read is written
+ # to self.  If the write ends beyond the current length, length is
+ # extended to the end position.  Returns the sum of the values returned
+ # by write.
+ def insert(src, range=0..src.length, pos=nil)
+   self.pos = pos unless pos.nil?
+   write_start = self.pos
+
+   src.flush
+   src.pos = range.begin
+
+   total_written = 0
+   src.chunk(range) do |_offset, chunk_length|
+     total_written += write(src.read(chunk_length))
+   end
+
+   write_end = write_start + total_written
+   self.length = write_end if write_end > self.length
+   total_written
+ end
+
+ # Appends the section of +src+ specified by +range+ to self by inserting
+ # it at the current length.  Returns the result of insert.
+ def concat(src, range=0..src.length)
+   append_at = length
+   insert(src, range, append_at)
+ end
+
+ # Copies +range+ of self into a new file and reopens that file with +mode+.
+ #
+ # self is flushed, the data is inserted into a Tempfile, and the Tempfile
+ # is closed and reopened by path as a File extended with Io.  With a
+ # block, the copy is yielded, then closed and removed once the block
+ # finishes (also when it raises), and the block's result is returned.
+ # Without a block the open copy is returned and the caller is responsible
+ # for closing it.
+ #
+ #--
+ # it appears that as long as the io opening t.path closes,
+ # the tempfile will be deleted at the exit of the ruby
+ # instance... otherwise it WILL NOT BE DELETED
+ # Make note of this in the documentation to be sure to close
+ # files if you start inserting because it may make tempfiles
+ #++
+ def copy(mode="r", range=0..length)
+   self.flush
+
+   temp = Tempfile.new("copy")
+   temp.extend Io
+   temp.insert(self, range)
+   temp.close
+
+   cp = File.open(temp.path, mode)
+   cp.extend Io
+   return cp unless block_given?
+
+   begin
+     yield(cp)
+   ensure
+     cp.close unless cp.closed?
+     # File.exist? rather than File.exists?: the plural form was deprecated
+     # and no longer exists as of Ruby 3.2.
+     FileUtils.rm(cp.path) if File.exist?(cp.path)
+   end
+ end
+
+ # Quick comparison with another IO.  Returns a true value if
+ # self == another, or if both are Files (kind_of?(File)) whose
+ # paths are equal.
+ def quick_compare(another)
+   same = (self == another)
+   return same if same
+
+   both_files = self.kind_of?(File) && another.kind_of?(File)
+   both_files && self.path == another.path
+ end
+
+ # Sort compare (ie <=>) with another IO.
+ #
+ # Returns 0 when quick_compare reports the two as the same.  Otherwise
+ # both IOs are flushed and their lengths reset, and the shorter one sorts
+ # first (-1 if self is shorter, 1 if self is longer).  IOs of equal length
+ # are read from the start in blocks of +blksize+ and ordered by the first
+ # pair of blocks that differ.  This obviously can be a long operation if
+ # it requires the full read of two large IO objects.
+ def sort_compare(another, blksize=default_blksize)
+   # equal in comparison if the ios are equal
+   return 0 if quick_compare(another)
+
+   self.flush
+   self.reset_length
+
+   another.flush
+   another.reset_length
+
+   # Order by length first, in both directions; contents are only read
+   # when the lengths are equal.
+   by_length = self.length <=> another.length
+   return by_length unless by_length == 0
+
+   self.pos = 0
+   another.pos = 0
+
+   sa = sb = nil
+   while sa == sb
+     sa = self.read(blksize)
+     sb = another.read(blksize)
+     # read(n) returns nil once the end of the data is reached; stop
+     # there so identical contents do not loop forever.
+     break if sa.nil? || sb.nil?
+   end
+
+   sa.to_s <=> sb.to_s
+ end
+
+ # Comparison operator; delegates to sort_compare using the default
+ # block size.
+ def <=>(another)
+   sort_compare(another, default_blksize)
+ end
+ end
+end
\ No newline at end of file