require 'addressable/uri'

module IMW

  # Define this constant in your configuration file to add your own
  # URI handlers to IMW.
  USER_DEFINED_HANDLERS = [] unless defined?(USER_DEFINED_HANDLERS)

  # Register a new resource handler which dynamically extends a new
  # IMW::Resource with the given module +mod+.
  #
  # +handler+ must be one of
  #
  # 1. Regexp
  # 2. Proc
  # 3. +true+
  #
  # In case (1), if the regular expression matches the resource's URI
  # then the module (+mod+) will be used to extend the resource.
  #
  # In case (2), if the Proc returns a value other than +false+ or
  # +nil+ then the module will be used.
  #
  # In case (3), the module will be used.
  #
  # The pair is appended to IMW::USER_DEFINED_HANDLERS.
  #
  # @param [String, Module] mod
  # @param [Regexp, Proc, true] handler
  # @raise [IMW::ArgumentError] if +mod+ or +handler+ has the wrong type
  # @return [Array] the updated list of user-defined handlers
  def self.register_handler(mod, handler)
    unless mod.is_a?(Module) || mod.is_a?(String)
      raise IMW::ArgumentError, "Module must be either a Module or String"
    end
    unless handler.is_a?(Regexp) || handler.is_a?(Proc) || handler == true
      raise IMW::ArgumentError, "Handler must be either a Regexp, Proc, or true"
    end
    self::USER_DEFINED_HANDLERS << [mod, handler]
  end

  # A resource can be anything addressable via a URI.  Examples
  # include local files, remote files, webpages, &c.
  #
  # The IMW::Resource class takes a URI as input and then dynamically
  # extends itself with appropriate modules from IMW.  As an example,
  # calling
  #
  #   my_archive = IMW::Resource.new('/path/to/my/archive.tar.bz2')
  #
  # would return an IMW::Resource extended by
  # IMW::Archives::Tarbz2 (among other modules) which
  # therefore has methods for extracting, listing, and appending to
  # the archive.
  #
  # Modules are so extended based on the handlers returned by
  # IMW::Resource.handlers.  You can add your own handlers with
  # IMW.register_handler or by defining USER_DEFINED_HANDLERS in your
  # configuration file.
  #
  # The modules extending a particular IMW::Resource instance can be
  # listed as follows
  #
  #   my_archive.resource_modules #=> [IMW::Local::Base, IMW::Local::File, IMW::Local::Compressible, IMW::Archives::Tarbz2]
  #
  # By default, resources are opened for reading.  Passing in the
  # appropriate :mode option changes this:
  #
  #   IMW::Resource.new('/path/to/my_new_file', :mode => 'w')
  #
  # If the :no_modules option is passed in then the resource will not
  # extend itself with any modules and will essentially only retain
  # the bare functionality of a URI.  This can be useful when
  # subclassing IMW::Resource or dealing with a very strange kind of
  # resource.
  #
  # Read the documentation for modules in IMW::Resources to learn more
  # about the various behaviors an IMW::Resource can acquire.
  #
  # You can also instantiate an IMW::Resource using IMW.open, which
  # accepts all the same arguments as IMW::Resource.new.
  class Resource

    # The URI object associated with this resource.
    attr_reader :uri

    # The mode in which to access this resource.
    attr_accessor :mode

    # The options passed to this resource on initialization (the same
    # Hash object, not a duplicate).
    attr_accessor :resource_options

    # Create a new resource representing +uri+.
    #
    # IMW will automatically extend the resulting IMW::Resource
    # instance with modules appropriate to the given URI.
    #
    #   r = IMW::Resource.new("http://www.infochimps.com")
    #   r.resource_modules
    #   # => [IMW::Schemes::Remote::Base, IMW::Schemes::Remote::RemoteFile, IMW::Schemes::HTTP, IMW::Formats::Html]
    #
    # You can prevent this altogether by passing in
    # :no_modules:
    #
    #   r = IMW::Resource.new("http://www.infochimps.com", :no_modules => true)
    #   r.resource_modules
    #   # => []
    #
    # And you can exert more fine-grained control with the
    # :use_modules and :skip_modules options, see
    # IMW::Resource.extend_resource! for details.
    #
    # @param [String, Addressable::URI] uri
    # @param [Hash] options
    # @option options [true, false] no_modules
    # @option options [String] mode the mode to open the resource in (defaults to 'r')
    # @return [IMW::Resource]
    def initialize(uri, options = {})
      self.uri              = uri
      self.resource_options = options
      self.mode             = options[:mode] || 'r'
      extend_appropriately!(options) unless options[:no_modules]
    end

    # Return the modules this resource has been extended by.
    #
    # @return [Array] the modules this resource has been extended by.
    def resource_modules
      @resource_modules ||= []
    end

    # Works just like Object#extend except it keeps track of the
    # modules it has extended, see Resource#resource_modules.
    #
    # Like Object#extend, more than one module may be given.
    #
    # @param [Module] mod the module to extend this resource with
    # @param [Array<Module>] more_mods optional additional modules
    # @return [IMW::Resource] this resource
    def extend(mod, *more_mods)
      # Modules are recorded before extending, so they are already
      # listed while Object#extend runs.
      resource_modules.concat([mod, *more_mods])
      super
    end

    # Extend this resource with modules by passing it through a
    # collection of handlers defined by IMW::Resource.handlers.
    #
    # Accepts the same options as Resource.extend_resource!.
    def extend_appropriately!(options = {})
      self.class.extend_resource!(self, options)
    end

    # Set the URI of this resource by parsing the given +uri+ (if
    # necessary).
    #
    # If the string cannot be parsed as given, characters outside the
    # URI reserved/unreserved sets are percent-encoded and parsing is
    # attempted once more; in that case <tt>@encoded_uri</tt> is set
    # to +true+.  A string that is still invalid after encoding raises
    # Addressable::URI::InvalidURIError.
    #
    # @param [String, Addressable::URI] uri the uri to parse
    def uri=(uri)
      if uri.is_a?(Addressable::URI)
        @uri = uri
        return
      end

      raw = uri.to_s
      begin
        @uri = Addressable::URI.parse(raw)
      rescue Addressable::URI::InvalidURIError
        # Addressable raises its own error class (not the stdlib
        # URI::InvalidURIError), and the stdlib URI.encode no longer
        # exists in Ruby 3, so escaping is done with Addressable too.
        @uri         = Addressable::URI.parse(Addressable::URI.encode_component(raw))
        @encoded_uri = true
      end
    end

    # The scheme of this resource.  Will be +nil+ for local resources.
    #
    # @return [String]
    def scheme
      @scheme ||= uri.scheme
    end

    # The directory name of this resource's path.
    #
    # Relies on a +path+ method, which this class does not define
    # itself.
    #
    # @return [String]
    def dirname
      @dirname ||= File.dirname(path)
    end

    # The basename of this resource's path.
    #
    # Relies on a +path+ method, which this class does not define
    # itself.
    #
    # @return [String]
    def basename
      @basename ||= File.basename(path)
    end

    # Returns the extension (INCLUDING the '.') of this resource's
    # path.  Redefine this in an including class for which this is
    # weird ('.tar.gz' I'm talking to you...)
    #
    # @return [String]
    def extname
      @extname ||= File.extname(path)
    end

    # Returns the extension (WITHOUT the '.') of this resource's path.
    # Empty string when there is no extension.
    #
    # @return [String]
    def extension
      @extension ||= extname[1..-1] || ''
    end

    # Returns the basename of the file with its extension (as reported
    # by +extname+) removed.
    #
    #   IMW.open('/path/to/some_file.txt').name # => some_file
    #
    # With the default +extname+ only the last extension is removed;
    # what is stripped from a name like 'some_file.tar.gz' depends on
    # whether +extname+ has been redefined for the resource.
    #
    # @return [String]
    def name
      # +to_s+ keeps a redefined +extname+ that returns nil harmless:
      # nothing is stripped in that case.
      @name ||= basename[0, basename.length - extname.to_s.length]
    end

    # Returns the user associated with the host of this URI.
    #
    # @return [String]
    def user
      @user ||= uri.user
    end

    # The string form of this resource's URI.
    #
    # @return [String]
    def to_s
      uri.to_s
    end

    # Raise an error unless this resource exists.
    #
    # @param [String] message an optional message to include
    # @raise [IMW::Error] if the resource has no +path+ or +exist?+ method
    # @raise [IMW::PathError] if the resource does not exist
    # @return [IMW::Resource] this resource
    def should_exist!(message = nil)
      unless respond_to?(:path)
        raise IMW::Error.new([message, "No path defined for #{self.inspect} extended by #{resource_modules.join(' ')}"].compact.join(', '))
      end
      unless respond_to?(:exist?)
        raise IMW::Error.new([message, "No exist? method defined for #{self.inspect} extended by #{resource_modules.join(' ')}"].compact.join(', '))
      end
      unless exist?
        raise IMW::PathError.new([message, "#{path} does not exist"].compact.join(', '))
      end
      self
    end

    # Open a copy of this resource.
    #
    # This is useful when wanting to reset file handles.  Though -- be
    # warned -- it does not close any file handles itself, and only
    # the URI is passed along (not the mode or other options).
    #
    # @return [IMW::Resource] the new (old) resource
    def reopen
      IMW.open(uri.to_s)
    end

    # If +method+ begins with the strings +is_+, +on_+, or +via_+ and
    # ends with a question mark then we interpret it as a question
    # this resource doesn't know how to answer -- so we have it answer
    # +false+.
    #
    # As an example, consider the following loop:
    #
    #   IMW.open('/tmp').all_contents.each do |obj|
    #     if obj.is_archive?
    #       # ... do something
    #     end
    #   end
    #
    # When +obj+ is initialized and it _isn't_ an archive, then it
    # doesn't know about the is_archive? method -- but it
    # should therefore answer false anyway.
    #
    # This lets a basic text file answer questions about whether it's
    # an archive (or on S3, or accessed via some user-defined scheme,
    # &c.) without needing to know anything about archives (or S3 or
    # the user-defined scheme).
    #
    # @raise [IMW::NoMethodError] for any other unknown method
    def method_missing(method, *args)
      # Anchored so that only names that *begin* with the prefix match;
      # an unanchored pattern would also swallow unrelated predicates
      # such as +connection_open?+.
      if args.empty? && method.to_s =~ /\A(is|on|via)_.*\?\z/
        # querying for a boolean response so answer false
        return false
      end

      raise IMW::NoMethodError, "undefined method `#{method}' for #{self}, extended by #{resource_modules.join(', ')}"
    end

    # Iterate through IMW::Resource.handlers and extend the given
    # +resource+ with modules whose handler conditions match the
    # resource.
    #
    # Passing in :use_modules or :skip_modules
    # allows overriding the default behavior of handlers.  The
    # +options+ hash itself is not modified.
    #
    # @param [IMW::Resource] resource the resource to extend
    # @param [Hash] options
    # @option options [Array] use_modules a list of modules used regardless of handlers
    # @option options [Array] skip_modules a list of modules not to be used regardless of handlers
    # @raise [IMW::TypeError] if a handler is not a Regexp, Proc, or +true+
    # @return [IMW::Resource] the extended resource
    def self.extend_resource!(resource, options = {})
      # Array() supplies the empty default and also tolerates nil or a
      # single module, without writing defaults back into the caller's
      # hash.
      use_modules  = Array(options[:use_modules])
      skip_modules = Array(options[:skip_modules])

      handlers.each do |mod_name, handler|
        applies =
          case handler
          when Regexp    then handler =~ resource.uri.to_s
          when Proc      then handler.call(resource)
          when TrueClass then true
          else raise IMW::TypeError, "A handler must be Regexp, Proc, or true"
          end
        extend_resource_with_mod_or_string!(resource, mod_name, skip_modules) if applies
      end

      # Explicitly requested modules are applied after the handlers
      # (still subject to :skip_modules).
      use_modules.each do |mod_name|
        extend_resource_with_mod_or_string!(resource, mod_name, skip_modules)
      end
      resource
    end

    # A list of handlers to match against each new resource.
    #
    # When an IMW::Resource is instantiated it eventually calls
    # IMW::Resource.extend_resource! which will iterate through the
    # handlers in IMW::Resource.handlers, extending the resource with
    # modules whose handler conditions are satisfied.
    #
    # A handler is just an Array with two elements.  The first should be
    # a module or a string identifying a module.
    #
    # If the second element is a Regexp, the corresponding module will
    # be used if the regexp matches the resource's URI (as a string)
    #
    # If the second element is a Proc, it will be called with the
    # resource as its only argument and if it returns true then the
    # module will be used.
    #
    # If the second element is +true+, the module will always be used.
    #
    # You can define your own handlers by appending them to
    # USER_DEFINED_HANDLERS in your .imwrc file.
    #
    # The order in which handlers appear is significant --
    # IMW::CompressedFiles::HANDLERS must be _before_
    # IMW::Archives::HANDLERS, for example, because of (say)
    # .tar.bz2 files.
    #
    # @return [Array]
    def self.handlers
      # order is important!
      #
      # CompressedFiles must come before Archives because of tar.bz2
      # type files
      IMW::Schemes::HANDLERS +
        IMW::CompressedFiles::HANDLERS +
        IMW::Archives::HANDLERS +
        IMW::Formats::HANDLERS +
        USER_DEFINED_HANDLERS
    end

    # Extend +resource+ with +mod_or_string+.  A String is evaluated
    # in the context of the IMW module to obtain the module it names.
    #
    # Intended for internal use by Resource.extend_resource!.  It
    # remains publicly callable: a bare +protected+ keyword has no
    # effect on methods defined with <tt>def self.</tt>.
    #
    # @param [IMW::Resource] resource the resource to extend
    #
    # @param [Module, String] mod_or_string the module or string
    # representing a module to extend the resource with
    #
    # @param [Array] skip_modules modules to exclude
    def self.extend_resource_with_mod_or_string!(resource, mod_or_string, skip_modules)
      return if skip_modules.include?(mod_or_string)

      if mod_or_string.is_a?(Module)
        resource.extend(mod_or_string)
      else
        # NOTE(review): class_eval executes the string as Ruby code.
        # Handler strings come from IMW itself and from user
        # configuration; never pass untrusted input through here.
        resolved = IMW.class_eval(mod_or_string)
        resource.extend(resolved) unless skip_modules.include?(resolved)
      end
    end
  end
end