require 'imw/dataset/workflow'
require 'imw/dataset/paths'

module IMW

  # An IMW::Dataset is a common object in which paths, data
  # resources, and tasks are brought together to describe a complex
  # transformation of data.
  #
  # == Organizing Paths
  #
  # IMW encourages you to work within the following directory
  # structure for a dataset +my_dataset+:
  #
  #   my_dataset/
  #   |-- my_dataset.rb
  #   |-- ripd
  #   |   `-- ...
  #   |-- rawd
  #   |   `-- ...
  #   |-- fixd
  #   |   `-- ...
  #   `-- pkgd
  #       `-- ...
  #
  # Just like IMW itself, a dataset can manage a collection of paths.
  # If my_dataset.rb defines a dataset:
  #
  #   # my_dataset/my_dataset.rb
  #   dataset = IMW::Dataset.new(:my_dataset)
  #
  # then the following paths will be defined:
  #
  #   dataset.path_to(:root)   #=> my_dataset
  #   dataset.path_to(:script) #=> my_dataset/my_dataset.rb
  #   dataset.path_to(:ripd)   #=> my_dataset/ripd
  #   dataset.path_to(:rawd)   #=> my_dataset/rawd
  #   dataset.path_to(:fixd)   #=> my_dataset/fixd
  #   dataset.path_to(:pkgd)   #=> my_dataset/pkgd
  #
  # Just like IMW itself, the +dataset+ supports adding path
  # references
  #
  #   dataset.add_path(:raw_data, :ripd, 'raw_data.xml')
  #   dataset.path_to(:raw_data) #=> my_dataset/ripd/raw_data.xml
  #
  # as well as removing them (via dataset.remove_path).
  #
  # A subclass of IMW::Dataset can customize these paths by overriding
  # IMW::Dataset#set_default_paths as well as define new ones by
  # overriding IMW::Dataset#set_paths.
  #
  # Setting paths can be skipped altogether by passing the
  # :skip_paths option when instantiating a dataset:
  #
  #   dataset = IMW::Dataset.new :my_dataset, :skip_paths => true
  #
  # == Utilizing Tasks
  #
  # An IMW::Dataset utilizes Rake to manage the tasks needed to
  # transform data.  See IMW::Workflow for a description of the
  # pre-defined tasks (+rip+, +parse+, +fix+, +package+).
  #
  # New tasks can be defined
  #
  #   dataset.task :get_authorization do
  #     # ... get an authorization token
  #   end
  #
  # and hooked into the default tasks in the usual Rake manner
  #
  #   dataset.task :rip => [:get_authorization]
  #
  # A dataset also has methods for the workflow step tasks to make
  # this easier
  #
  #   dataset.rip [:get_authorization]
  #
  # Tasks for a dataset can be accessed and invoked as follows
  #
  #   dataset[:rip].invoke
  #
  # as well as by using the command line +imw+ tool.
  #
  # Defining tasks can be skipped altogether by passing the
  # :skip_workflow option when instantiating a dataset
  #
  #   dataset = IMW::Dataset.new :my_dataset, :skip_workflow => true
  #
  # == Working with Repositories
  #
  # A dataset can be added to a repository by passing the
  # :repository option
  #
  #   repo    = IMW::Repository.new
  #   dataset = IMW::Dataset.new :my_dataset, :repository => repo
  class Dataset

    # The handle this dataset goes by.  Used for identifying it
    # within a repository.
    attr_accessor :handle

    # Options for this dataset.
    attr_accessor :options

    # Build a dataset known by +handle+.
    #
    # The following keys of +options+ are consulted here:
    #
    # <tt>:skip_paths</tt>::    when truthy, neither +set_default_paths+
    #                           nor +set_paths+ is called.
    # <tt>:skip_workflow</tt>:: when truthy, +initialize_workflow+ is
    #                           not called.
    # <tt>:repository</tt>::    when present, this dataset is stored in
    #                           it under +handle+ (via <tt>[]=</tt>).
    #
    # +set_default_paths+, +set_paths+ and +initialize_workflow+ are
    # not defined in this file; they are expected to come from the
    # required files / included modules.
    def initialize(handle, options = {})
      @options = options
      @handle  = handle

      # Default paths are laid down first so that custom paths can
      # build on them.
      unless options[:skip_paths]
        set_default_paths
        set_paths
      end

      initialize_workflow unless options[:skip_workflow]

      # Register with the repository last, once the dataset is set up.
      repository = options[:repository]
      repository[handle] = self if repository
    end

    # Provides this dataset with a workflow of tasks managed by Rake.
    include IMW::Workflow

    # Provides this dataset with DSL-like methods to construct a
    # schema in an IMW file.
    include IMW::Metadata::DSL

  end
end