module Unbreakable
  # Processors are {http://markevans.github.com/dragonfly/file.Processing.html
  # Dragonfly} processors. For example:
  #
  #     class MyProcessor
  #       def coolify(temp_object, opts = {})
  #         SomeLib.coolify(temp_object.data, opts)
  #       end
  #
  #       def uglify(temp_object, ugliness)
  #         `uglify -i #{temp_object.path} -u #{ugliness}`
  #       end
  #
  #       def conditional(temp_object, format, pages)
  #         throw :unable_to_handle unless format == :pdf
  #         # do stuff
  #       end
  #
  #       private
  #
  #       def my_helper_method
  #         # do stuff
  #       end
  #     end
  #     MyScraper.processor.register MyProcessor
  #
  # Public methods must return an object with which a +TempObject+ may be
  # initialized (+String+, +File+, +Tempfile+, +Pathname+ or +TempObject+).
  #
  # You can raise +Dragonfly::Configurable::NotConfigured+ if a configurable
  # variable is required but missing. If a variable is invalid, you can raise
  # +Dragonfly::Configurable::BadConfigAttribute+.
  #
  # If a process has dependencies or conditions, then you can test for these
  # conditions and throw +:unable_to_handle+ to skip processing.
  #
  # If multiple processors define a public method by the same name, the methods
  # will be run in reverse order from the last processor to define the method
  # until one fails to throw +:unable_to_handle+. If every one of them throws
  # +:unable_to_handle+, then +Dragonfly::FunctionManager::UnableToHandle+ will
  # be raised.
  #
  # As such, if you are writing a document to plain-text converter, you can
  # write a pdftotext processor, a doctopdf processor, etc. which all define
  # a +to_text+ public method, and use +:unable_to_handle+ to make sure the
  # correct processor runs.
  module Processors
    # If you are writing a simple scraper and only need one processor, you may
    # implement a single +transform+ processor method by subclassing this class:
    #
    #     require 'nokogiri'
    #     class MyProcessor < Unbreakable::Processors::Transform
    #       # Extracts the page title from an HTML page.
    #       def perform
    #         Nokogiri::HTML(temp_object.data).at_css('title')
    #       end
    #
    #       # Saves the page title to an external database.
    #       def persist(arg)
    #         MyModel.create(:title => arg)
    #       end
    #     end
    #     MyScraper.processor.register MyProcessor
    #
    # The following instance methods must be implemented in sub-classes:
    #
    # * +perform+
    # * +persist+
    #
    # +transform+ calls +persist+ with the output of +perform+. This makes it
    # easy for others to subclass your processor and just change the +persist+
    # method to change the external database, for example, while still taking
    # advantage of the hard work done by +perform+.
    class Transform
      include Dragonfly::Configurable
      include Dragonfly::Loggable

      attr_reader :temp_object, :opts

      # +#transform+ must be defined on the subclass for Dragonfly to see it,
      # so it is (re)defined on every class that inherits from this one.
      #
      # @param [Class] subclass a subclass
      def self.inherited(subclass)
        # Keep the hook chain intact: ancestors or extended modules may define
        # their own +inherited+ hook.
        super

        subclass.class_eval do
          # Stores the arguments so that +perform+ and +persist+ can read them
          # through the +temp_object+ and +opts+ readers, then hands the output
          # of +perform+ to +persist+.
          #
          # @param [Dragonfly::TempObject] temp_object
          # @param [Hash] opts
          # @return [Dragonfly::TempObject] the same object
          def transform(temp_object, opts = {})
            @temp_object = temp_object
            @opts = opts
            persist(perform)
            temp_object
          end
        end
      end

      private

      # Transforms a record.
      #
      # @return [Hash] the transformed record
      # @raise [NotImplementedError] unless overridden in a subclass
      def perform
        raise NotImplementedError, "#{self.class.name}#perform must be implemented"
      end

      # Persists a transformed record.
      #
      # @param arg a transformed record
      # @raise [NotImplementedError] unless overridden in a subclass
      def persist(arg)
        raise NotImplementedError, "#{self.class.name}#persist must be implemented"
      end
    end
  end
end