lib/unbreakable/processors/transform.rb in unbreakable-0.0.3 vs lib/unbreakable/processors/transform.rb in unbreakable-0.0.4

- old
+ new

@@ -1,8 +1,53 @@ module Unbreakable + # Processors are {http://markevans.github.com/dragonfly/file.Processing.html + # Dragonfly} processors. For example: + # + # class MyProcessor + # def coolify(temp_object, opts = {}) + # SomeLib.coolify(temp_object.data, opts) + # end + # + # def uglify(temp_object, ugliness) + # `uglify -i #{temp_object.path} -u #{ugliness}` + # end + # + # def conditional(temp_object, format, pages) + # throw :unable_to_handle unless format == :pdf + # # do stuff + # end + # + # private + # + # def my_helper_method + # # do stuff + # end + # end + # MyScraper.processor.register MyProcessor + # + # Public methods must return an object with which a +TempObject+ may be + # initialized (+String+, +File+, +Tempfile+, +Pathname+ or +TempObject+). + # + # You can raise +Dragonfly::Configurable::NotConfigured+ if a configurable + # variable is required but missing. If a variable is invalid, you can raise + # +Dragonfly::Configurable::BadConfigAttribute+. + # + # If a process has dependencies or conditions, then you can test for these + # conditions and throw +:unable_to_handle+ to skip processing. + # + # If multiple processors define a public method by the same name, the methods + # will be run in reverse order from the last processor to define the method + # until one does not throw +:unable_to_handle+. If all of them throw it, then + # +Dragonfly::FunctionManager::UnableToHandle+ will be raised. + # + # As such, if you are writing a document to plain-text converter, you can + # write a pdftotext processor, a doctopdf processor, etc. which all define + # a +to_text+ public method, and use +:unable_to_handle+ to make sure the + # correct processor runs. 
module Processors - # You may implement a transform process by subclassing this class: + # If you are writing a simple scraper and only need one processor, you may + # implement a single +transform+ processor method by subclassing this class: # # require 'nokogiri' # class MyProcessor < Unbreakable::Processors::Transform # # Extracts the page title from an HTML page. # def perform @@ -19,11 +64,13 @@ # The following instance methods must be implemented in sub-classes: # # * +perform+ # * +persist+ # - # You may also override +transform+, which calls +perform+ and +persist+ in - # the default implementation, but you probably won't have to. + # +transform+ calls +persist+ with the output of +perform+. This makes it + # easy for others to subclass your processor and just change the +persist+ + # method to change the external database, for example, while still taking + # advantage of the hard work done by +perform+. class Transform include Dragonfly::Configurable include Dragonfly::Loggable attr_reader :temp_object, :opts