lib/unbreakable/processors/transform.rb in unbreakable-0.0.3 vs lib/unbreakable/processors/transform.rb in unbreakable-0.0.4
- old
+ new
@@ -1,8 +1,53 @@
module Unbreakable
+ # Processors are {http://markevans.github.com/dragonfly/file.Processing.html
+ # Dragonfly} processors. For example:
+ #
+ # class MyProcessor
+ # def coolify(temp_object, opts = {})
+ # SomeLib.coolify(temp_object.data, opts)
+ # end
+ #
+ # def uglify(temp_object, ugliness)
+ # `uglify -i #{temp_object.path} -u #{ugliness}`
+ # end
+ #
+ # def conditional(temp_object, format, pages)
+ # throw :unable_to_handle unless format == :pdf
+ # # do stuff
+ # end
+ #
+ # private
+ #
+ # def my_helper_method
+ # # do stuff
+ # end
+ # end
+ # MyScraper.processor.register MyProcessor
+ #
+ # Public methods must return an object with which a +TempObject+ may be
+ # initialized (+String+, +File+, +Tempfile+, +Pathname+ or +TempObject+).
+ #
+ # You can raise +Dragonfly::Configurable::NotConfigured+ if a configurable
+ # variable is required but missing. If a variable is invalid, you can raise
+ # +Dragonfly::Configurable::BadConfigAttribute+.
+ #
+ # If a process has dependencies or conditions, then you can test for these
+ # conditions and throw +:unable_to_handle+ to skip processing.
+ #
+ # If multiple processors define a public method by the same name, the methods
+ # will be tried in reverse order, starting from the last processor to define
+ # the method, until one completes without throwing +:unable_to_handle+. If
+ # all of them throw +:unable_to_handle+, then
+ # +Dragonfly::FunctionManager::UnableToHandle+ will be raised.
+ #
+ # As such, if you are writing a document-to-plain-text converter, you can
+ # write a pdftotext processor, a doctopdf processor, etc., which each define
+ # a public +to_text+ method, and throw +:unable_to_handle+ to make sure the
+ # correct processor runs.
module Processors
- # You may implement a transform process by subclassing this class:
+ # If you are writing a simple scraper and only need one processor, you may
+ # implement a single +transform+ processor method by subclassing this class:
#
# require 'nokogiri'
# class MyProcessor < Unbreakable::Processors::Transform
# # Extracts the page title from an HTML page.
# def perform
@@ -19,11 +64,13 @@
# The following instance methods must be implemented in sub-classes:
#
# * +perform+
# * +persist+
#
- # You may also override +transform+, which calls +perform+ and +persist+ in
- # the default implementation, but you probably won't have to.
+ # +transform+ calls +persist+ with the output of +perform+. This makes it
+ # easy for others to subclass your processor and override only +persist+
+ # (for example, to write to a different external database) while still
+ # taking advantage of the hard work done by +perform+.
class Transform
include Dragonfly::Configurable
include Dragonfly::Loggable
attr_reader :temp_object, :opts