lib/unbreakable/scraper.rb in unbreakable-0.0.1 vs lib/unbreakable/scraper.rb in unbreakable-0.0.2
- old
+ new
@@ -8,16 +8,16 @@
# You may implement a scraper by subclassing this class:
#
# require 'open-uri'
# class MyScraper < Unbreakable::Scraper
# # Stores the contents of +http://www.example.com/+ in +index.html+.
- # def retrieve
+ # def retrieve(*args)
# store(:path => 'index.html'){ open('http://www.example.com/').read }
# end
#
# # Processes +index.html+.
- # def process
+ # def process(*args)
# fetch('index.html').process(:transform).apply
# end
#
# # Alternatively, you can just set the files to fetch; they will be
# # processed using a +:transform+ processor that you must implement.
@@ -94,13 +94,13 @@
def run(args)
opts.parse!(args)
command = args.shift
case command
when 'retrieve'
- retrieve
+ retrieve(*args)
when 'process'
- process
+ process(*args)
when 'config'
print_configuration @app
when nil
puts opts
else
@@ -146,17 +146,19 @@
string
end
end
# Caches remote files to the datastore for later processing.
# This base implementation does no work and always raises; a subclass is
# expected to supply its own +retrieve+, as the +MyScraper+ example in the
# class comment does.
# @raise [NotImplementedError] always, in this base implementation
- def retrieve
+ # @param [Array] args splat of command-line arguments
+ def retrieve(*args)
# Nothing is retrieved here: +args+ is accepted but not used.
raise NotImplementedError
end
# Processes cached files into machine-readable data.
# Walks every record ID returned by +processable+, fetches the record and
# applies the +:transform+ processor to it.
- def process
+ # @param [Array] args splat of command-line arguments
+ def process(*args)
# One fetch / process / apply chain per record. The 0.0.2 (+) line differs
# from the 0.0.1 (-) line only in passing +args+ through the +:args+ option.
processable.each do |record|
- fetch(record).process(:transform).apply
+ fetch(record).process(:transform, :args => args).apply
end
end
# Returns a list of record IDs to process.
# @return [Array<String>] a list of record IDs to process