lib/pupa/processor.rb in pupa-0.0.5 vs lib/pupa/processor.rb in pupa-0.0.6
- old
+ new
@@ -15,11 +15,11 @@
include Helper
# Declares `tasks` through `class_attribute` and starts it as an empty array.
class_attribute :tasks
self.tasks = []
# 0.0.6 adds `client` and `options` readers alongside the existing `report`.
- attr_reader :report
+ attr_reader :report, :client, :options
# Forwards the listed logging methods to @logger.
def_delegators :@logger, :debug, :info, :warn, :error, :fatal
# @param [String] output_dir the directory in which to dump JSON documents
# @param [String] cache_dir the directory in which to cache HTTP responses
@@ -52,20 +52,20 @@
params[key] = value.first
end
end
end
- @client.get(url, params).body
+ client.get(url, params).body
end
# Retrieves and parses a document with a POST request.
#
# @param [String] url a URL to an HTML document
# @param [String,Hash] params query string parameters
# @return a parsed document
def post(url, params = {})
# Issues the POST through the client and returns the `body` of the response.
# 0.0.6 calls the `client` reader instead of reading @client directly.
- @client.post(url, params).body
+ client.post(url, params).body
end
# Adds a scraping task to Pupa.rb.
#
# Defines a method whose name is identical to `task_name`. This method
@@ -275,9 +275,11 @@
#
# @param [Hash] objects a hash of scraped objects keyed by ID
# @return [Hash] a mapping from an object ID to the ID of its duplicate
def build_losers_to_winners_map(objects)
{}.tap do |map|
+ # We don't need to iterate on the last item in the hash, but skipping
+ # the last item is more effort than running the last item.
objects.each_with_index do |(id1,object1),index|
unless map.key?(id1) # Don't search for duplicates of duplicates.
objects.drop(index + 1).each do |id2,object2|
if object1 == object2
map[id2] = id1