lib/pupa/processor.rb in pupa-0.0.5 vs lib/pupa/processor.rb in pupa-0.0.6

- old
+ new

@@ -15,11 +15,11 @@ include Helper class_attribute :tasks self.tasks = [] - attr_reader :report + attr_reader :report, :client, :options def_delegators :@logger, :debug, :info, :warn, :error, :fatal # @param [String] output_dir the directory in which to dump JSON documents # @param [String] cache_dir the directory in which to cache HTTP responses @@ -52,20 +52,20 @@ params[key] = value.first end end end - @client.get(url, params).body + client.get(url, params).body end # Retrieves and parses a document with a POST request. # # @param [String] url a URL to an HTML document # @param [String,Hash] params query string parameters # @return a parsed document def post(url, params = {}) - @client.post(url, params).body + client.post(url, params).body end # Adds a scraping task to Pupa.rb. # # Defines a method whose name is identical to `task_name`. This method @@ -275,9 +275,11 @@ # # @param [Hash] objects a hash of scraped objects keyed by ID # @return [Hash] a mapping from an object ID to the ID of its duplicate def build_losers_to_winners_map(objects) {}.tap do |map| + # We don't need to iterate on the last item in the hash, but skipping + # the last item is more effort than running the last item. objects.each_with_index do |(id1,object1),index| unless map.key?(id1) # Don't search for duplicates of duplicates. objects.drop(index + 1).each do |id2,object2| if object1 == object2 map[id2] = id1