Sha256: fcd8f03194ee8f15339255d3e98329686202ed60a2729c2f205116dd81c7822c

Contents?: true

Size: 1.84 KB

Versions: 21

Compression:

Stored size: 1.84 KB

Contents

# The crawl class gives easy access to information about the crawl, and gives
# the ability to stop a crawl.
#
# An instance wraps the crawl's data hash (read here via :crawl_id and
# :redis_options) and a Stats object built from that same hash.
class CobwebCrawlHelper

  # Number of queue entries inspected per Resque.peek call while cleaning up.
  BATCH_SIZE = 200
  FINISHED = "Finished"
  STARTING = "Starting"
  CANCELLED = "Cancelled"
  # Default number of one-second polls to wait for the crawl status to become
  # CANCELLED before the queue is cleaned up regardless.
  CANCEL_WAIT_SECONDS = 200

  # Only the writer is generated: the reader is defined explicitly below and
  # always reports the crawl id held in the data hash.
  attr_writer :id

  # data - Hash describing the crawl; it is handed to Stats.new unchanged.
  def initialize(data)
    @data = data

    # NOTE: building Stats has been observed to take a long time on the
    # production box.
    @stats = Stats.new(data)
  end

  # Cancels the crawl: ends the crawl in the statistics, optionally enqueues
  # the finished job, waits for the status to read CANCELLED and finally
  # removes this crawl's pending jobs from the Resque queue.
  #
  # options - Hash of settings; the caller's hash is not modified:
  #   :queue_name            - queue to scan (default "cobweb_crawl_job").
  #   :finished_resque_queue - job class to enqueue with the final statistics
  #                            (default CobwebFinishedJob); pass nil/false to
  #                            skip enqueueing.
  #   :source_id             - copied into the enqueued statistics when not nil.
  #   :cancel_timeout        - maximum number of one-second polls to wait for
  #                            the CANCELLED status (default
  #                            CANCEL_WAIT_SECONDS).
  #
  # Returns nil.
  def destroy(options={})
    # Work on a copy so that filling in defaults never leaks into the caller's hash.
    options = options.dup
    options[:queue_name] = "cobweb_crawl_job" unless options.has_key?(:queue_name)
    options[:finished_resque_queue] = CobwebFinishedJob unless options.has_key?(:finished_resque_queue)

    # set status as cancelled now so that we don't enqueue any further pages
    statistics.end_crawl(@data, true)

    enqueue_finished_job(options) if options[:finished_resque_queue]
    wait_for_cancellation(options.fetch(:cancel_timeout, CANCEL_WAIT_SECONDS))
    remove_queued_jobs(options[:queue_name])

    nil
  end

  # Returns the Stats object created for this crawl.
  def statistics
    @stats
  end

  # Returns the current crawl status as reported by the statistics object.
  def status
    statistics.get_status
  end

  # Returns the crawl id stored in the data hash.
  def id
    @data[:crawl_id]
  end

  private

  # Enqueues the finished job with the crawl statistics plus identifying details.
  def enqueue_finished_job(options)
    additional_stats = {:crawl_id => id, :crawled_base_url => @stats.redis.get("crawled_base_url")}
    additional_stats[:redis_options] = @data[:redis_options] unless @data[:redis_options] == {}
    additional_stats[:source_id] = options[:source_id] unless options[:source_id].nil?

    Resque.enqueue(options[:finished_resque_queue], @stats.get_statistics.merge(additional_stats))
  end

  # Polls the status once per second, at most max_polls times, until it reads CANCELLED.
  def wait_for_cancellation(max_polls)
    max_polls.times do
      break if status == CANCELLED
      sleep 1
    end
  end

  # Scans the queue in BATCH_SIZE windows and dequeues the jobs of this crawl.
  def remove_queued_jobs(queue_name)
    # Walk from the tail of the queue towards the head so that removing
    # entries does not shift the windows that are still to be inspected.
    position = Resque.size(queue_name)
    until position == 0
      position = [position - BATCH_SIZE, 0].max
      Resque.peek(queue_name, position, BATCH_SIZE).each do |item|
        job_args = item.is_a?(Hash) ? item["args"] : nil
        payload = job_args.is_a?(Array) ? job_args.first : nil
        # Entries without a hash payload cannot belong to this crawl; skip
        # them instead of failing on a nil or scalar argument.
        next unless payload.is_a?(Hash) && payload["crawl_id"] == id

        # remove this job from the queue
        # NOTE(review): Resque.dequeue takes its queue from CrawlJob rather
        # than from queue_name - confirm the two agree when a custom
        # :queue_name is supplied.
        Resque.dequeue(CrawlJob, payload)
      end
    end
  end

end

Version data entries

21 entries across 21 versions & 1 rubygems

Version Path
cobweb-1.0.19 lib/cobweb_crawl_helper.rb
cobweb-1.0.18 lib/cobweb_crawl_helper.rb
cobweb-1.0.17 lib/cobweb_crawl_helper.rb
cobweb-1.0.16 lib/cobweb_crawl_helper.rb
cobweb-1.0.15 lib/cobweb_crawl_helper.rb
cobweb-1.0.12 lib/cobweb_crawl_helper.rb
cobweb-1.0.11 lib/cobweb_crawl_helper.rb
cobweb-1.0.10 lib/cobweb_crawl_helper.rb
cobweb-1.0.9 lib/cobweb_crawl_helper.rb
cobweb-1.0.8 lib/cobweb_crawl_helper.rb
cobweb-1.0.6 lib/cobweb_crawl_helper.rb
cobweb-1.0.5 lib/cobweb_crawl_helper.rb
cobweb-1.0.4 lib/cobweb_crawl_helper.rb
cobweb-1.0.3 lib/cobweb_crawl_helper.rb
cobweb-1.0.2 lib/cobweb_crawl_helper.rb
cobweb-1.0.1 lib/cobweb_crawl_helper.rb
cobweb-1.0.0 lib/cobweb_crawl_helper.rb
cobweb-0.0.77 lib/cobweb_crawl_helper.rb
cobweb-0.0.76 lib/cobweb_crawl_helper.rb
cobweb-0.0.75 lib/cobweb_crawl_helper.rb