Sha256: 5ee18061df66a6b2ef129d8493780cad5892c90a6c92d767c669bd45c2b1bd2f
Contents?: true
Size: 932 Bytes
Versions: 3
Compression:
Stored size: 932 Bytes
Contents
require 'page_by_page/common'

module PageByPage
  # Collects items from a chain of pages by repeatedly following a
  # "next page" link found on each fetched page.
  #
  # NOTE(review): +parse+, +update_progress+, +limit+ and the +@selector+,
  # +@progress+ and +@interval+ instance variables are not defined in this
  # file; presumably they come from Common -- confirm there.
  class Jump
    include Common

    # Matches a URL that already carries a scheme and authority
    # (e.g. "https://host/..."). Capture 1 is the scheme including its colon.
    ABSOLUTE_URL = %r{\A([a-z][a-z0-9+.\-]*:)//}i
    private_constant :ABSOLUTE_URL

    # Sets the URL of the first page to fetch.
    #
    # @param url [String] URL of the first page
    def start(url)
      @start = url
    end

    # Sets the CSS selector that locates the "next page" link on each page.
    #
    # @param selector [String] CSS selector of the element whose +href+
    #   points at the following page
    def iterate(selector)
      @iterate = selector
    end

    # Fetches pages one after another, starting at the URL given to #start,
    # and gathers every node matching +@selector+ from each of them.
    #
    # The walk ends when the page count reaches +limit+, when a page has no
    # element matching the #iterate selector, or when that element has no
    # usable +href+.
    #
    # @return [Array] the matched nodes of all visited pages, in visit order
    def process
      url = @start
      items = []
      page_count = 0

      loop do
        doc = parse url
        doc.css(@selector).each { |item| items << item }

        page_count += 1
        update_progress Thread.current, page_count if @progress
        break if page_count >= limit

        next_url = doc.at_css(@iterate)
        break unless next_url

        path = next_url.attr('href')
        # An element without an href (or with an empty one) does not lead to
        # another page; stop instead of raising on nil further down.
        break if path.nil? || path.empty?

        url = concat_host path

        sleep @interval if @interval
      end

      # Emits a bare newline once progress reporting was active.
      puts if @progress

      items
    end

    private

    # Turns the +href+ of a "next page" link into a URL that can be fetched.
    #
    # NOTE(review): relative hrefs are resolved against +@start+, not against
    # the page the link was actually found on.
    #
    # @param path [String] the href value taken from the link
    # @return [String] the URL of the next page
    def concat_host(path)
      # Already a complete URL: joining it onto a prefix would corrupt it.
      return path if path =~ ABSOLUTE_URL

      # Protocol-relative href ("//host/path"): borrow the scheme of the
      # start URL. Falls through to the legacy join when @start has none.
      if path.start_with?('//')
        scheme = @start[ABSOLUTE_URL, 1]
        return "#{scheme}#{path}" if scheme
      end

      # Root-relative href: cut the start URL right after the host.
      # Otherwise: cut off only the last path segment of the start URL.
      # ('|' inside these character classes is a literal pipe character.)
      regex = path.start_with?('/') ? /([^:|\/])\/.*/ : /(.*[^:|\/])\/.*/
      @prefix = @start.gsub(regex, '\1')

      File.join @prefix, path
    end
  end
end
Version data entries
3 entries across 3 versions & 1 rubygems
Version | Path |
---|---|
page_by_page-0.1.13 | lib/page_by_page/jump.rb |
page_by_page-0.1.12 | lib/page_by_page/jump.rb |
page_by_page-0.1.11 | lib/page_by_page/jump.rb |