Sha256: e2e1e334af7fcac3f6978857cad37c84b3e0e4aae3d2e3b4d94cc48f286e9f31

Contents?: true

Size: 965 Bytes

Versions: 1

Compression:

Stored size: 965 Bytes

Contents

require 'page_by_page/version'
require 'page_by_page/url'
require 'nokogiri'
require 'open-uri'

# Collects the nodes matching a CSS selector from a sequence of pages.
#
# Configuration happens through a small DSL evaluated against the instance
# (+url+, +selector+, +from+, +step+). #fetch then requests one page after
# another until a page yields no matches or the server answers with a 404.
class PageByPage

  class << self
    # Builds an instance configured by the block and runs #fetch on it.
    #
    # @yield configuration block, evaluated with the new instance as +self+
    # @return [Array] the result of #fetch
    def fetch(&block)
      new(&block).fetch
    end
  end

  # @yield optional configuration block, evaluated with the instance as
  #   +self+ so the DSL methods below can be called without a receiver
  def initialize(&block)
    # instance_eval raises ArgumentError when called with neither a string
    # nor a block, so only evaluate when a block was actually given.
    instance_eval(&block) if block
  end

  # Sets the URL template handed to Url.new by #fetch.
  #
  # @param tmpl the template; its expected format is defined by Url, which
  #   is not part of this file
  def url(tmpl)
    @tmpl = tmpl
  end

  # Sets the selector passed to +css+ on every parsed page.
  #
  # @param sl the CSS selector
  def selector(sl)
    @selector = sl
  end

  # Sets the +:from+ option handed to Url.new (1 when never set).
  #
  # @param n presumably the first page number -- confirm against Url
  def from(n)
    @from = n
  end

  # Sets the +:step+ option handed to Url.new (1 when never set).
  #
  # @param n presumably the page number increment -- confirm against Url
  def step(n)
    @step = n
  end

  # Requests pages until one has no node matching the selector, or until a
  # request ends in a 404, and returns everything matched so far.
  #
  # @return [Array] matched nodes of all pages, in request order
  # @raise [OpenURI::HTTPError] for any HTTP error other than a 404
  def fetch
    pager = Url.new(@tmpl, options)
    # Seeded with a non-empty array so the loop body runs at least once.
    items, all_items = [nil], []
    # #parse throws :no_more on a 404; whatever was collected is kept.
    catch :no_more do
      until items.empty?
        doc = parse(pager.next)
        items = doc.css(@selector)
        all_items << items
      end
    end
    # Each page contributed one collection; merge them into a flat array.
    all_items.flatten
  end

  private

  # Downloads and parses one page.
  #
  # URI.open is used instead of a bare +open+: since Ruby 3.0 open-uri no
  # longer patches Kernel#open, so a bare +open+ would treat the URL as a
  # local file path. The block form closes the handle once parsing is done.
  #
  # @param url the address to request
  # @return the document built by Nokogiri::HTML
  # @raise [OpenURI::HTTPError] for any HTTP error other than a 404
  def parse(url)
    URI.open(url) { |io| Nokogiri::HTML(io) }
  rescue OpenURI::HTTPError => e
    # Match on the status code only: the reason phrase after it differs
    # between servers (e.g. "404 NOT FOUND"), and any 404 ends pagination.
    throw :no_more if e.message.start_with?('404')
    raise
  end

  # Options for Url.new, with both values defaulting to 1.
  #
  # @return [Hash] a hash with the keys +:from+ and +:step+
  def options
    { from: @from || 1, step: @step || 1 }
  end

end

Version data entries

1 entry across 1 version & 1 rubygem

Version Path
page_by_page-0.1.3 lib/page_by_page.rb