Sha256: 1926938793e25f25fd44c02dc54b0c719343ecc443709d0c2d4054fd4a63baea

Contents?: true

Size: 1.1 KB

Versions: 1

Compression:

Stored size: 1.1 KB

Contents

require 'page_by_page/version'
require 'page_by_page/enum'
require 'erb'
require 'nokogiri'
require 'open-uri'

# Collects the nodes matching a CSS selector from a sequence of numbered pages.
#
# The instance is configured through a small DSL (+url+, +selector+, +from+,
# +step+, +to+) that is evaluated against the instance itself, e.g.
#
#   PageByPage.fetch do
#     url 'https://example.com/list?page=<%= n %>'
#     selector '.item'
#     from 1
#     to 10
#   end
class PageByPage

  class << self
    # Builds an instance from the DSL block and immediately runs #fetch.
    #
    # @return [Array] every matched node from every visited page
    def fetch(&block)
      new(&block).fetch
    end
  end

  # Evaluates the optional DSL block in the context of the new instance.
  # Without a block the instance can still be configured by calling the
  # setter methods directly.
  def initialize(&block)
    instance_eval(&block) if block
  end

  # Sets the page URL as an ERB template. The template is rendered with the
  # binding of #fetch, so the current page number is available as +n+.
  #
  # @param tmpl [String] ERB source, e.g. 'http://host/page/<%= n %>'
  def url(tmpl)
    @tmpl = ERB.new(tmpl)
  end

  # Sets the CSS selector passed to the parsed document's +css+ method.
  def selector(sl)
    @selector = sl
  end

  # Sets the +:from+ option handed to Enum (defaults to 1 when unset).
  def from(n)
    @from = n
  end

  # Sets the +:step+ option handed to Enum (defaults to 1 when unset).
  def step(n)
    @step = n
  end

  # Sets the highest page number to visit (unbounded when unset).
  def to(n)
    @to = n
  end

  # Visits pages one after another and gathers the nodes matching the selector.
  #
  # Iteration stops when a page yields no matching nodes, when the number
  # returned by the enumerator exceeds the +to+ limit, or when the server
  # answers with HTTP 404 (signalled by #parse via +throw :no_more+).
  #
  # @return [Array] flattened list of the matches of all visited pages
  # @raise [OpenURI::HTTPError] for any HTTP error other than 404
  def fetch
    # Enum comes from page_by_page/enum; presumably it yields from, from+step,
    # ... on each #next -- confirm against that file.
    enum = Enum.new(options)
    # Seed with a non-empty placeholder so the loop body runs at least once.
    items = [nil]
    all_items = []
    catch :no_more do
      until items.empty?
        # The name +n+ is part of the template contract: the ERB source reads
        # it through +binding+, so it must not be renamed.
        n = enum.next
        break if n > limit
        page_url = @tmpl.result(binding)
        items = parse(page_url).css(@selector)
        all_items << items
      end
    end
    all_items.flatten
  end

  private

  # Downloads and parses a single page.
  #
  # URI.open is used instead of Kernel#open: the latter no longer handles URLs
  # on Ruby 3.0+ and would execute a command for a string starting with "|".
  # The block form closes the response IO once the document has been built.
  #
  # @param url [String] the rendered page URL
  # @return the document produced by Nokogiri::HTML
  # @raise [OpenURI::HTTPError] for any HTTP error other than 404
  def parse(url)
    URI.open(url) { |io| Nokogiri::HTML(io) }
  rescue OpenURI::HTTPError => e
    # Decide on the numeric status code only; the reason phrase that follows
    # it in the message is server-defined and not always "Not Found".
    throw :no_more if e.io.status.first == '404'
    raise
  end

  # Options handed to Enum, with defaults for anything left unset.
  def options
    { from: @from || 1, step: @step || 1 }
  end

  # Highest page number allowed; infinite when +to+ was never called.
  def limit
    @to || Float::INFINITY
  end

end

Version data entries

1 entries across 1 versions & 1 rubygems

Version Path
page_by_page-0.1.4 lib/page_by_page.rb