Sha256: 2bc6164fb5effa90296cb1ffa961e537b75880f126dacd893abe85c9e87581a4

Contents?: true

Size: 1.38 KB

Versions: 1

Compression:

Stored size: 1.38 KB

Contents

#coding: utf-8
require 'wombat/property_locator'
require 'mechanize'
require 'restclient'

module Wombat
  # Mixin that downloads the document described by a metadata object and
  # evaluates the metadata's properties against it.
  #
  # Uses +locate+ and +select_nodes+, which are not defined in this module
  # (presumably supplied by PropertyLocator -- confirm there).
  module Parser
    include PropertyLocator
    attr_accessor :mechanize, :context, :response_code

    # Creates the Mechanize agent used for HTML requests.
    def initialize
      @mechanize = Mechanize.new
    end

    # Fetches the document for +metadata+, runs every iterator over the nodes
    # matched by its selector, then evaluates the top-level properties against
    # the whole document.
    #
    # metadata - object answering +[]+, +iterators+, +parse+ and +flatten+.
    #
    # Returns the result of +metadata.flatten+.
    # Re-raises any error raised while fetching the document.
    def parse(metadata)
      document = parser_for metadata
      self.context = document

      metadata.iterators.each do |iterator|
        iterator.reset # Clean up iterator results before starting
        select_nodes(iterator.selector).each do |node|
          # Properties inside an iterator are located relative to each node.
          self.context = node
          iterator.parse { |property| locate property }
        end
      end

      # Top-level properties are located relative to the whole document again.
      self.context = document

      metadata.parse { |property| locate property }

      metadata.flatten
    end

    private

    # Requests base_url + list_page and returns the parsed document:
    # Mechanize's page parser when document_format is :html, otherwise the
    # RestClient response fed through Nokogiri::XML.
    #
    # Stores the HTTP status in +response_code+ when the page (or the raised
    # error) exposes one; errors are always re-raised to the caller.
    def parser_for(metadata)
      url = "#{metadata[:base_url]}#{metadata[:list_page]}"

      # Forget the status of any earlier request so that a failure carrying no
      # status (e.g. a connection error) is not reported with a stale value.
      self.response_code = nil

      if metadata[:document_format] == :html
        page = @mechanize.get(url)
        parser = page.parser
      else
        page = RestClient.get(url)
        parser = Nokogiri::XML page
      end

      self.response_code = page.code.to_i if page.respond_to? :code
      parser
    rescue StandardError => e
      # The status accessor differs by error type (presumably http_code comes
      # from RestClient and response_code from Mechanize -- confirm).
      if e.respond_to? :http_code
        self.response_code = e.http_code.to_i
      elsif e.respond_to? :response_code
        self.response_code = e.response_code.to_i
      end
      raise
    end
  end
end

Version data entries

1 entries across 1 versions & 1 rubygems

Version Path
wombat-1.0.0 lib/wombat/parser.rb