Sha256: f81615ae2cd6f201cdca571589f09df15172ea20dcc25f84c82e7f92b4a489b6

Contents?: true

Size: 1.55 KB

Versions: 4

Compression:

Stored size: 1.55 KB

Contents

require 'spec_helper'

# Specs for Wombat::Processing::Parser, exercised through an anonymous host
# class that mixes the module in.
describe Wombat::Processing::Parser do
  before(:each) do
    # A throwaway class is enough to host the mixin under test.
    host_class = Class.new { include Wombat::Processing::Parser }
    @parser = host_class.new
    @metadata = Wombat::DSL::Metadata.new
  end

  it 'should request page document with correct url' do
    header_double = double(:header)
    parser_double = double(:parser)
    page_double = double(:document)

    # The fetched page must expose both its parser and its header, and the
    # parser must have its headers assigned.
    page_double.should_receive(:header).and_return(header_double)
    page_double.should_receive(:parser).and_return(parser_double)
    parser_double.should_receive(:headers=)

    @metadata.base_url "http://www.google.com"
    @metadata.path "/search"

    # The requested URL is the base_url followed by the path.
    @parser.mechanize
           .should_receive(:get)
           .with("http://www.google.com/search")
           .and_return(page_double)

    @parser.parse(@metadata)
  end

  it 'should correctly parse xml documents' do
    headers_double = double(:headers)
    parser_double = double(:parser)
    response_double = double(:xml)

    @metadata.document_format :xml

    # XML documents are fetched through RestClient, never through mechanize.
    @parser.mechanize.should_not_receive(:get)
    RestClient.should_receive(:get).and_return(response_double)

    # The response is handed to Nokogiri, and the resulting parser has its
    # headers assigned.
    Nokogiri.should_receive(:XML).with(response_double).and_return(parser_double)
    response_double.should_receive(:headers).and_return(headers_double)
    parser_double.should_receive(:headers=)

    @parser.parse(@metadata)
  end

  it 'should accept a Mechanize::Page' do
    VCR.use_cassette('basic_crawler_page') do
      # Fetch the page up front with a separate agent and hand it to the metadata.
      prefetched_page = Mechanize.new.get('http://www.terra.com.br/portal')
      @metadata.page prefetched_page

      # With a page already supplied, the parser's own agent must not fetch.
      @parser.mechanize.should_not_receive(:get)

      @parser.parse(@metadata)
    end
  end

end

Version data entries

4 entries across 4 versions & 1 rubygem

Version Path
wombat-2.4.0 spec/processing/parser_spec.rb
wombat-2.3.0 spec/processing/parser_spec.rb
wombat-2.2.1 spec/processing/parser_spec.rb
wombat-2.2.0 spec/processing/parser_spec.rb