Sha256: f81615ae2cd6f201cdca571589f09df15172ea20dcc25f84c82e7f92b4a489b6
Contents?: true
Size: 1.55 KB
Versions: 4
Compression:
Stored size: 1.55 KB
Contents
require 'spec_helper'

# Specs for Wombat::Processing::Parser, driven through an anonymous class
# that mixes the module in.
describe Wombat::Processing::Parser do
  before do
    # Anonymous host class so the mixin can be instantiated on its own.
    host_class = Class.new { include Wombat::Processing::Parser }
    @parser = host_class.new
    @metadata = Wombat::DSL::Metadata.new
  end

  it 'should request page document with correct url' do
    @metadata.base_url "http://www.google.com"
    @metadata.path "/search"

    page = double :document
    page_parser = double :parser
    page_header = double :header

    # The fetched page is expected to be asked for its parser and header,
    # and the parser is expected to receive a headers= assignment.
    page.should_receive(:parser).and_return(page_parser)
    page.should_receive(:header).and_return(page_header)
    page_parser.should_receive(:headers=)

    # base_url and path are expected to be joined into the requested URL.
    @parser.mechanize.should_receive(:get).with("http://www.google.com/search").and_return page

    @parser.parse @metadata
  end

  it 'should correctly parse xml documents' do
    @metadata.document_format :xml

    response = double :xml
    xml_parser = double :parser
    response_headers = double :headers

    # With the :xml format, mechanize must not be used for the request;
    # RestClient fetches and Nokogiri.XML builds the parser.
    @parser.mechanize.should_not_receive(:get)
    RestClient.should_receive(:get).and_return response
    Nokogiri.should_receive(:XML).with(response).and_return xml_parser
    response.should_receive(:headers).and_return(response_headers)
    xml_parser.should_receive(:headers=)

    @parser.parse @metadata
  end

  it 'should accept a Mechanize::Page' do
    VCR.use_cassette('basic_crawler_page') do
      # A page supplied through the metadata must not trigger another fetch.
      page = Mechanize.new.get('http://www.terra.com.br/portal')
      @metadata.page page

      @parser.mechanize.should_not_receive(:get)

      @parser.parse @metadata
    end
  end
end
Version data entries
4 entries across 4 versions & 1 rubygem
Version | Path |
---|---|
wombat-2.4.0 | spec/processing/parser_spec.rb |
wombat-2.3.0 | spec/processing/parser_spec.rb |
wombat-2.2.1 | spec/processing/parser_spec.rb |
wombat-2.2.0 | spec/processing/parser_spec.rb |