spec/agent/actions_spec.rb in spidr-0.5.0 vs spec/agent/actions_spec.rb in spidr-0.6.0

- old
+ new

@@ -1,60 +1,186 @@ -require 'spidr/agent' - require 'spec_helper' +require 'example_app' +require 'spidr/agent' + describe Agent do - describe "actions" do - let(:url) { URI('http://spidr.rubyforge.org/') } + describe "#continue!" do + before { subject.pause = true } + before { subject.continue! } - it "should be able to pause spidering" do - count = 0 - agent = Agent.host('spidr.rubyforge.org') do |spider| - spider.every_page do |page| - count += 1 - spider.pause! if count >= 2 + it "should un-pause the Agent" do + expect(subject.paused?).to be false + end + end + + describe "#pause=" do + it "should change the paused state" do + subject.pause = true + + expect(subject.paused?).to be true + end + end + + describe "#pause!" do + it "should raise Action::Paused" do + expect { + subject.pause! + }.to raise_error(described_class::Actions::Paused) + end + end + + describe "#paused?" do + context "when the agent is paused" do + before do + begin + subject.pause! + rescue described_class::Actions::Paused end end - expect(agent).to be_paused - expect(agent.history.length).to eq(2) + it { expect(subject.paused?).to be true } end - it "should be able to continue spidering after being paused" do - agent = Agent.new do |spider| - spider.every_page do |page| - spider.pause! + context "when the agent is not paused" do + it { expect(subject.paused?).to be false } + end + end + + describe "#skip_link!" do + it "should raise Actions::SkipLink" do + expect { + subject.skip_link! + }.to raise_error(described_class::Actions::SkipLink) + end + end + + describe "#skip_page!" do + it "should raise Actions::SkipPage" do + expect { + subject.skip_page! + }.to raise_error(described_class::Actions::SkipPage) + end + end + + context "when spidering" do + include_context "example App" + + context "when pause! is called" do + app do + get '/' do + %{<html><body><a href="/link">link</a></body></html>} end + + get '/link' do + %{<html><body>should not get here</body></html>} + end end - agent.enqueue(url) - agent.continue! + subject do + described_class.new(host: host) do |agent| + agent.every_page do |page| + if page.url.path == '/' + agent.pause! + end + end + end + end - expect(agent.visited?(url)).to eq(true) + it "should pause spidering" do + expect(subject).to be_paused + expect(subject.history).to be == Set[ + URI("http://#{host}/") + ] + end + + context "and continue! is called afterwards" do + before do + subject.enqueue "http://#{host}/link" + subject.continue! + end + + it "should continue spidering" do + expect(subject.history).to be == Set[ + URI("http://#{host}/"), + URI("http://#{host}/link") + ] + end + end end - it "should allow skipping of enqueued links" do - agent = Agent.new do |spider| - spider.every_url do |url| - spider.skip_link! + context "when skip_link! is called" do + app do + get '/' do + %{<html><body><a href="/link1">link1</a> <a href="/link2">link2</a> <a href="/link3">link3</a></body></html>} end + + get '/link1' do + %{<html><body>link1</body></html>} + end + + get '/link2' do + %{<html><body>link2</body></html>} + end + + get '/link3' do + %{<html><body>link3</body></html>} + end end - agent.enqueue(url) + subject do + described_class.new(host: host) do |agent| + agent.every_url do |url| + if url.path == '/link2' + agent.skip_link! + end + end + end + end - expect(agent.queue).to be_empty + it "should skip all links on the page" do + expect(subject.history).to be == Set[ + URI("http://#{host}/"), + URI("http://#{host}/link1"), + URI("http://#{host}/link3") + ] + end end - it "should allow skipping of visited pages" do - agent = Agent.new do |spider| - spider.every_page do |url| - spider.skip_page! + context "when skip_page! is called" do + app do + get '/' do + %{<html><body><a href="/link">entry link</a></body></html>} end + + get '/link' do + %{<html><body><a href="/link1">link1</a> <a href="/link2">link2</a></body></html>} + end + + get '/link1' do + %{<html><body>should not get here</body></html>} + end + + get '/link2' do + %{<html><body>should not get here</body></html>} + end end - agent.visit_page(url) + subject do + described_class.new(host: host) do |agent| + agent.every_page do |page| + if page.url.path == '/link' + agent.skip_page! + end + end + end + end - expect(agent.history).to eq(Set[url]) - expect(agent.queue).to be_empty + it "should skip all links on the page" do + expect(subject.history).to be == Set[ + URI("http://#{host}/"), + URI("http://#{host}/link") + ] + end end end end