require 'spec_helper' require 'example_page' require 'spidr/page' describe Page do include_context "example Page" let(:body) { %{example

hello

} } describe "#title" do context "when there is a title" do it "should return the title inner_text" do expect(subject.title).to be == 'example' end end context "when there is no title" do let(:body) { %{

hello

} } it "should return nil" do expect(subject.title).to be nil end end end describe "#each_meta_redirect" do context "when the Content-Type is text/html" do let(:content_type) { 'text/html' } context "and the HTML is valid" do let(:link) { '/link' } let(:refresh) { 'refresh' } let(:body) { %{Redirecting...} } it "should yield each meta http-equiv='refresh' URL" do expect { |b| subject.each_meta_redirect(&b) }.to yield_successive_args(link) end context "but when http-equiv is REFRESH" do let(:refresh) { 'REFRESH' } it "should ignore the case of refresh" do expect { |b| subject.each_meta_redirect(&b) }.to yield_successive_args(link) end end context "but the http-equiv attribute is missing" do let(:body) { %{Redirecting...} } it "should ignore those meta tags" do expect { |b| subject.each_meta_redirect(&b) }.to yield_successive_args(link) end end context "but http-equiv is not refresh" do let(:body) { %{} } it "should ignore those meta tags" do expect { |b| subject.each_meta_redirect(&b) }.to yield_successive_args(link) end end context "but the content attribute is missing" do let(:body) { %{Redirecting...} } it "should ignore those meta tags" do expect { |b| subject.each_meta_redirect(&b) }.to yield_successive_args(link) end end context "but the content attribute does not contain url=..." do let(:body) { %{Redirecting...} } it "should ignore those meta tags" do expect { |b| subject.each_meta_redirect(&b) }.to yield_successive_args(link) end end end context "but the HTML cannot be parsed" do let(:body) { "Redirecting...} } it { expect(subject.meta_redirect?).to be true } end context "when there are no meta refresh redirects" do let(:body) { %{Redirecting...} } it { expect(subject.meta_redirect?).to be false } end end describe "#meta_redirects" do context "when there are meta refresh redirects" do let(:link1) { "/link1" } let(:link2) { "/link2" } let(:body) { %{Redirecting...} } it "should return each meta refresh redirect URL" do expect(subject.meta_redirects).to be == [link1, link2] end end context "when there are no meta refresh redirects" do let(:body) { %{Redirecting...} } it { expect(subject.meta_redirects).to be == [] } end end describe "#each_redirect" do context "when the Location header is set" do let(:link) { "http://#{host}/link" } let(:headers) { {'Location' => link} } it "should yield the Location header" do expect { |b| subject.each_redirect(&b) }.to yield_successive_args(link) end end context "when there are multiple Location headers" do let(:link1) { "http://#{host}/link1" } let(:link2) { "http://#{host}/link2" } let(:headers) { {'Location' => [link1, link2]} } it "should yield each Location header value" do expect { |b| subject.each_redirect(&b) }.to yield_successive_args(link1, link2) end end context "when there is no Location header set" do context "but there are meta refresh redirects" do let(:link1) { "/link1" } let(:link2) { "/link2" } let(:body) { %{Redirecting...} } it "should yield each meta refresh redirect URL" do expect { |b| subject.each_redirect(&b) }.to yield_successive_args(link1, link2) end end context "and there are no meta refresh redirects" do it do expect { |b| subject.each_redirect(&b) }.not_to yield_control end end end context "when not given a block" do it "should return an Enumerator" do expect(subject.each_redirect).to be_kind_of(Enumerator) end end end context "#redirects_to" do context "when there are redirects" do let(:link1) { "http://#{host}/link1" } let(:link2) { "http://#{host}/link2" } let(:headers) { {'Location' => [link1, link2]} } it "should return the redirects as an Array" do expect(subject.redirects_to).to be == [link1, link2] end end context "when there are no redirects" do it { expect(subject.redirects_to).to be == [] } end end describe "#each_mailto" do context "when the Content-Type is text/html" do let(:content_type) { 'text/html' } context "and the HTML is valid" do let(:email1) { "bob@example.com" } let(:email2) { "jim@example.com" } let(:body) { %{email1 link email2} } it "should yield each a link where the href starts with 'mailto:'" do expect { |b| subject.each_mailto(&b) }.to yield_successive_args(email1, email2) end end context "but the HTML is not valid" do let(:body) { "email1 link email2} } it "should return all 'mailto:' links" do expect(subject.mailtos).to be == [email1, email2] end end context "when there are no 'mailto:' links" do it { expect(subject.mailtos).to be == [] } end end describe "#each_link" do context "when the page contains a links" do let(:link1) { '/link1' } let(:link2) { '/link2' } let(:body) { %{link1 link2} } it "should yield each a/@href value" do expect { |b| subject.each_link(&b) }.to yield_successive_args(link1, link2) end end context "when the page contains frames" do let(:frame1) { '/frame1' } let(:frame2) { '/frame2' } let(:body) { %{} } it "should yield each frame/@src value" do expect { |b| subject.each_link(&b) }.to yield_successive_args(frame1, frame2) end end context "when the page contains iframes" do let(:iframe1) { '/iframe1' } let(:iframe2) { '/iframe2' } let(:body) { %{} } it "should yield each iframe/@src value" do expect { |b| subject.each_link(&b) }.to yield_successive_args(iframe1, iframe2) end end context "when the page contains remote stylesheets" do let(:stylesheet1) { '/stylesheet1.css' } let(:stylesheet2) { '/stylesheet2.css' } let(:body) { %{

hello

} } it "should yield each link/@href value" do expect { |b| subject.each_link(&b) }.to yield_successive_args(stylesheet1, stylesheet2) end end context "when the page contains remote javascript" do let(:javascript1) { '/script1.js' } let(:javascript2) { '/script2.js' } let(:body) { %{

hello

} } it "should yield each script/@src value" do expect { |b| subject.each_link(&b) }.to yield_successive_args(javascript1, javascript2) end end end describe "#links" do context "when the page contains an 'a' link" do let(:link) { '/link' } let(:body) do <<-HTML link HTML end it "should return an Array of links" do expect(subject.links).to be == [ link ] end end context "when the page contains a 'frame'" do let(:frame) { '/frame' } let(:body) do <<-HTML HTML end it "should return an Array of links" do expect(subject.links).to be == [ frame ] end end context "when the page contains a 'iframe'" do let(:iframe) { '/iframe' } let(:body) do <<-HTML HTML end it "should return an Array of links" do expect(subject.links).to be == [ iframe ] end end context "when the page contains a 'link' element" do let(:stylesheet) { '/stylesheet.css' } let(:body) do <<-HTML HTML end it "should return an Array of links" do expect(subject.links).to be == [ stylesheet ] end end context "when the page contains a 'script' element" do let(:javascript) { '/script.js' } let(:body) do <<-HTML