require 'retriever'

# Specs for link extraction: each example feeds a small HTML snippet to
# Retriever::Page and checks what Page#links returns for it.
#
# NOTE(review): the HTML inside every heredoc below was reconstructed from the
# example description, the surviving inner text and the expectation; the
# original markup was not recoverable. Confirm each fixture against the
# upstream spec before relying on it.
describe 'Link' do
  # Crawl target shared by every example; built once when the file is loaded.
  t = Retriever::Target.new('http://www.cnet.com/reviews/')

  # Evaluated lazily, so each example can assign @source before first use.
  let(:links) do
    Retriever::Page.new('http://www.cnet.com/reviews/', @source, t).links
  end

  it 'collects links in anchor tags' do
    @source = (<<SOURCE).strip
<a href='http://www.cnet.com/download.exe'>download</a>
SOURCE

    expect(links).to include('http://www.cnet.com/download.exe')
  end

  it 'collects links in link tags' do
    # NOTE(review): only the trailing 'formreset.css?ver=1.7.12' is pinned by
    # the expectation; the rest of this tag is a guess.
    @source = (<<SOURCE).strip
<link rel='stylesheet' id='gforms_reset_css-css' href='http://www.cnet.com/wp-content/plugins/gravityforms/css/formreset.css?ver=1.7.12' type='text/css' media='all' />
SOURCE

    expect(links[0]).to include('formreset.css?ver=1.7.12')
  end

  it 'does not collect bare links (ones not in an href)' do
    # NOTE(review): only the anchor texts "gadgets" / "gadgets2" survived.
    # The second anchor is assumed to carry its URL outside of an href so
    # that exactly one link is collected - verify against the original.
    @source = (<<SOURCE).strip
<a href='http://www.cnet.com/products/gadgets/'>gadgets</a>
<a data-link='http://www.cnet.com/products/gadgets2/'>gadgets2</a>
SOURCE

    expect(links.size).to eq(1)
  end

  it 'adds a protocol to urls missing them (www.)' do
    @source = (<<SOURCE).strip
<a href='www.cnet.com/download.exe'>download</a>
SOURCE

    expect(links).to include('http://www.cnet.com/download.exe')
  end

  it "doesn't care about any extra attributes on the anchor tag" do
    # NOTE(review): the name of the extra attribute is a guess.
    @source = (<<SOURCE).strip
<a href='http://www.cnet.com/products/gadgets/' data-vanity-rewritten='true'>gadgets</a>
SOURCE

    expect(links.size).to eq(1)
  end

  it 'returns relative urls with full path based on hostname' do
    # One root-relative and one document-relative href, matching the two
    # absolute URLs asserted below.
    @source = (<<SOURCE).strip
<a href='/test.html'>test</a>
<a href='cpage_18'>about</a>
SOURCE

    expect(links).to include('http://www.cnet.com/test.html',
                             'http://www.cnet.com/reviews/cpage_18')
  end

  it 'collects files even when query strings exist' do
    # NOTE(review): the href mirrors the expected string exactly; the original
    # fixture may have used an HTML-escaped ampersand - confirm.
    @source = (<<SOURCE).strip
<a href='http://mises.org/system/tdf/Robert%20Nozick%20and%20Murray%20Rothbard%20David%20Gordon.mp3?file=1&type=audio'>Download audio file</a>
SOURCE

    expect(links).to include('http://mises.org/system/tdf/Robert%20Nozick%20and%20Murray%20Rothbard%20David%20Gordon.mp3?file=1&type=audio')
  end
end