Sha256: 424fd606c05bf48a787934cb5186a8e7b73fe1bdffae001a4393ae225d8940af

Contents?: true

Size: 1.95 KB

Versions: 2

Compression:

Stored size: 1.95 KB

Contents

require 'retriever'

# Specs for link extraction: each example assigns a small markup snippet to
# @source and checks what Retriever::Page#links reports for it.
describe "Link" do
  # Built once, when this describe block is evaluated, and captured by the
  # `let` below, so every example shares the same Target instance.
  target = Retriever::Target.new("http://www.cnet.com/reviews/")

  # `let` is lazy: the Page is only constructed when an example first calls
  # `links`, by which point the example has already assigned @source.
  let(:links) { Retriever::Page.new(@source, target).links }

  it "collects links in anchor tags" do
    @source = <<PAGE.strip
<a href='http://www.cnet.com/download.exe'>download</a>
PAGE

    expect(links).to include('http://www.cnet.com/download.exe')
  end

  it "collects links in link tags" do
    @source = <<PAGE.strip
 <link rel='stylesheet' id='gforms_reset_css-css'  href='http://www.cnet.com/wp-content/plugins/gravityforms/css/formreset.css?ver=1.7.12' type='text/css' media='all' />
PAGE

    expect(links).to include('http://www.cnet.com/wp-content/plugins/gravityforms/css/formreset.css?ver=1.7.12')
  end

  it "does not collect bare links (ones not in an href)" do
    @source = <<PAGE.strip
http://www.google.com
PAGE

    expect(links).not_to include('http://www.google.com')
  end

  it "collects only unique href links on the page" do
    # The same href appears twice with different anchor text.
    @source = <<PAGE.strip
<a href='http://www.cnet.com/products/gadgets'>gadgets</a>
<a href='http://www.cnet.com/products/gadgets'>gadgets2</a>
PAGE

    expect(links.size).to eq(1)
  end

  it "adds a protocol to urls missing them (www.)" do
    @source = <<PAGE.strip
<a href='www.cnet.com/download.exe'>download</a>
PAGE

    expect(links).to include('http://www.cnet.com/download.exe')
  end

  it "doesn't care about any extra attributes on the anchor tag" do
    # Both anchors carry the same href; only the second has an extra attribute.
    @source = <<PAGE.strip
<a href='http://www.cnet.com/products/gadgets/'>gadgets </a>
<a href='http://www.cnet.com/products/gadgets/' data-vanity-rewritten='true'></a>
PAGE

    expect(links.size).to eq(1)
  end

  it "returns relative urls with full path based on hostname" do
    # Covers a root-relative href ('/test.html') and a bare relative one ('cpage_18').
    @source = <<PAGE.strip
<a href='/test.html'>test</a>
<a href='cpage_18'>about</a>
PAGE

    expect(links).to include(
      'http://www.cnet.com/test.html',
      'http://www.cnet.com/cpage_18'
    )
  end
end

Version data entries

2 entries across 2 versions & 1 rubygem

Version Path
rubyretriever-1.1.0 spec/link_spec.rb
rubyretriever-1.0.3 spec/link_spec.rb