require 'retriever/page'
require 'retriever/fetch'
# Shared target handed as the third argument to every Retriever::Page built in
# the examples below. It is constructed from a CNET reviews URL and a regexp
# that matches strings ending in ".exe".
# NOTE(review): the regexp is presumably the file-type filter exercised by the
# '#parse_files' example — confirm against Retriever::Target.
t = Retriever::Target.new('http://www.cnet.com/reviews/', /\.exe\z/)
# Examples for Retriever::Page. Every example builds a page for the same URL
# from an in-memory source string plus the shared target `t` defined above.
describe 'Page' do
  let(:page_url) { 'http://www.cnet.com/' }

  # Fixture shared by most example groups.
  # NOTE(review): the heredoc body is empty here, yet the examples below expect
  # a title, a meta description, h1/h2 text, links and a '#gadgets-link'
  # element — confirm the fixture markup was not lost.
  let(:common_source) do
    <<-SOURCE
    SOURCE
  end

  # Source fed to the page; groups needing different input override it.
  let(:source) { common_source }

  # Page under test, rebuilt (and memoized) once per example.
  let(:page) { Retriever::Page.new(page_url, source, t) }

  describe '#url' do
    it 'returns current page URL' do
      expect(page.url).to eq('http://www.cnet.com/')
    end
  end

  describe '#links' do
    # NOTE(review): this source contains no anchor markup although two links
    # are expected — the original HTML may have been stripped; verify.
    let(:source) { "profiletop " }

    it 'collects all unique href links on the page, skips div anchors' do
      expect(page.links.size).to eq(2)
    end
  end

  describe '#parse_internal' do
    it 'filters links by host' do
      expect(page.parse_internal.size).to eq(3)
    end
  end

  describe '#parse_internal_visitable' do
    # NOTE(review): as with '#links', this source carries no markup although
    # one visitable link is expected — verify the fixture.
    let(:source) { "profile " }

    it "filters out 'unvisitable' URLS like JS, Stylesheets, Images" do
      expect(page.parse_internal_visitable.size).to eq(1)
    end
  end

  describe '#parse_files' do
    it 'filters links by filetype' do
      # parse_files receives the result of parse_internal as its input.
      expect(page.parse_files(page.parse_internal).size).to eq(1)
    end
  end

  describe '#parse_by_css' do
    it 'returns the text from the received css selector' do
      expect(page.parse_by_css('#gadgets-link')).to eq('gadgets ')
    end
  end

  describe '#title' do
    it 'returns page title' do
      expect(page.title).to eq('test')
    end
  end

  describe '#desc' do
    it 'returns meta description' do
      expect(page.desc).to eq('test2 ')
    end
  end

  describe '#h1' do
    it 'returns h1 text' do
      expect(page.h1).to eq('test 3')
    end
  end

  describe '#h2' do
    it 'returns h2 text' do
      expect(page.h2).to eq(' test 4 ')
    end
  end
end