# -*- encoding: utf-8 -*-
require File.join(File.dirname(__FILE__), "/spec_helper")
describe MetaInspector::Document do
# Exercises the :document option: the page contents are handed to the
# Document as a string alongside the URL.
describe 'passing the contents of the document as html' do
  # The fixture has to be real markup. The title example needs a <title>
  # element, and the links example needs an anchor with a relative href that
  # it expects to come back as an absolute URL on the document's host.
  before(:each) do
    html = "<html><head><title>Hello From Passed Html</title></head>" \
           "<body><a href='/hello'>Hello link</a></body></html>"
    @m = MetaInspector::Document.new('http://cnn.com/', :document => html)
  end

  it "should get correct links when the url html is passed as an option" do
    @m.links.should == ["http://cnn.com/hello"]
  end

  it "should get the title" do
    @m.title.should == "Hello From Passed Html"
  end
end
# The string form of a document must be exactly a String (not a subclass).
it "should return a String as to_s" do
  document = MetaInspector::Document.new('http://pagerankalert.com')
  document.to_s.should be_an_instance_of(String)
end
# Compares the full to_hash output for the pagerankalert.com page against a
# literal expectation, assembled below from named parts for readability.
it "should return a Hash with all the values set" do
  internal_links = [
    "http://pagerankalert.com/",
    "http://pagerankalert.com/es?language=es",
    "http://pagerankalert.com/users/sign_up",
    "http://pagerankalert.com/users/sign_in"
  ]

  external_links = [
    "mailto:pagerankalert@gmail.com",
    "http://pagerankalert.posterous.com/",
    "http://twitter.com/pagerankalert",
    "http://twitter.com/share"
  ]

  meta_tags = {
    "name" => {
      "description" => ["Track your PageRank(TM) changes and receive alerts by email"],
      "keywords"    => ["pagerank, seo, optimization, google"],
      "robots"      => ["all,follow"],
      "csrf-param"  => ["authenticity_token"],
      "csrf-token"  => ["iW1/w+R8zrtDkhOlivkLZ793BN04Kr3X/pS+ixObHsE="]
    },
    "http-equiv" => {},
    "property"   => {},
    "charset"    => ["utf-8"]
  }

  expected = {
    "url"            => "http://pagerankalert.com/",
    "title"          => "PageRankAlert.com :: Track your PageRank changes & receive alerts",
    "favicon"        => "http://pagerankalert.com/src/favicon.ico",
    # The full link list is the internal links followed by the external ones.
    "links"          => internal_links + external_links,
    "internal_links" => internal_links,
    "external_links" => external_links,
    "images"         => ["http://pagerankalert.com/images/pagerank_alert.png?1305794559"],
    "charset"        => "utf-8",
    "feed"           => "http://feeds.feedburner.com/PageRankAlert",
    "content_type"   => "text/html",
    "meta_tags"      => meta_tags
  }

  MetaInspector::Document.new('http://pagerankalert.com').to_hash.should == expected
end
# Checks what is pushed onto the exception log when the requested resource is
# not an HTML page, depending on the html_content_only option.
#
# Each example calls #title only for its side effect; presumably that is what
# triggers the fetch/parse in which the content type is checked -- verify
# against the Document implementation.
describe 'exception handling' do
  let(:logger) { MetaInspector::ExceptionLog.new }

  it "should parse images when html_content_only is not specified" do
    logger.should_not_receive(:<<)

    document = MetaInspector::Document.new('http://pagerankalert.com/image.png', exception_log: logger)
    document.title
  end

  it "should parse images when html_content_only is false" do
    logger.should_not_receive(:<<)

    document = MetaInspector::Document.new('http://pagerankalert.com/image.png', html_content_only: false, exception_log: logger)
    document.title
  end

  it "should handle errors when content is image/jpeg and html_content_only is true" do
    logger.should_receive(:<<).with(an_instance_of(RuntimeError))

    document = MetaInspector::Document.new('http://pagerankalert.com/image.png', html_content_only: true, exception_log: logger)
    document.title
  end

  it "should handle errors when content is not text/html and html_content_only is true" do
    logger.should_receive(:<<).with(an_instance_of(RuntimeError))

    document = MetaInspector::Document.new('http://pagerankalert.com/file.tar.gz', html_content_only: true, exception_log: logger)
    document.title
  end
end
# Verifies which HTTP headers are handed to MetaInspector::Request#open,
# both by default and when the caller supplies its own.
describe 'headers' do
  let(:url) { 'http://example.com/headers' }

  # Stand-in for whatever #open returns; only base_uri is stubbed.
  let(:stubbed_page) { double('Request', base_uri: url) }

  it "should include default headers" do
    default_headers = {
      'User-Agent' => "MetaInspector/#{MetaInspector::VERSION} (+https://github.com/jaimeiniesta/metainspector)"
    }

    MetaInspector::Request.any_instance
      .should_receive(:open).with(url, default_headers).and_return(stubbed_page)

    MetaInspector::Document.new(url)
  end

  it "should include passed headers on the request" do
    custom_headers = { 'User-Agent' => 'Mozilla', 'Referer' => 'https://github.com/' }

    MetaInspector::Request.any_instance
      .should_receive(:open).with(url, custom_headers).and_return(stubbed_page)

    MetaInspector::Document.new(url, headers: custom_headers)
  end
end
end