spec/postrank-uri_spec.rb in postrank-uri-1.0.5 vs spec/postrank-uri_spec.rb in postrank-uri-1.0.6
- old
+ new
@@ -110,12 +110,31 @@
it "should remove awesm/sms parameters" do
c('igvita.com/?id=a&utm_source=a&awesm=b').should == 'http://igvita.com/?id=a'
c('igvita.com/?id=a&sms_ss=a').should == 'http://igvita.com/?id=a'
end
+ end
+ context "hashbang" do
+ it "should rewrite twitter links to crawlable versions" do
+ c('http://twitter.com/#!/igrigorik').should == 'http://twitter.com/igrigorik'
+ c('http://twitter.com/#!/a/statuses/1').should == 'http://twitter.com/a/statuses/1'
+ c('http://nontwitter.com/#!/a/statuses/1').should == 'http://nontwitter.com/#!/a/statuses/1'
+ end
end
+
+ context "embedded links" do
+ it "should extract embedded redirects from Google News" do
+ u = c('http://news.google.com/news/url?sa=t&fd=R&&url=http://www.ctv.ca/CTVNews/Politics/20110111/')
+ u.should == 'http://www.ctv.ca/CTVNews/Politics/20110111/'
+ end
+
+ it "should extract embedded redirects from xfruits.com" do
+ u = c('http://xfruits.com/MrGroar/?url=http%3A%2F%2Faap.lesroyaumes.com%2Fdepeches%2Fdepeche351820908.html')
+ u.should == 'http://aap.lesroyaumes.com/depeches/depeche351820908.html'
+ end
+ end
end
context "clean" do
def c(uri)
PostRank::URI.clean(uri)
@@ -167,9 +186,13 @@
end
it "should not pickup bad TLDS" do
e('stuff.zz a.b.c d.zq').should be_empty
end
+ end
+
+ it "should extract twitter links with hashbangs" do
+ e('test http://twitter.com/#!/igrigorik').should include('http://twitter.com/igrigorik')
end
it "should handle a URL that comes after text without a space" do
e("text:http://spn.tw/tfnLT").should include("http://spn.tw/tfnLT")
e("text;http://spn.tw/tfnLT").should include("http://spn.tw/tfnLT")