# -*- encoding: utf-8 -*-

require File.join(File.dirname(__FILE__), "/spec_helper")

describe MetaInspector::Parser do
  # Smoke tests for the basic Parser accessors (title, image, images, feed,
  # description, parsed, to_s) against documents built by the `doc` helper.
  describe 'Doing a basic scrape' do

    # Every example starts with the pagerankalert.com document in @m; examples
    # that need a different page reassign @m (or build their own parser).
    before(:each) do
      @m = MetaInspector::Parser.new(doc 'http://pagerankalert.com')
    end

    it "should get the title" do
      @m.title.should == 'PageRankAlert.com :: Track your PageRank changes & receive alerts'
    end

    it "should not find an image" do
      @m.image.should be_nil
    end

    describe "get image" do
      it "should find the og image" do
        og_image = "http://o.onionstatic.com/images/articles/article/2772/Apple-Claims-600w-R_jpg_130x110_q85.jpg"

        @m = MetaInspector::Parser.new(doc 'http://www.theonion.com/articles/apple-claims-new-iphone-only-visible-to-most-loyal,2772/')
        @m.image.should == og_image
        @m.meta_og_image.should == og_image
      end

      it "should find image on youtube" do
        MetaInspector::Parser.new(doc 'http://www.youtube.com/watch?v=iaGSSrp49uc').image.should == "http://i2.ytimg.com/vi/iaGSSrp49uc/mqdefault.jpg"
      end
    end

    describe "get images" do
      it "should find all page images" do
        @m.images.should == ["http://pagerankalert.com/images/pagerank_alert.png?1305794559"]
      end

      it "should find images on twitter" do
        m = MetaInspector::Parser.new(doc 'https://twitter.com/markupvalidator')
        m.images.length.should == 6
        m.images.join("; ").should == "https://twimg0-a.akamaihd.net/profile_images/2380086215/fcu46ozay5f5al9kdfvq_reasonably_small.png; https://twimg0-a.akamaihd.net/profile_images/2380086215/fcu46ozay5f5al9kdfvq_normal.png; https://twimg0-a.akamaihd.net/profile_images/2293774732/v0pgo4xpdd9rou2xq5h0_normal.png; https://twimg0-a.akamaihd.net/profile_images/1538528659/jaime_nov_08_normal.jpg; https://si0.twimg.com/sticky/default_profile_images/default_profile_6_mini.png; https://twimg0-a.akamaihd.net/a/1342841381/images/bigger_spinner.gif"
      end
    end

    it "should ignore malformed image tags" do
      # The page contains an image tag without a source; the scraper must not
      # raise on it.
      @m = MetaInspector::Parser.new(doc "http://www.guardian.co.uk/media/pda/2011/sep/15/techcrunch-arrington-startups")
      @m.images.size.should == 11
    end

    it "should have a Nokogiri::HTML::Document as parsed" do
      # be_an_instance_of checks the exact class, like the former
      # `.class.should ==`, but reports the offending object on failure.
      @m.parsed.should be_an_instance_of(Nokogiri::HTML::Document)
    end

    it "should return the document as a string" do
      @m.to_s.should be_an_instance_of(String)
    end

    describe "Feed" do
      it "should get rss feed" do
        @m = MetaInspector::Parser.new(doc 'http://www.iteh.at')
        @m.feed.should == 'http://www.iteh.at/de/rss/'
      end

      it "should get atom feed" do
        @m = MetaInspector::Parser.new(doc 'http://www.tea-tron.com/jbravo/blog/')
        @m.feed.should == 'http://www.tea-tron.com/jbravo/blog/feed/'
      end

      it "should return nil if no feed found" do
        @m = MetaInspector::Parser.new(doc 'http://www.alazan.com')
        @m.feed.should be_nil
      end
    end

    describe "get description" do
      it "should find description on youtube" do
        MetaInspector::Parser.new(doc 'http://www.youtube.com/watch?v=iaGSSrp49uc').description.should == ""
      end
    end
  end

  # Checks the description reported for the theonion-no-description.com
  # document, i.e. a page without a meta description.
  describe 'Page with missing meta description' do
    it "should find a secondary description" do
      secondary_text = "SAN FRANCISCO—In a move expected to revolutionize the mobile device industry, Apple launched its fastest and most powerful iPhone to date Tuesday, an innovative new model that can only be seen by the company's hippest and most dedicated customers. This is secondary text picked up because of a missing meta description."

      parser = MetaInspector::Parser.new(doc('http://theonion-no-description.com'))

      parser.description.should == secondary_text
    end
  end

  # Link extraction: all links, the internal/external split, percent-encoding
  # of international characters and tolerance of malformed or odd hrefs.
  describe 'Links' do
    # Examples that do not build their own parser use the pagerankalert.com
    # document held in @m.
    before(:each) do
      @m = MetaInspector::Parser.new(doc 'http://pagerankalert.com')
    end

    it "should get the links" do
      @m.links.should == [ "http://pagerankalert.com/",
                           "http://pagerankalert.com/es?language=es",
                           "http://pagerankalert.com/users/sign_up",
                           "http://pagerankalert.com/users/sign_in",
                           "mailto:pagerankalert@gmail.com",
                           "http://pagerankalert.posterous.com/",
                           "http://twitter.com/pagerankalert",
                           "http://twitter.com/share" ]
    end

    it "should get correct absolute links for internal pages" do
      @m.internal_links.should == [ "http://pagerankalert.com/",
                                    "http://pagerankalert.com/es?language=es",
                                    "http://pagerankalert.com/users/sign_up",
                                    "http://pagerankalert.com/users/sign_in" ]
    end

    it "should get correct absolute links for external pages" do
      @m.external_links.should == [ "mailto:pagerankalert@gmail.com",
                                    "http://pagerankalert.posterous.com/",
                                    "http://twitter.com/pagerankalert",
                                    "http://twitter.com/share" ]
    end

    it "should get correct absolute links, correcting relative links from URL not ending with slash" do
      m = MetaInspector::Parser.new(doc 'http://alazan.com/websolution.asp')
      m.links.should == [ "http://alazan.com/index.asp",
                          "http://alazan.com/faqs.asp" ]
    end

    it "should return empty array if no links found" do
      m = MetaInspector::Parser.new(doc 'http://example.com/empty')
      m.links.should == []
    end

    describe "links with international characters" do
      it "should get correct absolute links, encoding the URLs as needed" do
        m = MetaInspector::Parser.new(doc 'http://international.com')
        m.links.should == [ "http://international.com/espa%C3%B1a.asp",
                            "http://international.com/roman%C3%A9e",
                            "http://international.com/faqs#cami%C3%B3n",
                            "http://international.com/search?q=cami%C3%B3n",
                            "http://international.com/search?q=espa%C3%B1a#top",
                            "http://international.com/index.php?q=espa%C3%B1a&url=aHR0zZQ==&cntnt01pageid=21",
                            "http://example.com/espa%C3%B1a.asp",
                            "http://example.com/roman%C3%A9e",
                            "http://example.com/faqs#cami%C3%B3n",
                            "http://example.com/search?q=cami%C3%B3n",
                            "http://example.com/search?q=espa%C3%B1a#top"]
      end

      describe "internal links" do
        it "should get correct internal links, encoding the URLs as needed but respecting # and ?" do
          m = MetaInspector::Parser.new(doc 'http://international.com')
          m.internal_links.should == [ "http://international.com/espa%C3%B1a.asp",
                                       "http://international.com/roman%C3%A9e",
                                       "http://international.com/faqs#cami%C3%B3n",
                                       "http://international.com/search?q=cami%C3%B3n",
                                       "http://international.com/search?q=espa%C3%B1a#top",
                                       "http://international.com/index.php?q=espa%C3%B1a&url=aHR0zZQ==&cntnt01pageid=21"]
        end

        it "should not crash when processing malformed hrefs" do
          m = MetaInspector::Parser.new(doc 'http://example.com/malformed_href')
          internal_links = nil

          # Only the call under test sits inside the block: an expectation
          # failing in there would surface as an unexpected exception rather
          # than as a readable mismatch.
          expect { internal_links = m.internal_links }.to_not raise_error

          internal_links.should == [ "http://example.com/faqs" ]
          m.should be_ok
        end
      end

      describe "external links" do
        it "should get correct external links, encoding the URLs as needed but respecting # and ?" do
          m = MetaInspector::Parser.new(doc 'http://international.com')
          m.external_links.should == [ "http://example.com/espa%C3%B1a.asp",
                                       "http://example.com/roman%C3%A9e",
                                       "http://example.com/faqs#cami%C3%B3n",
                                       "http://example.com/search?q=cami%C3%B3n",
                                       "http://example.com/search?q=espa%C3%B1a#top"]
        end

        it "should not crash when processing malformed hrefs" do
          m = MetaInspector::Parser.new(doc 'http://example.com/malformed_href')
          external_links = nil

          # Same split as the internal-links example: the block only guards
          # the extraction itself, the comparisons run outside of it.
          expect { external_links = m.external_links }.to_not raise_error

          external_links.should == ["skype:joeuser?call", "telnet://telnet.cdrom.com",
                                    "javascript:alert('ok');", "javascript://", "mailto:email(at)example.com"]
          m.should be_ok
        end
      end
    end

    it "should not crash with links that have weird href values" do
      m = MetaInspector::Parser.new(doc 'http://example.com/invalid_href')
      m.links.should == ["%3Cp%3Eftp://ftp.cdrom.com", "skype:joeuser?call", "telnet://telnet.cdrom.com"]
    end
  end

  # Internal links reported for the relative.com documents, depending on
  # whether the document URL is the root, a document or a directory.
  describe 'Relative links' do
    # Each row: nested group name, document URL, expected internal links.
    [
      ['From a root URL',
       'http://relative.com/',
       ['http://relative.com/about', 'http://relative.com/sitemap']],
      ['From a document',
       'http://relative.com/company',
       ['http://relative.com/about', 'http://relative.com/sitemap']],
      ['From a directory',
       'http://relative.com/company/',
       ['http://relative.com/company/about', 'http://relative.com/sitemap']]
    ].each do |group_name, page_url, expected_links|
      describe group_name do
        before(:each) do
          @m = MetaInspector::Parser.new(doc(page_url))
        end

        it 'should get the relative links' do
          @m.internal_links.should == expected_links
        end
      end
    end
  end

  # Internal links reported for the relativewithbase.com documents; both the
  # document URL and the directory URL are expected to give the same result.
  describe 'Relative links with base' do
    it 'should get the relative links from a document' do
      expected_links = ['http://relativewithbase.com/about', 'http://relativewithbase.com/sitemap']
      parser = MetaInspector::Parser.new(doc('http://relativewithbase.com/company/page2'))

      parser.internal_links.should == expected_links
    end

    it 'should get the relative links from a directory' do
      expected_links = ['http://relativewithbase.com/about', 'http://relativewithbase.com/sitemap']
      parser = MetaInspector::Parser.new(doc('http://relativewithbase.com/company/page2/'))

      parser.internal_links.should == expected_links
    end
  end

  # Links with non-HTTP schemes (ftp, javascript, mailto, skype, telnet) as
  # reported for the example.com/nonhttp document.
  describe 'Non-HTTP links' do
    before do
      @m = MetaInspector::Parser.new(doc('http://example.com/nonhttp'))
    end

    it "should get the links" do
      # Compared after sorting, so the order in which links are returned
      # does not matter here.
      expected_links = %w[
        ftp://ftp.cdrom.com/
        javascript:alert('hey');
        mailto:user@example.com
        skype:joeuser?call
        telnet://telnet.cdrom.com
      ]

      sorted_links = @m.links.sort
      sorted_links.should == expected_links
    end
  end

  # Links of the protocol-relative.com document are checked twice: once for
  # the http:// document and once for the https:// one.
  describe 'Protocol-relative URLs' do
    before do
      @m_http  = MetaInspector::Parser.new(doc('http://protocol-relative.com'))
      @m_https = MetaInspector::Parser.new(doc('https://protocol-relative.com'))
    end

    it "should convert protocol-relative links to http" do
      ['http://protocol-relative.com/contact', 'http://yahoo.com/'].each do |expected_link|
        @m_http.links.should include(expected_link)
      end
    end

    it "should convert protocol-relative links to https" do
      ['https://protocol-relative.com/contact', 'https://yahoo.com/'].each do |expected_link|
        @m_https.links.should include(expected_link)
      end
    end
  end

  # Meta tag lookup through the meta_<name> accessors ("ghost methods"),
  # including mixed-case names, og:/twitter: style tags and missing tags.
  describe 'Getting meta tags by ghost methods' do
    # Default document for the examples; several examples replace @m with a
    # parser for another page.
    before(:each) do
      @m = MetaInspector::Parser.new(doc 'http://pagerankalert.com')
    end

    it "should get the robots meta tag" do
      @m.meta_robots.should == 'all,follow'
    end

    # Previously shared its description with the example above, which made
    # the two indistinguishable in the spec output.
    it "should get the robots meta tag regardless of the case used in the method name" do
      @m.meta_RoBoTs.should == 'all,follow'
    end

    it "should get the description meta tag" do
      @m.meta_description.should == 'Track your PageRank(TM) changes and receive alerts by email'
    end

    it "should get the keywords meta tag" do
      @m.meta_keywords.should == "pagerank, seo, optimization, google"
    end

    it "should get the content-language meta tag" do
      # Flagged as pending with the reason "mocks".
      pending "mocks"
      @m.meta_content_language.should == "en"
    end

    it "should get the Csrf_pAram meta tag" do
      @m.meta_Csrf_pAram.should == "authenticity_token"
    end

    it "should return nil for nonfound meta_tags" do
      @m.meta_lollypop.should be_nil
    end

    it "should get the generator meta tag" do
      @m = MetaInspector::Parser.new(doc 'http://www.inkthemes.com/')
      @m.meta_generator.should == 'WordPress 3.4.2'
    end

    it "should find a meta_og_title" do
      @m = MetaInspector::Parser.new(doc 'http://www.theonion.com/articles/apple-claims-new-iphone-only-visible-to-most-loyal,2772/')
      @m.meta_og_title.should == "Apple Claims New iPhone Only Visible To Most Loyal Of Customers"
    end

    it "should not find a meta_og_something" do
      @m = MetaInspector::Parser.new(doc 'http://www.theonion.com/articles/apple-claims-new-iphone-only-visible-to-most-loyal,2772/')
      @m.meta_og_something.should be_nil
    end

    it "should find a meta_twitter_site" do
      @m = MetaInspector::Parser.new(doc 'http://www.youtube.com/watch?v=iaGSSrp49uc')
      @m.meta_twitter_site.should == "@youtube"
    end

    it "should find a meta_twitter_player_width" do
      @m = MetaInspector::Parser.new(doc 'http://www.youtube.com/watch?v=iaGSSrp49uc')
      @m.meta_twitter_player_width.should == "1920"
    end

    it "should not find a meta_twitter_dummy" do
      @m = MetaInspector::Parser.new(doc 'http://www.youtube.com/watch?v=iaGSSrp49uc')
      @m.meta_twitter_dummy.should be_nil
    end

    it "should find a meta_og_video_width" do
      @m = MetaInspector::Parser.new(doc 'http://www.youtube.com/watch?v=iaGSSrp49uc')
      @m.meta_og_video_width.should == "1920"
    end
  end

  # Declared charset as reported by Parser#charset for three documents: one
  # per declaration style named in the examples, plus one with no declaration.
  describe 'Charset detection' do
    it "should get the charset from <meta charset />" do
      m = MetaInspector::Parser.new(doc 'http://charset001.com')
      m.charset.should == "utf-8"
    end

    it "should get the charset from meta content type" do
      m = MetaInspector::Parser.new(doc 'http://charset002.com')
      m.charset.should == "windows-1252"
    end

    it "should get nil if no declared charset is found" do
      m = MetaInspector::Parser.new(doc 'http://charset000.com')
      m.charset.should be_nil
    end
  end

  # Parser#to_hash for the pagerankalert.com document must equal the nested
  # hash spelled out below.
  describe 'to_hash' do
    it "should return a hash with all the values set" do
      named_meta = {
        "description" => "Track your PageRank(TM) changes and receive alerts by email",
        "keywords"    => "pagerank, seo, optimization, google",
        "robots"      => "all,follow",
        "csrf_param"  => "authenticity_token",
        "csrf_token"  => "iW1/w+R8zrtDkhOlivkLZ793BN04Kr3X/pS+ixObHsE="
      }
      expected_hash = { "meta" => { "name" => named_meta, "property" => {} } }

      parser = MetaInspector::Parser.new(doc('http://pagerankalert.com'))

      parser.to_hash.should == expected_hash
    end
  end

  private

  # Helper used by the examples to build the document handed to
  # MetaInspector::Parser.new.
  #
  # url     - URL passed as the first argument to MetaInspector::Document.new.
  # options - Hash forwarded unchanged as the second argument (default: {}).
  #
  # Returns whatever MetaInspector::Document.new returns for those arguments.
  def doc(url, options = {})
    document = MetaInspector::Document.new(url, options)
    document
  end
end