Sha256: ed79171aaea019548e7b71de33be0da611b1e438d8581f34b21d5b4d07805e12

Contents?: true

Size: 1.57 KB

Versions: 3

Compression:

Stored size: 1.57 KB

Contents

require 'nokogiri'
require 'open-uri'
require 'pry'

module WhatsOnNetflix
  # Stateless helpers that download web pages and pull title lists and
  # IMDb details out of them with Nokogiri CSS selectors.
  class Scraper
    # Base address used to build IMDb search and title-page URLs.
    IMDB_BASE_URL = "http://www.imdb.com"

    # Downloads +url+ and parses the response as HTML.
    #
    # Uses URI.open rather than Kernel#open: open-uri's Kernel#open override
    # for URLs was deprecated in Ruby 2.7 and removed in Ruby 3.0.
    #
    # @param url [String] address of the page to fetch
    # @return [Nokogiri::HTML::Document] the parsed page
    def self.get_html(url)
      Nokogiri::HTML(URI.open(url))
    end

    # Collects the text of every <li> inside a <ul> that directly follows
    # an <h4> on the page at +url+.
    #
    # @param url [String] address of the page listing titles
    # @return [Array<String>] list-item texts in document order
    def self.scrape_title_list(url)
      get_html(url).css("h4 + ul li").map(&:text)
    end

    # Searches IMDb for +name+, follows the first link found in the result
    # table and extracts details from that page.
    #
    # @param name [String] title to search for
    # @return [Hash{Symbol => String}] +:plot+, +:genre+, +:stars+ and
    #   +:year+; every value is an empty string when the search page
    #   contains no result link
    def self.scrape_imdb_info(name)
      info = { plot: "", genre: "", stars: "", year: "" }

      # encode_www_form_component also escapes "&", "+" and "=", which the
      # removed URI.escape left untouched and which would corrupt the query.
      query = URI.encode_www_form_component(name)
      search_page = get_html("#{IMDB_BASE_URL}/find?s=tt&q=#{query}")

      first_result = search_page.css("td a").first
      href = first_result && first_result["href"]
      # No usable hit: hand back the empty defaults instead of failing on nil.
      return info unless href

      movie_page = get_html("#{IMDB_BASE_URL}#{href}")

      info[:plot] = movie_page.css("div.summary_text").text.strip

      # Each genre is wrapped as "| Genre |" and the pieces are concatenated.
      info[:genre] = movie_page.css('span[itemprop="genre"]')
                               .map { |genre| "| #{genre.text} |" }
                               .join

      # Each actor text is stripped and followed by a single space.
      info[:stars] = movie_page.css('span[itemprop="actors"]')
                               .map { |actor| "#{actor.text.strip} " }
                               .join

      # TV show and movie pages are formatted a little differently: when the
      # release-date link mentions "TV Series" its text is used as-is,
      # otherwise the year comes from span#titleYear without parentheses.
      release_text = movie_page.css('a[title="See more release dates"]').text
      info[:year] =
        if release_text.include?("TV Series")
          release_text
        else
          movie_page.css("span#titleYear").text.strip.delete("()")
        end

      info
    end
  end
end

Version data entries

3 entries across 3 versions & 1 rubygems

Version Path
whats-on-netflix-0.1.7 lib/whats_on_netflix/scraper.rb
whats-on-netflix-0.1.5 lib/whats_on_netflix/scraper.rb
whats-on-netflix-0.1.4 lib/whats_on_netflix/scraper.rb