Sha256: 092834071746984d5ebe6c06d66d260ab82e7772afc66d9f69c3ad61afea7e79

Contents?: true

Size: 1.69 KB

Versions: 3

Compression:

Stored size: 1.69 KB

Contents

require 'nokogiri'
require 'open-uri'
require 'pry'

module WhatsOnNetflix
  # Fetches HTML pages and extracts title lists and per-title IMDb details
  # from them. Every method is a class method; the class holds no state.
  class Scraper
    IMDB_BASE_URL = "http://www.imdb.com"
    IMDB_SEARCH_PATH = "/find?s=tt&q="
    RELEASE_DATES_SELECTOR = 'a[title="See more release dates"]'

    # Downloads +url+ and parses the response as HTML.
    #
    # Uses URI.open (supplied by open-uri) rather than Kernel#open: the
    # Kernel#open form stopped accepting URLs in Ruby 3.0, and it would also
    # treat a string starting with "|" as a command to run.
    #
    # @param url [String] address of the page to fetch
    # @return [Nokogiri::HTML::Document] the parsed page
    def self.get_html(url)
      Nokogiri::HTML(URI.open(url))
    end

    # Collects the text of every list item in a <ul> that directly follows
    # an <h4> heading on the page at +url+.
    #
    # @param url [String] address of the page holding the title lists
    # @return [Array<String>] item texts in document order
    def self.scrape_title_list(url)
      get_html(url).css("h4 + ul li").map(&:text)
    end

    # Searches IMDb for +name+, follows the first link found in the result
    # table and scrapes plot, genres, stars and year from that page.
    #
    # @param name [String] title to search for
    # @return [Hash{Symbol => String}] keys :plot, :genre, :stars and :year;
    #   every value is an empty string when the search yields no usable link
    def self.scrape_imdb_info(name)
      first_link = get_html(imdb_search_url(name)).css("td a").first
      href = first_link && first_link["href"]
      # No result row (or a link without href): return the usual hash shape
      # with blank fields instead of failing with NoMethodError on nil.
      return empty_info if href.nil?

      movie_page = get_html("#{IMDB_BASE_URL}#{href}")

      {
        plot: movie_page.css("div.summary_text").text.strip,
        # Each genre is wrapped in pipes, e.g. "| Drama || Comedy |".
        genre: movie_page.css('span[itemprop="genre"]').map { |genre| "| #{genre.text} |" }.join,
        # Each name is followed by a single space (so the result ends in one).
        stars: movie_page.css('span[itemprop="actors"]').map { |actor| "#{actor.text.strip} " }.join,
        year: extract_year(movie_page)
      }
    end

    # Builds the IMDb title-search URL for +name+.
    #
    # The name is form-encoded so characters such as "&", "#" or "+" stay
    # inside the q= parameter instead of being read as URL syntax.
    def self.imdb_search_url(name)
      "#{IMDB_BASE_URL}#{IMDB_SEARCH_PATH}#{URI.encode_www_form_component(name)}"
    end

    # Reads the year from a title page. TV show and movie pages are formatted
    # a little differently: a TV series carries its years in the
    # release-dates link text, a movie in span#titleYear wrapped in parens.
    def self.extract_year(movie_page)
      release_text = movie_page.css(RELEASE_DATES_SELECTOR).text
      return release_text if release_text.include?("TV Series")

      movie_page.css("span#titleYear").text.strip.delete("()")
    end

    # Result returned when nothing could be scraped; same keys as a real hit.
    def self.empty_info
      { plot: "", genre: "", stars: "", year: "" }
    end

    private_class_method :imdb_search_url, :extract_year, :empty_info
  end
end

Version data entries

3 entries across 3 versions & 1 rubygems

Version Path
whats-on-netflix-0.1.3 lib/whats_on_netflix/scraper.rb
whats-on-netflix-0.1.2 lib/whats_on_netflix/scraper.rb
whats-on-netflix-0.1.1 lib/whats_on_netflix/scraper.rb