Sha256: f906aa5d2c9d6bb3fe6722269ebb60fdf627d345eac1941c53fdb35168f1b5a7

Contents?: true

Size: 1.19 KB

Versions: 3

Compression:

Stored size: 1.19 KB

Contents

# Scrapes IMDb's box-office chart and individual movie pages.
#
# Assumes Nokogiri and open-uri are required elsewhere in the gem; nothing in
# this file loads them.
class Top10BoxOffice::Scraper
  # Host that every relative IMDb path is appended to.
  BASE_URL = 'http://www.imdb.com'.freeze

  # Downloads and parses a single IMDb page.
  #
  # @param url [String] path appended verbatim to BASE_URL, e.g. '/chart/boxoffice'
  # @return [Nokogiri::HTML::Document] the parsed page
  def get_page(url)
    # URI.open rather than Kernel#open: open-uri stopped patching Kernel#open
    # in Ruby 3.0, so a bare `open` with a URL is treated as a local file path.
    Nokogiri::HTML(URI.open(BASE_URL + url))
  end

  # Reads the box-office chart, passes the chart heading text to
  # Top10BoxOffice::Movie.set_date, and instantiates one
  # Top10BoxOffice::Movie per table row.
  #
  # The created Movie objects are not collected here; the method returns
  # whatever iterating over the rows returns.
  def get_movies
    doc = get_page('/chart/boxoffice')
    Top10BoxOffice::Movie.set_date(doc.css('div#boxoffice h4').text)

    doc.css('table.chart tbody tr').each do |movie|
      # Title text and detail-page path both come from the same anchor.
      title_link = movie.css('td.titleColumn a')

      Top10BoxOffice::Movie.new({
        title: title_link.text,
        url: title_link.attribute('href').value,
        weekend_total: movie.css('td.ratingColumn')[0].text.strip,
        gross: movie.css('span.secondaryInfo').text,
        weeks: movie.css('td.weeksColumn').text
      })
    end
  end

  # Scrapes the detail fields from one movie page.
  #
  # @param url [String] path of the movie page, appended to BASE_URL
  # @return [Hash{Symbol => String}] keys :imdb_rating, :actors, :director,
  #   :genre, :content_rating, :runtime and :summary; a field whose selector
  #   matches nothing comes back as an empty string (:imdb_rating as "/10")
  def get_movie_details(url)
    doc = get_page(url)

    # The second duration element is the one used; when the page has fewer
    # than two, fall back to "" like every other field instead of raising
    # NoMethodError on nil.
    runtime_node = doc.css('time[itemprop=duration]')[1]

    {
      imdb_rating: "#{doc.css('span[itemprop=ratingValue]').text}/10",
      actors: doc.css('span[itemprop=actors]').map { |actor| actor.css('a').text }.join(', '),
      director: doc.css('span[itemprop=director] a').text,
      genre: doc.css('div[itemprop=genre] a').map { |genre| genre.text.strip }.join(', '),
      content_rating: doc.css('span[itemprop=contentRating]').text,
      runtime: runtime_node ? runtime_node.text : '',
      summary: doc.css('div.summary_text').text.strip
    }
  end
end

Version data entries

3 entries across 3 versions & 1 rubygems

Version Path
top_10_box_office-0.1.2 lib/top_10_box_office/scraper.rb
top_10_box_office-0.1.1 lib/top_10_box_office/scraper.rb
top_10_box_office-0.1.0 lib/top_10_box_office/scraper.rb