Sha256: c95e0d44c3204274009c2275b5fb4c34cf42961761a745ef35cd0803b15583ca
Contents?: true
Size: 1.54 KB
Versions: 1
Compression:
Stored size: 1.54 KB
Contents
# Scrapes the Fandango "coming soon" listing and each listed movie's synopsis,
# populating ComingSoon::Movie objects with name, start date, url and synopsis.
class ComingSoon::Scraper
  # Listing page that enumerates the upcoming movies.
  LISTING_URL = "http://www.fandango.com/moviescomingsoon".freeze

  # Only this many movies (from the top of the listing) are scraped.
  MAX_MOVIES = 20

  # Scrapes the listing page and builds one ComingSoon::Movie per entry,
  # up to MAX_MOVIES entries, filling in its synopsis as well.
  #
  # Selectors, relative to each "li.visual-item" node:
  #   name:       "a.visual-title" (text, stripped)
  #   start_date: "span" (text)
  #   url:        "a" (href attribute of the first match)
  #
  # @return [nil] the return value carries no information; callers should read
  #   the movies back from ComingSoon::Movie (NOTE(review): presumably Movie
  #   keeps track of its own instances -- confirm against movie.rb).
  def self.scrape_movies
    doc = fetch_document(LISTING_URL)

    doc.css("li.visual-item").first(MAX_MOVIES).each do |movie|
      soon = ComingSoon::Movie.new
      soon.name = movie.css("a.visual-title").text.strip
      soon.start_date = movie.css("span").text
      soon.url = movie.css("a").attribute("href").value
      scrape_synopsis(soon)
    end

    nil
  end

  # Fills in +soon.synopsis+ for a movie whose +url+ is already set.
  #
  # The 'movieoverview' page is tried first. Its inline synopsis text is used
  # only when that page loaded, shows no "read full synopsis" link, and has
  # synopsis text. In every other case (including a failed fetch, e.g. an
  # HTTP to HTTPS redirect that could not be followed) the synopsis is taken
  # from the 'plotsummary' page instead.
  #
  # @param soon [ComingSoon::Movie] movie to update; must respond to +url+
  #   and +synopsis=+
  # @raise [StandardError] whatever the 'plotsummary' fetch raises; only the
  #   first (overview) fetch is best-effort
  def self.scrape_synopsis(soon)
    # A local (not a shared class-level ivar) so a failed fetch can never
    # fall through to the previous movie's document.
    overview = begin
      fetch_document(soon.url)
    rescue StandardError
      nil # overview page unavailable; fall back to the plot summary page
    end

    if overview &&
       overview.css("a.movie-synopsis-link").empty? &&
       !overview.css("span#SynopsisTextLabel").empty?
      soon.synopsis = overview.css("span#SynopsisTextLabel").text
    else
      synop_url = soon.url.sub(/movieoverview/, 'plotsummary')
      soon.synopsis = fetch_document(synop_url).css("p.subpage-descriptive-content").text
    end
  end

  # Downloads +url+ and parses it as HTML.
  #
  # Uses URI.open rather than Kernel#open: the url may come from scraped
  # markup, and Kernel#open would treat a value starting with "|" as a
  # command to run. Kernel#open also stopped handling URLs in Ruby 3.0.
  def self.fetch_document(url)
    Nokogiri::HTML(URI.open(url))
  end
  private_class_method :fetch_document
end
Version data entries
1 entries across 1 versions & 1 rubygems
Version | Path |
---|---|
coming_soon-0.2.6 | lib/coming_soon/scraper.rb |