Sha256: a68a7f3c0b59b32cfadfc79313d5d1f54c7ffb1e0c682298394120dfe21b9d5f

Contents?: true

Size: 1.6 KB

Versions: 2

Compression:

Stored size: 1.6 KB

Contents

# Scrapes the Fandango "coming soon" listing and fills in ComingSoon::Movie
# objects with a name, start date, URL and synopsis.
#
# Pages are fetched with URI.open (open-uri), which is expected to be loaded
# by the gem's entry point; the URL-opening form of Kernel#open that this
# class previously relied on was removed in Ruby 3.0.
class ComingSoon::Scraper

	# Listing page that enumerates the upcoming movies.
	LISTING_URL = "http://www.fandango.com/moviescomingsoon".freeze

	# Default cap on how many listing entries are scraped.
	MAX_MOVIES = 20

	# Scrapes the listing page and builds one movie per "li.visual-item" entry.
	#
	# Selectors used, relative to each entry:
	#   name:       a.visual-title (text, stripped)
	#   start_date: span (text)
	#   url:        first <a> (href attribute)
	#
	# @param limit [Integer] maximum number of listing entries to scrape
	# @return [Array<ComingSoon::Movie>] the movies built, in listing order
	def scrape_movies(limit: MAX_MOVIES)
		listing = fetch_document(LISTING_URL)

		listing.css("li.visual-item").first(limit).map do |movie|
			soon = ComingSoon::Movie.new
			soon.name = movie.css("a.visual-title").text.strip
			soon.start_date = movie.css("span").text

			# An entry without a link (or without an href) yields a nil url
			# instead of raising NoMethodError.
			link = movie.at_css("a")
			soon.url = link && link["href"]

			# The synopsis pages are derived from the url, so there is
			# nothing to fetch when it is missing.
			scrape_details(soon) if soon.url

			soon
		end
	end

	# Sets soon.synopsis, preferring the text embedded in the movie's own page
	# (soon.url) and falling back to the separate 'plotsummary' page.
	#
	# @param soon [ComingSoon::Movie] movie whose url has already been set
	# @return [void]
	def scrape_details(soon)
		overview = begin
			fetch_document(soon.url) # Uses the HTTP 'movieoverview' url
		rescue StandardError
			# Deliberate best-effort: any failure fetching this page (the
			# original author observed failed HTTP to HTTPS redirects) falls
			# through to the 'plotsummary' page below.
			nil
		end

		# Use the inline text only when no READ FULL SYNOPSIS link is present
		# and some synopsis text is actually available on the page.
		if overview &&
			overview.css("a.movie-synopsis-link").empty? &&
			overview.css("span#SynopsisTextLabel").any?
			soon.synopsis = overview.css("span#SynopsisTextLabel").text
		else
			scrape_plotsummary(soon)
		end
	end

	# Scrapes the synopsis from the HTTP 'plotsummary' url, obtained by
	# replacing 'movieoverview' in soon.url. Errors raised while fetching this
	# page are not rescued here and propagate to the caller.
	#
	# @param soon [ComingSoon::Movie] movie whose url has already been set
	# @return [void]
	def scrape_plotsummary(soon)
		synop_url = soon.url.sub(/movieoverview/, 'plotsummary')
		summary = fetch_document(synop_url)
		soon.synopsis = summary.css("p.subpage-descriptive-content").text
	end

	private

	# Fetches url and parses the response as HTML. The block form closes the
	# underlying IO as soon as parsing has finished.
	def fetch_document(url)
		URI.open(url) { |io| Nokogiri::HTML(io) }
	end

end

Version data entries

2 entries across 2 versions & 1 rubygems

Version Path
coming_soon-0.2.8 lib/coming_soon/scraper.rb
coming_soon-0.2.7 lib/coming_soon/scraper.rb