Sha256: 117a6f02d093d345f43c505258d99bab17832417f79bf6ab1bae9a8177b2cee7

Contents?: true

Size: 973 Bytes

Versions: 19

Compression:

Stored size: 973 Bytes

Contents

require 'open-uri'
require 'nokogiri'
require 'pry'

# Scrapes quote listing pages and builds a Quote for every quote found.
#
# Quote is defined elsewhere in the project; this class only calls
# Quote.new(text, character) and does not keep the resulting objects itself.
class Scraper

	# Common prefix of every paginated listing URL; the page number and ".html" are appended.
	PAGE_URL_PREFIX = "https://www.tvfanatic.com/quotes/shows/the-office/page-".freeze

	# Number of listing pages fetched when the caller does not specify one.
	DEFAULT_PAGE_COUNT = 216

	# Attributions longer than this are replaced with an empty string.
	MAX_CHARACTER_LENGTH = 40

	# Fetches one listing page and creates a Quote for each blockquote on it.
	#
	# Blockquotes that yield no text are skipped, so a quote without a <p>
	# can no longer inherit the text of the quote before it. When a blockquote
	# holds several <p> elements their texts are joined with single spaces.
	#
	# @param page [String] URL of the listing page to scrape
	# @return [Object] the collection returned by the "div.quotes blockquote" lookup
	def scrape_office_quotes(page)
		fetch_document(page).css("div.quotes blockquote").each do |quote|
			text = quote_text(quote)
			next if text.empty?

			Quote.new(text, character_name(quote))
		end
	end

	# Scrapes listing pages 1 through page_count in order.
	#
	# @param page_count [Integer] how many pages to fetch, starting at page 1
	# @return [Integer] page_count
	def get_quote_pages(page_count = DEFAULT_PAGE_COUNT)
		page_count.times do |i|
			scrape_office_quotes("#{PAGE_URL_PREFIX}#{i + 1}.html")
		end
	end

	private

	# Downloads and parses a page.
	#
	# URI.open is used because open-uri no longer makes Kernel#open fetch URLs
	# (deprecated in Ruby 2.7, removed in 3.0).
	def fetch_document(page)
		Nokogiri::HTML(URI.open(page))
	end

	# Returns the stripped <small> text of a quote, or "" when it exceeds
	# MAX_CHARACTER_LENGTH (presumably not a plain character name in that case).
	def character_name(quote)
		name = quote.css("small").text.strip
		name.length > MAX_CHARACTER_LENGTH ? "" : name
	end

	# Collects the text of every non-empty <p> inside a quote into one string.
	def quote_text(quote)
		quote.css("p").map { |paragraph| paragraph_text(paragraph) }.reject(&:empty?).join(" ")
	end

	# Converts one <p> element to plain text.
	#
	# The element is serialised with to_s rather than read with .text so that
	# each <br> can become a space; .text would run the lines on either side of
	# a break together.
	def paragraph_text(paragraph)
		paragraph.to_s.gsub("<br>", " ").gsub("<p>", "").gsub("</p>", "").strip
	end

end

Version data entries

19 entries across 19 versions & 1 rubygems

Version Path
the-office-quote-generator-0.1.9 lib/scraper.rb
the-office-quote-generator-0.1.8 lib/scraper.rb
the-office-quote-generator-0.1.7 lib/scraper.rb
the-office-quote-generator-0.1.6 lib/scraper.rb
the-office-quote-generator-0.1.5 lib/scraper.rb
the-office-quote-generator-0.1.4 lib/scraper.rb
the-office-quote-generator-0.1.3 lib/scraper.rb
the-office-quote-generator-0.1.2 lib/scraper.rb
the-office-quote-generator-0.1.1 lib/scraper.rb
the-office-quote-generator-0.1.0 lib/scraper.rb
the-office-quote-generator-0.0.9 lib/scraper.rb
the-office-quote-generator-0.0.8 lib/scraper.rb
the-office-quote-generator-0.0.7 lib/scraper.rb
the-office-quote-generator-0.0.6 lib/scraper.rb
the-office-quote-generator-0.0.5 lib/scraper.rb
the-office-quote-generator-0.0.4 lib/scraper.rb
the-office-quote-generator-0.0.3 lib/scraper.rb
the-office-quote-generator-0.0.2 lib/scraper.rb
the-office-quote-generator-0.0.1 lib/scraper.rb