lib/libri/scraper.rb in libri-0.2.4 vs lib/libri/scraper.rb in libri-0.2.5

- old
+ new

@@ -1,88 +1,65 @@
-class Scraper
+class Libri::Scraper
   attr_accessor :url, :award, :book
-  def self.scrape_barnes_noble
+  def scrape_barnes_noble
     html = "https://www.barnesandnoble.com/b/books/awards/_/N-8q8Z1d6q?showMoreIds=10008"
     awards_page = Nokogiri::HTML(open(html))
-    awards_array = []
     awards = {}
-    awards_page.css("ul#sidebar-section-0 li a").take(28).each { |award|
+    # Chosen .take(28) because without it, our awards list will include a 'Show Less'
+    awards_array = awards_page.css("ul#sidebar-section-0 li a").take(28).map { |award|
       awards = {
         :name => award.text.chomp,
         :url => "https://www.barnesandnoble.com" + award.attribute("href").value
       }
-      awards_array << awards
     }
-    awards_array
   end
-  def self.scrape_award(award)
+  def scrape_award(award)
     html = award[:url]
     books_page = Nokogiri::HTML(open(html))
-    books_array = []
     books = {}
-    books_page.css("div.product-shelf-info").take(20).each { |book|
+    books_array = books_page.css("div.product-shelf-info").take(20).map { |book|
       books = {
         :title => book.css("div.product-shelf-title").text.strip,
         :author => book.css("div.product-shelf-author").text.strip,
         :url => "https://www.barnesandnoble.com" + book.css("a").attribute("href").value
       }
-
-      books_array << books
-    }
-
-    books_array.uniq
+    }.uniq
   end
-  def self.scrape_book(book)
+  def scrape_book(book)
     html = book[:url]
     book_page = Nokogiri::HTML(open(html))
     info_section = book_page.css("div.tabpanel")
-    # related_books_hash = {}
-
     book_info_hash = {
       :title_by_author => info_section.css("div#productInfoOverview div.mb-m").text,
       :blurbs_and_plot => info_section.css("div#productInfoOverview p").map(&:text).join("\n").strip,
       :about_author => info_section.css("div#MeetTheAuthor div.text--medium").text.strip,
-      :excerpt => info_section.xpath("//div[@class='read-an-excerpt']/p[not(@class) and position()<5]").map(&:text).join("\n"),
-      # :related_books => book_page.css("div.product-shelf-info").each { |book|
-      # related_books_hash = {
-      # :title => book.css("div.product-shelf-title").text.strip,
-      # :author => book.css("div.product-shelf-author").text.strip,
-      # :url => "https://www.barnesandnoble.com" + book.css("a").attribute("href").value
-      # }
-      # },
+      :excerpt => info_section.xpath("//div[@class='read-an-excerpt']/p[not(@class) and position()<3]").map(&:text).join("\n"),
       :availability => book_page.css("button#pdp-marketplace-btn").text.chomp,
       :url => book[:url]
-    }
-
-    book_info_hash.delete_if { |key, val| val.to_s.strip.empty? }
-
+    }.delete_if { |key, val| val.to_s.strip.empty? }
   end
-  def self.scrape_quote
+  def scrape_quote
     html = "https://www.goodreads.com/quotes/tag/books"
     quotes_page = Nokogiri::HTML(open(html))
     quote_section = quotes_page.css("div.quote")
-    quotes_array = []
     quote_hash = {}
-    quote_section.each { |quote|
+    quotes_array = quote_section.map { |quote|
       quote_hash = {
         :quote => quote.css("div.quoteText").first.text.scan(/(“.+”)/).join(""),
         :author => quote.css("div.quoteText a").first.text
       }
-
-      quotes_array << quote_hash
     }
-    quotes_array
   end
 end