lib/libri/scraper.rb in libri-0.2.2 vs lib/libri/scraper.rb in libri-0.2.3

- old
+ new

@@ -4,13 +4,10 @@
   def self.scrape_barnes_noble
     html = "https://www.barnesandnoble.com/b/books/awards/_/N-29Z8q8Z1d6q"
     awards_page = Nokogiri::HTML(open(html))
-    # awards.name = awards_page.css("ul#sidebar-section-0 li a").text
-    # awards.url = awards_page.css("ul#sidebar-section-0 li a").attribute("href").value
-
     awards_array = []
     awards = {}
     awards_page.css("ul#sidebar-section-0 li a").take(15).each { |award|
       awards = {
@@ -36,11 +33,12 @@
         :url => "https://www.barnesandnoble.com" + book.css("a").attribute("href").value
       }
       books_array << books
     }
-    books_array
+
+    books_array.uniq
   end
   def self.scrape_book(book)
     html = book[:url]
     book_page = Nokogiri::HTML(open(html))
@@ -50,11 +48,11 @@
     book_info_hash = {
       :title_by_author => info_section.css("div#productInfoOverview div.mb-m").text,
       :blurbs_and_plot => info_section.css("div#productInfoOverview p").map(&:text).join("\n").strip,
       :about_author => info_section.css("div#MeetTheAuthor div.text--medium").text.strip,
-      :excerpt => info_section.xpath("//div[@class='read-an-excerpt']/p[not(@class) and position()<7]").map(&:text).join("\n"),
+      :excerpt => info_section.xpath("//div[@class='read-an-excerpt']/p[not(@class) and position()<5]").map(&:text).join("\n"),
       # :related_books => book_page.css("div.product-shelf-info").each { |book|
       # related_books_hash = {
       # :title => book.css("div.product-shelf-title").text.strip,
       # :author => book.css("div.product-shelf-author").text.strip,
       # :url => "https://www.barnesandnoble.com" + book.css("a").attribute("href").value
@@ -62,10 +60,10 @@
       # },
       :availability => book_page.css("button#pdp-marketplace-btn").text.chomp,
       :url => book[:url]
     }
-    # book_info_hash.delete_if { |key, val| val.to_s.strip.empty? }
+    book_info_hash.delete_if { |key, val| val.to_s.strip.empty? }
   end
   def self.scrape_quote
     html = "https://www.goodreads.com/quotes/tag/books"