lib/libri/scraper.rb in libri-0.2.2 vs lib/libri/scraper.rb in libri-0.2.3
- old
+ new
@@ -4,13 +4,10 @@
def self.scrape_barnes_noble
html = "https://www.barnesandnoble.com/b/books/awards/_/N-29Z8q8Z1d6q"
awards_page = Nokogiri::HTML(open(html))
- # awards.name = awards_page.css("ul#sidebar-section-0 li a").text
- # awards.url = awards_page.css("ul#sidebar-section-0 li a").attribute("href").value
-
awards_array = []
awards = {}
awards_page.css("ul#sidebar-section-0 li a").take(15).each { |award|
awards = {
@@ -36,11 +33,12 @@
:url => "https://www.barnesandnoble.com" + book.css("a").attribute("href").value
}
books_array << books
}
- books_array
+
+ books_array.uniq
end
def self.scrape_book(book)
html = book[:url]
book_page = Nokogiri::HTML(open(html))
@@ -50,11 +48,11 @@
book_info_hash = {
:title_by_author => info_section.css("div#productInfoOverview div.mb-m").text,
:blurbs_and_plot => info_section.css("div#productInfoOverview p").map(&:text).join("\n").strip,
:about_author => info_section.css("div#MeetTheAuthor div.text--medium").text.strip,
- :excerpt => info_section.xpath("//div[@class='read-an-excerpt']/p[not(@class) and position()<7]").map(&:text).join("\n"),
+ :excerpt => info_section.xpath("//div[@class='read-an-excerpt']/p[not(@class) and position()<5]").map(&:text).join("\n"),
# :related_books => book_page.css("div.product-shelf-info").each { |book|
# related_books_hash = {
# :title => book.css("div.product-shelf-title").text.strip,
# :author => book.css("div.product-shelf-author").text.strip,
# :url => "https://www.barnesandnoble.com" + book.css("a").attribute("href").value
@@ -62,10 +60,10 @@
# },
:availability => book_page.css("button#pdp-marketplace-btn").text.chomp,
:url => book[:url]
}
- # book_info_hash.delete_if { |key, val| val.to_s.strip.empty? }
+ book_info_hash.delete_if { |key, val| val.to_s.strip.empty? }
end
def self.scrape_quote
html = "https://www.goodreads.com/quotes/tag/books"