lib/libri/scraper.rb in libri-0.2.5 vs lib/libri/scraper.rb in libri-0.2.6
- old
+ new
@@ -13,14 +13,16 @@
awards = {
:name => award.text.chomp,
:url => "https://www.barnesandnoble.com" + award.attribute("href").value
}
}
+
+ Libri::Awards.create_from_collection(awards_array)
end
def scrape_award(award)
- html = award[:url]
+ html = award.url
books_page = Nokogiri::HTML(open(html))
books = {}
books_array = books_page.css("div.product-shelf-info").take(20).map { |book|
@@ -28,25 +30,30 @@
:title => book.css("div.product-shelf-title").text.strip,
:author => book.css("div.product-shelf-author").text.strip,
:url => "https://www.barnesandnoble.com" + book.css("a").attribute("href").value
}
}.uniq
+
+ Libri::Books.create_from_collection(books_array)
end
# Scrapes the detail page of a single book and collects its descriptive fields.
# (Diff view: lines marked "-" are libri 0.2.5, lines marked "+" are libri 0.2.6.)
#
# @param book the record whose page is scraped; 0.2.5 reads book[:url], 0.2.6
#   calls book.url -- presumably a Hash before and a Libri object now
#   (TODO confirm against the caller).
# @return in 0.2.5 the filtered Hash built below (it is the last expression);
#   in 0.2.6 whatever Libri::Book.create_from_collection returns.
def scrape_book(book)
- html = book[:url]
+ html = book.url
# NOTE(review): calling open on a URL presumably relies on open-uri being
# required elsewhere in the gem -- confirm.
book_page = Nokogiri::HTML(open(html))
# Most fields below are looked up inside the "div.tabpanel" nodes of the page.
info_section = book_page.css("div.tabpanel")
book_info_hash = {
:title_by_author => info_section.css("div#productInfoOverview div.mb-m").text,
:blurbs_and_plot => info_section.css("div#productInfoOverview p").map(&:text).join("\n").strip,
:about_author => info_section.css("div#MeetTheAuthor div.text--medium").text.strip,
- :excerpt => info_section.xpath("//div[@class='read-an-excerpt']/p[not(@class) and position()<3]").map(&:text).join("\n"),
# 0.2.6 takes the text of every <p> under div.read-an-excerpt; the 0.2.5 XPath
# (kept commented out below) selected only class-less <p> children with
# position()<3 and joined them with newlines.
+ :excerpt => info_section.css("div.read-an-excerpt p").text,
+ # info_section.xpath("//div[@class='read-an-excerpt']/p[not(@class) and position()<3]").map(&:text).join("\n"),
# Availability is read from the whole page rather than from info_section.
:availability => book_page.css("button#pdp-marketplace-btn").text.chomp,
- :url => book[:url]
+ :url => book.url
# Drop every entry whose value is blank once converted to a String and stripped.
}.delete_if { |key, val| val.to_s.strip.empty? }
+
# New in 0.2.6: hand the collected hash to Libri::Book (defined outside this view).
+ Libri::Book.create_from_collection(book_info_hash)
end
def scrape_quote
html = "https://www.goodreads.com/quotes/tag/books"
quotes_page = Nokogiri::HTML(open(html))
@@ -58,8 +65,10 @@
quote_hash = {
:quote => quote.css("div.quoteText").first.text.scan(/(“.+”)/).join(""),
:author => quote.css("div.quoteText a").first.text
}
}
+
+ Libri::Quote.create_from_collection(quotes_array)
end
end