lib/onebox/engine/amazon_onebox.rb in onebox-2.2.14 vs lib/onebox/engine/amazon_onebox.rb in onebox-2.2.15
- old
+ new
@@ -17,23 +17,22 @@
@raw ||= nil
# If possible, fetch the cached HTML body immediately so we can
# try to grab the canonical URL from that document,
# rather than guess at the best URL structure to use
- if body_cacher&.respond_to?('cache_response_body?')
- if body_cacher.cache_response_body?(uri.to_s) && body_cacher.cached_response_body_exists?(uri.to_s)
- @raw ||= Onebox::Helpers.fetch_html_doc(@url, http_params, body_cacher)
- end
+ if !@raw && has_cached_body
+ @raw = Onebox::Helpers.fetch_html_doc(@url, http_params, body_cacher)
end
if @raw
canonical_link = @raw.at('//link[@rel="canonical"]/@href')
return canonical_link.to_s if canonical_link
end
if match && match[:id]
- return "https://www.amazon.#{tld}/dp/#{Onebox::Helpers.uri_encode(match[:id])}"
+ id = Addressable::URI.encode_component(match[:id], Addressable::URI::CharacterClasses::PATH)
+ return "https://www.amazon.#{tld}/dp/#{id}"
end
@url
end
@@ -47,31 +46,39 @@
end
end
private
+ def has_cached_body
+ body_cacher&.respond_to?('cache_response_body?') &&
+ body_cacher.cache_response_body?(uri.to_s) &&
+ body_cacher.cached_response_body_exists?(uri.to_s)
+ end
+
def match
@match ||= @url.match(/(?:d|g)p\/(?:product\/|video\/detail\/)?(?<id>[A-Z0-9]+)(?:\/|\?|$)/mi)
end
def image
if (main_image = raw.css("#main-image")) && main_image.any?
attributes = main_image.first.attributes
- return attributes["data-a-hires"].to_s if attributes["data-a-hires"]
-
- if attributes["data-a-dynamic-image"]
+ if attributes["data-a-hires"]
+ return attributes["data-a-hires"].to_s
+ elsif attributes["data-a-dynamic-image"]
return ::JSON.parse(attributes["data-a-dynamic-image"].value).keys.first
end
end
if (landing_image = raw.css("#landingImage")) && landing_image.any?
attributes = landing_image.first.attributes
- return attributes["data-old-hires"].to_s if attributes["data-old-hires"]
-
- landing_image.first["src"].to_s
+ if attributes["data-old-hires"]
+ return attributes["data-old-hires"].to_s
+ else
+ return landing_image.first["src"].to_s
+ end
end
if (ebook_image = raw.css("#ebooksImgBlkFront")) && ebook_image.any?
::JSON.parse(ebook_image.first.attributes["data-a-dynamic-image"].value).keys.first
end
@@ -89,19 +96,19 @@
raw.css(".mediaMatrixListItem.a-active .a-color-price").inner_text
end
end
def multiple_authors(authors_xpath)
- author_list = raw.xpath(authors_xpath)
- authors = []
- author_list.each { |a| authors << a.inner_text.strip }
- authors.join(", ")
+ raw
+ .xpath(authors_xpath)
+ .map { |a| a.inner_text.strip }
+ .join(", ")
end
def data
og = ::Onebox::OpenGraph.new(raw)
- if raw.at_css('#dp.book_mobile') #printed books
+ if raw.at_css('#dp.book_mobile') # printed books
title = raw.at("h1#title")&.inner_text
authors = raw.at_css('#byline_secondary_view_div') ? multiple_authors("//div[@id='byline_secondary_view_div']//span[@class='a-text-bold']") : raw.at("#byline")&.inner_text
rating = raw.at("#averageCustomerReviews_feature_div .a-icon")&.inner_text || raw.at("#cmrsArcLink .a-icon")&.inner_text
table_xpath = "//div[@id='productDetails_secondary_view_div']//table[@id='productDetails_techSpec_section_1']"