lib/onebox/engine/amazon_onebox.rb in onebox-2.2.8 vs lib/onebox/engine/amazon_onebox.rb in onebox-2.2.9

- old
+ new

@@ -9,28 +9,38 @@
       include Engine
       include LayoutSupport
       include HTML
       always_https
-      matches_regexp(/^https?:\/\/(?:www\.)?(?:smile\.)?(amazon|amzn)\.(?<tld>com|ca|de|it|es|fr|co\.jp|co\.uk|cn|in|com\.br|com\.mx)\//)
+      matches_regexp(/^https?:\/\/(?:www\.)?(?:smile\.)?(amazon|amzn)\.(?<tld>com|ca|de|it|es|fr|co\.jp|co\.uk|cn|in|com\.br|com\.mx|nl|pl|sa|sg|se|com\.tr|ae)\//)
       def url
+        # Have we cached the HTML body of the requested URL?
+        # If so, try to grab the canonical URL from that document,
+        # rather than guess at the best URL structure to use
+        if @body_cacher && @body_cacher.respond_to?('cache_response_body?')
+          if @body_cacher.cached_response_body_exists?(uri.to_s)
+            @raw ||= Onebox::Helpers.fetch_html_doc(@url, http_params, @body_cacher)
+            canonical_link = @raw.at('//link[@rel="canonical"]/@href')
+            return canonical_link.to_s if canonical_link
+          end
+        end
+
         if match && match[:id]
-          return "https://www.amazon.#{tld}/gp/aw/d/#{Onebox::Helpers.uri_encode(match[:id])}"
+          return "https://www.amazon.#{tld}/dp/#{Onebox::Helpers.uri_encode(match[:id])}"
         end
         @url
       end
       def tld
         @tld || @@matcher.match(@url)["tld"]
       end
       def http_params
-        {
-          'User-Agent' =>
-            'Mozilla/5.0 (iPhone; CPU iPhone OS 12_2 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/15E148'
-        }
+        if @options && @options[:user_agent]
+          { 'User-Agent' => @options[:user_agent] }
+        end
       end
       private
       def match