lib/onebox/engine/standard_embed.rb in onebox-1.5.50 vs lib/onebox/engine/standard_embed.rb in onebox-1.5.60

- old
+ new

@@ -4,123 +4,100 @@ def self.oembed_providers @@oembed_providers ||= {} end - def self.opengraph_providers - @@opengraph_providers ||= Array.new - end - def self.add_oembed_provider(regexp, endpoint) oembed_providers[regexp] = endpoint end + def self.opengraph_providers + @@opengraph_providers ||= [] + end + def self.add_opengraph_provider(regexp) - opengraph_providers.push(regexp) + opengraph_providers << regexp end # Some oembed providers (like meetup.com) don't provide links to themselves - add_oembed_provider /(.*\.)?gfycat\.com\//, 'http://gfycat.com/cajax/oembed' - add_oembed_provider /www\.kickstarter\.com\//, 'https://www.kickstarter.com/services/oembed' - add_oembed_provider /www\.meetup\.com\//, 'http://api.meetup.com/oembed' - add_oembed_provider /www\.ted\.com\//, 'http://www.ted.com/services/v1/oembed.json' - add_oembed_provider /(.*\.)?vimeo\.com\//, 'http://vimeo.com/api/oembed.json' + add_oembed_provider(/www\.meetup\.com\//, 'http://api.meetup.com/oembed') - # Sites that work better with OpenGraph - add_opengraph_provider /nytimes\.com\// - def always_https? WhitelistedGenericOnebox.host_matches(uri, WhitelistedGenericOnebox.https_hosts) end def raw return @raw if @raw - StandardEmbed.oembed_providers.each do |regexp, endpoint| - if url =~ regexp - fetch_oembed_raw("#{endpoint}?url=#{url}") - return @raw if @raw - end + og = get_opengraph + twitter = get_twitter + oembed = get_oembed + + @raw = {} + + og.each { |k, v| @raw[k] ||= v unless Onebox::Helpers::blank?(v) } + twitter.each { |k, v| @raw[k] ||= v unless Onebox::Helpers::blank?(v) } + oembed.each { |k, v| @raw[k] ||= v unless Onebox::Helpers::blank?(v) } + + @raw + end + + private + + def html_doc + return @html_doc if @html_doc + response = Onebox::Helpers.fetch_response(url) + @html_doc = Nokogiri::HTML(response.body) end - response = Onebox::Helpers.fetch_response(url) - html_doc = Nokogiri::HTML(response.body) + def get_oembed + application_json = html_doc.at("//link[@type='application/json+oembed']/@href") + oembed_url = application_json.value if application_json - StandardEmbed.opengraph_providers.each do |regexp| - if url =~ regexp - @raw = parse_open_graph(html_doc, url) - return @raw if @raw + text_json = html_doc.at("//link[@type='text/json+oembed']/@href") + oembed_url ||= text_json.value if text_json + + if Onebox::Helpers.blank?(oembed_url) + StandardEmbed.oembed_providers.each do |regexp, endpoint| + if url[regexp] + oembed_url = "#{endpoint}?url=#{url}" + break + end + end end - end - # Determine if we should use oEmbed or OpenGraph (prefers oEmbed) - oembed_alternate = html_doc.at("//link[@type='application/json+oembed']") || html_doc.at("//link[@type='text/json+oembed']") - # Do not use oEmbed for WordPress sites (https://meta.discourse.org/t/onebox-for-wordpress-4-4-sites/36765) - fetch_oembed_raw(oembed_alternate) unless oembed_alternate.nil? || oembed_alternate['href'] =~ /public-api.wordpress.com\/oembed/ || oembed_alternate['href'] =~ /wp-json\/oembed/ + return {} if Onebox::Helpers.blank?(oembed_url) - open_graph = parse_open_graph(html_doc, url) - if @raw - @raw[:image] = open_graph.images.first if @raw[:image].nil? && open_graph && open_graph.images - return @raw + Onebox::Helpers.symbolize_keys(::MultiJson.load(Onebox::Helpers.fetch_response(oembed_url).body)) + rescue Errno::ECONNREFUSED, Net::HTTPError, MultiJson::LoadError + {} end - @raw = open_graph - end + def get_opengraph + og = {} - private - - def fetch_oembed_raw(oembed_url) - return unless oembed_url - oembed_url = oembed_url['href'] unless oembed_url['href'].nil? - oembed_data = Onebox::Helpers.symbolize_keys(::MultiJson.load(Onebox::Helpers.fetch_response(oembed_url).body)) - @raw = - if oembed_data[:html] && oembed_data[:html].bytesize > 4000 - # fallback to OpenGraph if oEmbed data size is more than 4000 bytes - nil - else - oembed_data + html_doc.css('meta').each do |m| + if m["property"] && m["property"][/^og:(.+)$/i] + value = m["content"].to_s + og[$1.tr('-:','_').to_sym] ||= value unless Onebox::Helpers::blank?(value) + end end - rescue Errno::ECONNREFUSED, Net::HTTPError, MultiJson::LoadError - @raw = nil - end - def parse_open_graph(html, og_url) - og = Struct.new(:url, :type, :title, :description, :images, :metadata, :html).new - og.url = og_url - og.images = [] - og.metadata = {} + og + end - attrs_list = %w(title url type description) - html.css('meta').each do |m| - if m.attribute('property') && m.attribute('property').to_s.match(/^og:/i) - # og properties - m_content = m.attribute('content').to_s.strip - m_name = m.attribute('property').to_s.gsub('og:', '') - og.metadata[m_name.to_sym] ||= [] - og.metadata[m_name.to_sym].push m_content - if m_name == "image" - image_uri = URI.parse(m_content) rescue nil - if image_uri - if image_uri.host.nil? - image_uri.host = URI.parse(og_url).host - end - og.images.push image_uri.to_s - end - elsif attrs_list.include? m_name - og.send("#{m_name}=", m_content) unless m_content.empty? + def get_twitter + twitter = {} + + html_doc.css('meta').each do |m| + if m["name"] && m["name"][/^twitter:(.+)$/i] + value = (m["content"] || m["value"]).to_s + twitter[$1.tr('-:','_').to_sym] ||= value unless Onebox::Helpers::blank?(value) end end - if m.attribute('name') && m.attribute('name').to_s.match(/^twitter:/i) - # twitter properties - m_content = m.attribute('content').to_s.strip if m.attribute('content') - m_content = m.attribute('value').to_s.strip if m.attribute('value') - m_name = m.attribute('name').to_s - og.metadata[m_name.to_sym] ||= [] - og.metadata[m_name.to_sym].push m_content - end + + twitter end - og - end end end end