lib/onebox/engine/standard_embed.rb in onebox-1.5.50 vs lib/onebox/engine/standard_embed.rb in onebox-1.5.60
- old
+ new
@@ -4,123 +4,100 @@
# Class-level registry of oEmbed endpoints, keyed by the regexp a URL has to
# match. The hash is created on first access and the same object is returned
# on every later call.
def self.oembed_providers
  return @@oembed_providers if defined?(@@oembed_providers) && @@oembed_providers

  @@oembed_providers = {}
end
# Old (1.5.50) position and form of the OpenGraph provider list: lazily
# creates and memoizes a class-level array via Array.new. These lines are
# removed; the method reappears after add_oembed_provider with an array literal.
- def self.opengraph_providers
- @@opengraph_providers ||= Array.new
- end
-
# Registers +endpoint+ as the oEmbed service for URLs matching +regexp+.
# A later registration under the same regexp replaces the earlier one.
# Returns the endpoint that was stored.
def self.add_oembed_provider(regexp, endpoint)
  oembed_providers.store(regexp, endpoint)
end
# Returns the class-level list that add_opengraph_provider appends regexps to,
# creating an empty array on first access and reusing it afterwards.
+ def self.opengraph_providers
+ @@opengraph_providers ||= []
+ end
+
# Appends +regexp+ to the OpenGraph provider list. The old line used #push and
# the new line uses <<; both add the regexp to the end of the same array.
def self.add_opengraph_provider(regexp)
- opengraph_providers.push(regexp)
+ opengraph_providers << regexp
end
# Some oembed providers (like meetup.com) don't provide links to themselves,
# so their endpoints are registered here by URL pattern.
# The old version registered five endpoints, calling the method without
# parentheses; the new version keeps only the meetup.com entry.
- add_oembed_provider /(.*\.)?gfycat\.com\//, 'http://gfycat.com/cajax/oembed'
- add_oembed_provider /www\.kickstarter\.com\//, 'https://www.kickstarter.com/services/oembed'
- add_oembed_provider /www\.meetup\.com\//, 'http://api.meetup.com/oembed'
- add_oembed_provider /www\.ted\.com\//, 'http://www.ted.com/services/v1/oembed.json'
- add_oembed_provider /(.*\.)?vimeo\.com\//, 'http://vimeo.com/api/oembed.json'
+ add_oembed_provider(/www\.meetup\.com\//, 'http://api.meetup.com/oembed')
# Old version only: the nytimes.com OpenGraph registration below is removed.
- # Sites that work better with OpenGraph
- add_opengraph_provider /nytimes\.com\//
-
# Asks WhitelistedGenericOnebox.host_matches whether this onebox's URI is
# covered by the WhitelistedGenericOnebox.https_hosts list, and returns
# whatever that call returns.
def always_https?
  target = uri
  secure_hosts = WhitelistedGenericOnebox.https_hosts
  WhitelistedGenericOnebox.host_matches(target, secure_hosts)
end
# Builds the metadata hash for this URL and memoizes it in @raw.
#
# New version (lines marked "+"): gathers OpenGraph, Twitter card and oEmbed
# data separately and merges them into a single hash.
# Old version (lines marked "-"): started by looping over the registered
# oEmbed providers and returning as soon as one of them populated @raw.
def raw
return @raw if @raw
- StandardEmbed.oembed_providers.each do |regexp, endpoint|
- if url =~ regexp
- fetch_oembed_raw("#{endpoint}?url=#{url}")
- return @raw if @raw
- end
+ og = get_opengraph
+ twitter = get_twitter
+ oembed = get_oembed
+
+ @raw = {}
+
# Merge order sets precedence: ||= only fills keys that are still unset, so
# OpenGraph wins over Twitter, which wins over oEmbed. Blank values are
# skipped and therefore never block a later source from supplying the key.
+ og.each { |k, v| @raw[k] ||= v unless Onebox::Helpers::blank?(v) }
+ twitter.each { |k, v| @raw[k] ||= v unless Onebox::Helpers::blank?(v) }
+ oembed.each { |k, v| @raw[k] ||= v unless Onebox::Helpers::blank?(v) }
+
+ @raw
+ end
+
+ private
+
# Fetches the page at +url+ and parses the response body with Nokogiri::HTML.
# The parsed document is cached in @html_doc, so repeated calls from the
# get_* helpers reuse it instead of fetching again.
+ def html_doc
+ return @html_doc if @html_doc
+ response = Onebox::Helpers.fetch_response(url)
+ @html_doc = Nokogiri::HTML(response.body)
end
- response = Onebox::Helpers.fetch_response(url)
- html_doc = Nokogiri::HTML(response.body)
# Finds an oEmbed endpoint for the page, fetches it, and returns the parsed
# JSON after passing it through Onebox::Helpers.symbolize_keys. Returns {} when
# no endpoint can be determined or when one of the rescued errors is raised.
#
# Endpoint lookup order:
#   1. <link type="application/json+oembed"> href in the page
#   2. <link type="text/json+oembed"> href in the page
#   3. the first registered provider whose regexp matches +url+
#
# Lines marked "-" inside this span belong to the old raw method and are
# unrelated to the new method's logic.
+ def get_oembed
+ application_json = html_doc.at("//link[@type='application/json+oembed']/@href")
+ oembed_url = application_json.value if application_json
- StandardEmbed.opengraph_providers.each do |regexp|
- if url =~ regexp
- @raw = parse_open_graph(html_doc, url)
- return @raw if @raw
# ||= keeps the application/json link when both link types are present.
+ text_json = html_doc.at("//link[@type='text/json+oembed']/@href")
+ oembed_url ||= text_json.value if text_json
+
# No discovery link in the page: fall back to the registered providers.
+ if Onebox::Helpers.blank?(oembed_url)
+ StandardEmbed.oembed_providers.each do |regexp, endpoint|
+ if url[regexp]
# NOTE(review): url is interpolated into the query string without
# URL-encoding — confirm the registered endpoints accept it as is.
+ oembed_url = "#{endpoint}?url=#{url}"
+ break
+ end
+ end
end
- end
- # Determine if we should use oEmbed or OpenGraph (prefers oEmbed)
- oembed_alternate = html_doc.at("//link[@type='application/json+oembed']") || html_doc.at("//link[@type='text/json+oembed']")
- # Do not use oEmbed for WordPress sites (https://meta.discourse.org/t/onebox-for-wordpress-4-4-sites/36765)
- fetch_oembed_raw(oembed_alternate) unless oembed_alternate.nil? || oembed_alternate['href'] =~ /public-api.wordpress.com\/oembed/ || oembed_alternate['href'] =~ /wp-json\/oembed/
+ return {} if Onebox::Helpers.blank?(oembed_url)
- open_graph = parse_open_graph(html_doc, url)
- if @raw
- @raw[:image] = open_graph.images.first if @raw[:image].nil? && open_graph && open_graph.images
- return @raw
+ Onebox::Helpers.symbolize_keys(::MultiJson.load(Onebox::Helpers.fetch_response(oembed_url).body))
# A refused connection, Net::HTTPError or unparseable JSON is treated as
# "no oEmbed data" rather than propagated to the caller.
+ rescue Errno::ECONNREFUSED, Net::HTTPError, MultiJson::LoadError
+ {}
end
- @raw = open_graph
- end
# Collects the page's OpenGraph <meta> tags into a hash.
#
# A tag is used when its "property" attribute matches og:<name>
# (case-insensitive). The key is <name> with "-" and ":" replaced by "_",
# converted to a symbol, e.g. "og:image:width" => :image_width. The value is
# the tag's "content" as a string. Blank values are skipped, and the first
# non-blank value seen for a key is kept.
#
# Lines marked "-" inside this span are remnants of the old private section,
# fetch_oembed_raw and the start of parse_open_graph.
+ def get_opengraph
+ og = {}
- private
-
- def fetch_oembed_raw(oembed_url)
- return unless oembed_url
- oembed_url = oembed_url['href'] unless oembed_url['href'].nil?
- oembed_data = Onebox::Helpers.symbolize_keys(::MultiJson.load(Onebox::Helpers.fetch_response(oembed_url).body))
- @raw =
- if oembed_data[:html] && oembed_data[:html].bytesize > 4000
- # fallback to OpenGraph if oEmbed data size is more than 4000 bytes
- nil
- else
- oembed_data
+ html_doc.css('meta').each do |m|
+ if m["property"] && m["property"][/^og:(.+)$/i]
+ value = m["content"].to_s
# $1 is the capture from the regexp match on the line above.
# NOTE(review): symbols are created from page-supplied names — confirm this
# is acceptable for the Ruby versions this gem targets.
+ og[$1.tr('-:','_').to_sym] ||= value unless Onebox::Helpers::blank?(value)
+ end
end
- rescue Errno::ECONNREFUSED, Net::HTTPError, MultiJson::LoadError
- @raw = nil
- end
- def parse_open_graph(html, og_url)
- og = Struct.new(:url, :type, :title, :description, :images, :metadata, :html).new
- og.url = og_url
- og.images = []
- og.metadata = {}
+ og
+ end
- attrs_list = %w(title url type description)
- html.css('meta').each do |m|
- if m.attribute('property') && m.attribute('property').to_s.match(/^og:/i)
- # og properties
- m_content = m.attribute('content').to_s.strip
- m_name = m.attribute('property').to_s.gsub('og:', '')
- og.metadata[m_name.to_sym] ||= []
- og.metadata[m_name.to_sym].push m_content
- if m_name == "image"
- image_uri = URI.parse(m_content) rescue nil
- if image_uri
- if image_uri.host.nil?
- image_uri.host = URI.parse(og_url).host
- end
- og.images.push image_uri.to_s
- end
- elsif attrs_list.include? m_name
- og.send("#{m_name}=", m_content) unless m_content.empty?
# Collects the page's Twitter card <meta> tags into a hash.
#
# A tag is used when its "name" attribute matches twitter:<name>
# (case-insensitive). The key is <name> with "-" and ":" replaced by "_",
# converted to a symbol. The value is the tag's "content" attribute, falling
# back to "value" when "content" is absent. Blank values are skipped, and the
# first non-blank value seen for a key is kept.
#
# Lines marked "-" inside this span are the old twitter handling from
# parse_open_graph.
+ def get_twitter
+ twitter = {}
+
+ html_doc.css('meta').each do |m|
+ if m["name"] && m["name"][/^twitter:(.+)$/i]
+ value = (m["content"] || m["value"]).to_s
# $1 is the capture from the regexp match two lines up.
+ twitter[$1.tr('-:','_').to_sym] ||= value unless Onebox::Helpers::blank?(value)
end
end
- if m.attribute('name') && m.attribute('name').to_s.match(/^twitter:/i)
- # twitter properties
- m_content = m.attribute('content').to_s.strip if m.attribute('content')
- m_content = m.attribute('value').to_s.strip if m.attribute('value')
- m_name = m.attribute('name').to_s
- og.metadata[m_name.to_sym] ||= []
- og.metadata[m_name.to_sym].push m_content
- end
+
+ twitter
end
- og
- end
end
end
end