require 'htmlentities' module Onebox module Engine class WhitelistedGenericOnebox include Engine include StandardEmbed include LayoutSupport def self.whitelist=(list) @whitelist = list end def self.whitelist @whitelist ||= default_whitelist.dup end def self.default_whitelist %w(23hq.com 500px.com 8tracks.com about.com answers.com arstechnica.com ask.com battle.net bbc.co.uk bbs.boingboing.net bestbuy.ca bestbuy.com blip.tv bloomberg.com businessinsider.com clikthrough.com cnet.com cnn.com collegehumor.com coursera.org codepen.io cracked.com dailymail.co.uk dailymotion.com deadline.com dell.com deviantart.com digg.com dotsub.com ebay.ca ebay.co.uk ebay.com ehow.com espn.go.com etsy.com findery.com flickr.com folksy.com forbes.com foxnews.com funnyordie.com gfycat.com groupon.com howtogeek.com huffingtonpost.com huffingtonpost.ca hulu.com ign.com ikea.com imdb.com indiatimes.com instagr.am instagram.com itunes.apple.com khanacademy.org kickstarter.com kinomap.com liveleak.com livestream.com lessonplanet.com mashable.com medium.com meetup.com mixcloud.com mlb.com myspace.com nba.com nytimes.com npr.org photobucket.com pinterest.com reference.com revision3.com rottentomatoes.com samsung.com screenr.com scribd.com slideshare.net sourceforge.net speakerdeck.com spotify.com squidoo.com techcrunch.com ted.com thefreedictionary.com theglobeandmail.com thenextweb.com theonion.com thestar.com thesun.co.uk thinkgeek.com tmz.com torontosun.com tumblr.com twitch.tv twitpic.com usatoday.com viddler.com videojug.com vimeo.com vine.co walmart.com washingtonpost.com wikia.com wikihow.com wired.com wistia.com wi.st wonderhowto.com wsj.com zappos.com zillow.com) end # Often using the `html` attribute is not what we want, like for some blogs that # include the entire page HTML. However for some providers like Flickr it allows us # to return gifv and galleries. def self.default_html_providers ['Flickr', 'Meetup'] end def self.html_providers @html_providers ||= default_html_providers.dup end def self.html_providers=(new_provs) @html_providers = new_provs end # A re-written URL coverts http:// -> https:// def self.rewrites @rewrites ||= https_hosts.dup end def self.rewrites=(new_list) @rewrites = new_list end def self.https_hosts %w(slideshare.net dailymotion.com livestream.com) end def self.host_matches(uri, list) !!list.find {|h| %r((^|\.)#{Regexp.escape(h)}$).match(uri.host) } end def self.probable_discourse(uri) !!(uri.path =~ /\/t\/[^\/]+\/\d+(\/\d+)?(\?.*)?$/) end def self.probable_wordpress(uri) !!(uri.path =~ /\d{4}\/\d{2}\//) end def self.===(other) if other.kind_of?(URI) return WhitelistedGenericOnebox.host_matches(other, WhitelistedGenericOnebox.whitelist) || WhitelistedGenericOnebox.probable_wordpress(other) || WhitelistedGenericOnebox.probable_discourse(other) else super end end # Generates the HTML for the embedded content def photo_type? ( (data[:type] =~ /photo/ || data[:type] =~ /image/) && data[:type] !~ /photostream/ ) end def article_type? data[:type] == "article" end def rewrite_https(html) return html unless html uri = URI(@url) if WhitelistedGenericOnebox.host_matches(uri, WhitelistedGenericOnebox.rewrites) html.gsub!(/http:\/\//, 'https://') end html end def html_type? return data && data[:html] && ( (data[:html] =~ /iframe/) || WhitelistedGenericOnebox.html_providers.include?(data[:provider_name]) ) end def generic_html return data[:html] if html_type? return layout.to_html if article_type? return html_for_video(data[:video]) if data[:video] return image_html if photo_type? return nil unless data[:title] layout.to_html end def to_html rewrite_https(generic_html) end def placeholder_html result = nil return to_html if article_type? result = image_html if (data[:html] && data[:html] =~ /iframe/) || data[:video] || photo_type? result || to_html end def data if raw.is_a?(Hash) raw[:link] ||= link raw[:title] = HTMLEntities.new.decode(raw[:title]) return raw end data_hash = { link: link, title: HTMLEntities.new.decode(raw.title), description: raw.description } data_hash[:image] = raw.images.first if raw.images && raw.images.first data_hash[:type] = raw.type if raw.type if raw.metadata && raw.metadata[:"video:secure_url"] && raw.metadata[:"video:secure_url"].first data_hash[:video] = raw.metadata[:"video:secure_url"].first elsif raw.metadata && raw.metadata[:video] && raw.metadata[:video].first data_hash[:video] = raw.metadata[:video].first end if raw.metadata && raw.metadata[:"twitter:label1"] && raw.metadata[:"twitter:data1"] data_hash[:twitter_label1] = raw.metadata[:"twitter:label1"].first data_hash[:twitter_data1] = raw.metadata[:"twitter:data1"].first end if raw.metadata && raw.metadata[:"twitter:label2"] && raw.metadata[:"twitter:data2"] data_hash[:twitter_label2] = raw.metadata[:"twitter:label2"].first data_hash[:twitter_data2] = raw.metadata[:"twitter:data2"].first end data_hash end private def image_html return @image_html if @image_html return @image_html = "" if data[:image] if data[:thumbnail_url] @image_html = "" end @image_html end def html_for_video(video) if video.is_a?(String) video_url = video elsif video.is_a?(Hash) video_url = video[:_value] else return end if video_url # opengraph support multiple elements (videos, images ,etc). # We attempt to find a video element with the type of video/mp4 # and generate a native