lib/onebox/engine/wikipedia_onebox.rb in onebox-1.1.0 vs lib/onebox/engine/wikipedia_onebox.rb in onebox-1.2.0
- old
+ new
@@ -1,9 +1,10 @@
module Onebox
module Engine
class WikipediaOnebox
include Engine
+ include LayoutSupport
include HTML
matches do
http
anything
@@ -12,15 +13,36 @@
end
private
def data
- {
- url: @url,
- name: raw.css("html body h1").inner_text,
- image: raw.css(".infobox .image img").first["src"],
- description: raw.css("html body p").inner_text
+ # get all the paras
+ paras = raw.search("p")
+ text = ""
+
+ unless paras.empty?
+ cnt = 0
+ while text.length < Onebox::LayoutSupport.max_text && cnt <= 3
+ text << " " unless cnt == 0
+ paragraph = paras[cnt].inner_text[0..Onebox::LayoutSupport.max_text]
+ paragraph.gsub!(/\[\d+\]/mi, "")
+ text << paragraph
+ cnt += 1
+ end
+ end
+
+ text = "#{text[0..Onebox::LayoutSupport.max_text]}..." if text.length > Onebox::LayoutSupport.max_text
+ result = {
+ link: link,
+ title: raw.css("html body h1").inner_text,
+ description: text
}
+ img = raw.css(".infobox .image img")
+ if img && img.first
+ result[:image] = img.first["src"]
+ end
+
+ result
end
end
end
end