example/index-html.rb in groonga-0.0.7 vs example/index-html.rb in groonga-0.9.0
- old
+ new
@@ -63,12 +63,22 @@
html_document = Nokogiri::HTML(html)
html_document.css("title").each do |title|
values[:title] = title.text
end
+ contents = []
html_document.css("body").each do |body|
- values[:content] = body.text
+ contents << body.text
end
+ html_document.css("img").each do |image|
+ image_content = []
+ title = image['title']
+ alt = image['alt']
+ image_content << title if title and !title.empty?
+ image_content << alt if alt and !alt.empty?
+ contents.concat(image_content) unless image_content.empty?
+ end
+ values[:content] = contents.join("\n")
values["last-modified"] = path.mtime
values.each do |key, value|
document[key] = value
end