lib/display.rb in fastreader-1.0.1 vs lib/display.rb in fastreader-1.0.2
- old
+ new
@@ -130,44 +130,44 @@
def tags_to_text(html)
doc = Hpricot(html)
doc.search('//comment()').remove
- doc.search('div') do |p|
- p.swap( "\n\n" + p.inner_text.gsub("\n", ' ').squeeze(' ').strip + "\n\n" )
+ doc.search('p, div') do |p|
+ p.swap( "\n\n" + p.inner_html.gsub("\n", ' ').squeeze(' ').strip + "\n\n" )
end
- doc.search('p') do |p|
- p.swap( "\n\n" + p.inner_text.gsub("\n", ' ').squeeze(' ').strip + "\n\n" )
- end
-
- doc.search('//blockquote') do |x|
+ doc.search('blockquote, pre') do |x|
# compress extra spaces
text = x.inner_text.squeeze(' ').strip
# collapse the spacing in the text
text.gsub!(/\s{2,}/, ' ')
text = wrap_text(text, @width - 4).gsub(/^/, ' ') # indent 4 spaces
x.swap("\n\n" + text + "\n\n")
end
- doc.search('h1,h2,h3,h4') do |p|
+ doc.search('big, h1,h2,h3,h4') do |p|
p.swap( "\n\n= #{p.inner_text}\n\n" )
end
doc.search('//img') do |img|
img.swap( "(img)" )
end
- doc.search('object').remove
+ doc.search('svg, object').remove
doc.search('table').remove
doc.search('script').remove
doc.search('//br') do |p|
p.swap( "\n" )
end
- doc.search('i, b') do |p|
+ doc.search('i, b, strong, em') do |p|
p.swap( "*#{p.inner_text}*" )
end
+ doc.search('abbr') do |p|
+ p.swap( "+#{p.inner_text}+" )
+ end
+ #
# anchor tags are processed after real links
doc.search('a') do |p|
p.swap( "#{p.inner_text}" )
end
doc.search('dt') do |x|