lib/awestruct/context_helper.rb in awestruct-0.1.9 vs lib/awestruct/context_helper.rb in awestruct-0.2.0

- old
+ new

# NOTE(review): reconstructed from the "+" side of the collapsed diff listing.
# The original listing ends with one extra `end` that closes an enclosing
# scope whose header is not visible in this excerpt -- re-wrap these methods
# in that scope when merging.

# HTML elements that never take a closing tag; they must not be tracked as
# "open" when balancing markup.
HTML_VOID_ELEMENTS = %w[
  area base br col embed hr img input link meta param source track wbr
].freeze

# Replaces every `&nbsp;` entity in +str+ with a plain space.
#
# @param str [String]
# @return [String] a new string
def clean_html(str)
  str.gsub('&nbsp;', ' ')
end

# Strips `<img>` tags and unwraps anchors whose content is plain text
# (`<a href="x">label</a>` becomes `label`). Anchors that contain nested
# markup are left untouched, as before.
#
# Tag names are matched case-insensitively and on a word boundary so that
# unrelated tags that merely start with the same letters are not affected.
#
# @param str [String]
# @return [String] a new string
def without_images(str)
  str.gsub(/<img\b[^>]*>/i, '').gsub(/<a\b[^>]*>([^<]*)<\/a>/i, '\1')
end

# Appends closing tags for every element that is still open at the end of
# the HTML fragment +s+, innermost first.
#
# Comments, doctype/processing instructions, void elements (`<br>`, `<img>`,
# ...) and self-closing tags (`<x />`) are ignored because they never need a
# closing tag. Tag names are compared case-insensitively.
#
# @param s [String] HTML fragment, possibly truncated
# @return [String] +s+ followed by the missing closing tags
# @raise [RuntimeError] when a closing tag does not match the innermost
#   open element
def close_tags(s)
  open_tags = []

  s.scan(/<\/?[^>]+>/).each do |tag|
    next if tag =~ /\A<[!?]/ # comment, doctype or processing instruction

    name = tag[/\w+/]
    next if name.nil? || HTML_VOID_ELEMENTS.include?(name.downcase)

    if tag.start_with?('</')
      expected = open_tags.pop
      unless expected && expected.casecmp(name) == 0
        raise "Malformed HTML expected #{expected || 'nothing'} but got #{name} '#{s}'"
      end
    elsif tag !~ /\/\s*>\z/ # `<x />` closes itself
      open_tags.push(name)
    end
  end

  s + open_tags.reverse.map { |open_name| "</#{open_name}>" }.join
end

# Returns the first +numwords+ whitespace-separated words of +text+ followed
# by +ellipsis+, with any HTML elements left open by the cut closed again.
#
# The ellipsis is always appended, even when +text+ is shorter than
# +numwords+ words. If the cut lands inside a tag (between its attributes),
# the dangling tag fragment is dropped so the result never ends in broken
# markup.
#
# @param text [String] HTML or plain text
# @param numwords [Integer] maximum number of words; negative counts as 0
# @param ellipsis [String] marker appended after the excerpt
# @return [String]
# @raise [RuntimeError] if the excerpt contains mismatched closing tags
def summarize(text, numwords=20, ellipsis='...')
  excerpt = text.split.first([numwords, 0].max).join(' ')
  # Remove a trailing tag that was cut before its closing `>`.
  excerpt = excerpt.sub(/\s*<\/?[a-zA-Z][^<>]*\z/, '')
  close_tags(excerpt + ellipsis)
end

# Parses +text+ with Hpricot and rewrites the `href` of every `<a>` and
# `<link>` and the `src` of every `<img>` through #fix_url.
#
# NOTE(review): the diff listing elides one line between its two hunks; it
# is taken to be the `end` closing the `//link` loop.
#
# @param base_url [String] prefix for root-relative URLs
# @param text [String] HTML to rewrite
# @return [String] the serialized document
def fully_qualify_urls(base_url, text)
  doc = Hpricot( text )

  [ ['//a', 'href'], ['//link', 'href'], ['//img', 'src'] ].each do |xpath, attr|
    doc.search( xpath ).each do |node|
      node[attr] = fix_url( base_url, node[attr] )
    end
  end

  doc.to_s
end

# Prefixes +url+ with +base_url+ when it is root-relative (starts with a
# single `/`). Anything else -- absolute URLs, relative paths,
# protocol-relative `//host/...` URLs and nil -- is returned unchanged.
#
# @param base_url [String]
# @param url [String, nil]
# @return [String, nil]
def fix_url(base_url, url)
  return url unless url =~ %r{\A/(?!/)}
  "#{base_url}#{url}"
end