lib/awestruct/context_helper.rb in awestruct-0.1.9 vs lib/awestruct/context_helper.rb in awestruct-0.2.0
- old
+ new
@@ -9,17 +9,39 @@
def clean_html(str)
str.gsub( / /, ' ' )
end
- def summarize(text, numwords=20)
- text.split()[0, numwords].join(' ')
+ def without_images(str)
+ str.gsub(/<img[^>]+>/,'').gsub(/<a[^>]+>([^<]*)<\/a>/, '\1')
end
-
+
+ def close_tags(s)
+ stack = []
+ s.scan(/<\/?[^>]+>/).each do |tag|
+ if tag[1] != '/'
+ tag = tag[1..-1].scan(/\w+/).first
+ stack = [ tag ] + stack
+ else
+ tag = tag[2..-1].scan(/\w+/).first
+ if stack[0] == tag
+ stack = stack.drop(1)
+ else
+ raise "Malformed HTML expected #{tag[0]} but got #{tag} '#{s}'"
+ end
+ end
+ end
+ stack.inject(s) { |memo,tag| memo += "</#{tag}>" }
+ end
+
+ def summarize(text, numwords=20, ellipsis='...')
+ close_tags(text.split()[0, numwords].join(' ') + ellipsis)
+ end
+
def fully_qualify_urls(base_url, text)
doc = Hpricot( text )
-
+
doc.search( "//a" ).each do |a|
a['href'] = fix_url( base_url, a['href'] )
end
doc.search( "//link" ).each do |link|
link['href'] = fix_url( base_url, link['href'] )
@@ -27,10 +49,10 @@
doc.search( "//img" ).each do |img|
img['src'] = fix_url( base_url, img['src'] )
end
return doc.to_s
end
-
+
def fix_url(base_url, url)
return url unless ( url =~ /^\// )
"#{base_url}#{url}"
end
end