lib/jkl/url_doc_handler.rb in jakal-0.0.9 vs lib/jkl/url_doc_handler.rb in jakal-0.1.0

- old
+ new

require 'hpricot'
require 'rest_client'

# Text-cleaning and document-parsing helpers, exposed as module-level methods
# (Jkl.sanitize, Jkl.from_doc).
module Jkl
  class << self

    # Strips markup from +text+ and returns the concatenation of its longer lines.
    #
    # Steps, in order:
    # 1. remove <script>...</script> elements together with their contents;
    # 2. remove every remaining tag;
    # 3. split on "\r", drop up to two leading whitespace characters from the
    #    start of each line, then split on "\n";
    # 4. keep only lines containing at least five spaces (shorter lines are
    #    usually just navigation) and append them to the result with no
    #    separator between them.
    #
    # @param text [#to_s] markup to clean; converted with +to_s+ first
    # @return [String] the retained lines joined together ("" if none qualify)
    def sanitize(text)
      str = ""
      # Lazy match with /m so a script body may contain ">" and span several
      # lines; matching only up to the next ">" would leave such scripts in
      # place and leak their source into the output.
      without_scripts = text.to_s.gsub(%r{<\s*script\b[^>]*>.*?</\s*script\s*>}im, "")
      without_tags = without_scripts.gsub(/<\/?[^>]*>/, "") # remove all tags
      without_tags.split("\r").each do |chunk|
        chunk = chunk.gsub(/^[ \t]/, "") # one leading space or tab per line
        chunk = chunk.gsub(/^[ \s]/, "") # one further leading whitespace character per line
        chunk.split("\n").each do |line|
          str << line unless line.count(" ") < 5 # remove short lines - usually just navigation
        end
      end
      str
    end

    # Parses +response+ with Hpricot.
    #
    # @param response the value handed to Hpricot()
    # @return the result of Hpricot(response), or nil when one of the rescued
    #   errors is raised (a warning is printed to stdout in that case)
    def from_doc(response)
      Hpricot(response)
    rescue URI::InvalidURIError => e
      puts("WARN: Problem with getting a connection: #{e}")
    rescue SocketError => e
      puts("WARN: Could not connect to feed: #{e}")
    rescue Errno::ECONNREFUSED => e
      puts("WARN: Connection refused: #{e}")
    end

  end
end