Sha256: df92476f9a4a7d46ebb76e637f4a3f05c4eda6fc18d16ddb5ff791563694369e
Contents?: true
Size: 862 Bytes
Versions: 3
Compression:
Stored size: 862 Bytes
Contents
require 'hpricot'
require 'rest_client'

# Helpers for turning a fetched HTML document into plain text or a parsed
# Hpricot document.
module Jkl
  # Reduces an HTML fragment to its "content" text.
  #
  # Steps, in order:
  # 1. remove every <script>...</script> element together with its body;
  # 2. remove all remaining tags (the text between them is kept);
  # 3. split on carriage returns, then strip one leading space/tab and one
  #    further leading whitespace character from each line;
  # 4. split on newlines and keep only lines containing at least five
  #    spaces (shorter lines are usually just navigation).
  #
  # The surviving lines are concatenated with no separator.
  #
  # @param text [#to_s] markup to clean; nil is treated as an empty string
  # @return [String] the concatenated surviving lines ("" if none survive)
  def self.sanitize(text)
    # The script body is matched lazily and across newlines, so JavaScript
    # containing ">" (comparisons, arrows, embedded markup) is removed as
    # well instead of leaking into the output.
    without_scripts = text.to_s.gsub(%r{<\s*script\b[^>]*>.*?</script\s*>}im, "")
    without_tags = without_scripts.gsub(/<\/?[^>]*>/, "") # remove all tags

    kept = ""
    without_tags.split("\r").each do |chunk|
      # "^" anchors at every line start inside the chunk, so each line loses
      # at most one space/tab and then one more whitespace character.
      chunk = chunk.gsub(/^[ \t]/, "").gsub(/^[ \s]/, "")
      chunk.split("\n").each do |line|
        # remove short lines - usually just navigation
        kept << line unless line.count(" ") < 5
      end
    end
    kept
  end

  # Parses +response+ with Hpricot.
  #
  # If one of the rescued errors is raised, a "WARN:" line is printed to
  # standard output and nil is returned (the return value of +puts+).
  # NOTE(review): it is not evident from this file that parsing an already
  # fetched response can raise these connection errors - confirm with callers.
  #
  # @param response the document handed to Hpricot
  # @return the value returned by Hpricot(response), or nil after a rescued error
  def self.from_doc(response)
    Hpricot(response)
  rescue URI::InvalidURIError => e
    puts("WARN: Problem with getting a connection: #{e}")
  rescue SocketError => e
    puts("WARN: Could not connect to feed: #{e}")
  rescue Errno::ECONNREFUSED => e
    puts("WARN: Connection refused: #{e}")
  end
end
Version data entries
3 entries across 3 versions & 1 rubygems
Version | Path |
---|---|
jakal-0.0.9 | lib/jkl/url_doc_handler.rb |
jakal-0.0.8 | lib/jkl/url_doc_handler.rb |
jakal-0.0.7 | lib/jkl/url_doc_handler.rb |