Sha256: cb0363c401b2c3928696c912e927eb18ac5b6cf615f309d544facb360e516583

Contents?: true

Size: 706 Bytes

Versions: 2

Compression:

Stored size: 706 Bytes

Contents

require 'hpricot'
require 'lib/jkl/rest_client'

# Mixin with helpers for cleaning up fetched markup and parsing it.
module Jkl

  # Strips markup from +text+ and keeps only the lines that look like prose.
  #
  # Processing steps:
  # 1. +text+ is converted with +to_s+ (so +nil+ yields "") and everything
  #    that looks like an opening or closing tag is deleted.
  # 2. The result is cut into lines at every CR and in front of every LF.
  # 3. In each line tabs are deleted and every run of two or more
  #    whitespace characters is deleted outright (not collapsed to one space).
  # 4. Lines containing fewer than five spaces are discarded.
  #
  # Kept lines are concatenated in order. A line that was introduced by an
  # LF keeps that LF as its first character unless step 3 removed it, so
  # LF / CRLF input stays newline separated while CR-only input is joined
  # without a separator.
  #
  # @param text [#to_s] raw markup or text
  # @return [String] the concatenated surviving lines, possibly empty
  def sanitize(text)
    str = ""
    # Splitting on "\r" alone treated LF-only documents as one giant line, so
    # the short-line filter below never fired for them. The zero-width
    # lookahead also breaks before each "\n" while leaving that "\n" attached
    # to the following line, which keeps the output for CR and CRLF input
    # exactly what it used to be.
    text.to_s.gsub(/<\/?[^>]*>/, "").split(/\r|(?=\n)/).each do |l| # remove tags
      l = l.chomp.gsub("\t", '').gsub(/\s{2,}/, '') # remove tabs and larger spaces
      str << l unless l.count(" ") < 5 # remove short lines - usually just navigation
    end
    str
  end

  # Passes +response+ to +Hpricot()+ and returns whatever that call returns.
  #
  # URI::InvalidURIError, SocketError and Errno::ECONNREFUSED are rescued:
  # a "WARN: ..." line is printed to stdout and +nil+ is returned instead.
  # Any other exception propagates to the caller.
  #
  # NOTE(review): these are connection-style errors; whether +Hpricot()+ can
  # actually raise them for the values passed in here is not visible from
  # this file - confirm against the callers.
  #
  # @param response the value handed to +Hpricot()+ (presumably markup text)
  # @return the result of +Hpricot(response)+, or +nil+ after a rescued error
  def from_doc(response)
    Hpricot(response)
  rescue URI::InvalidURIError => e
    puts("WARN: Problem with getting a connection: #{e}")
    nil # explicit: callers previously received nil only because puts returns nil
  rescue SocketError => e
    puts("WARN: Could not connect to feed: #{e}")
    nil
  rescue Errno::ECONNREFUSED => e
    puts("WARN: Connection refused: #{e}")
    nil
  end

end

Version data entries

2 entries across 2 versions & 1 rubygems

Version Path
sshingler-jkl-0.0.4 lib/jkl/url_doc_handler.rb
sshingler-jkl-0.0.5 lib/jkl/url_doc_handler.rb