Sha256: bdf7d3e821b9bed29bec000a5b031a371cdbec421956c0da9003687b93d94aa5

Contents?: true

Size: 1.92 KB

Versions: 3

Compression:

Stored size: 1.92 KB

Contents

#!/usr/bin/ruby
# Markup helpers for the feed parser: DOCTYPE stripping for XML documents and
# rewriting of relative URIs found in HTML fragments.
module FeedParserUtilities
  # [element name, attribute name] pairs whose attribute value is treated as
  # a URI by resolveRelativeURIs. Built once here rather than on every call;
  # the order is the order in which the document is searched.
  RELATIVE_URI_ATTRIBUTES = [
    %w(a href),
    %w(applet codebase),
    %w(area href),
    %w(blockquote cite),
    %w(body background),
    %w(del cite),
    %w(form action),
    %w(frame longdesc),
    %w(frame src),
    %w(iframe longdesc),
    %w(iframe src),
    %w(head profile),
    %w(img longdesc),
    %w(img src),
    %w(img usemap),
    %w(input src),
    %w(input usemap),
    %w(ins cite),
    %w(link href),
    %w(object classid),
    %w(object codebase),
    %w(object data),
    %w(object usemap),
    %w(q cite),
    %w(script src)
  ].freeze

  # Strips entity declarations and the DOCTYPE from an XML document.
  #
  # Every <!ENTITY ...> declaration is removed first, then the first
  # <!DOCTYPE ...> declaration (later ones, if any, are left in place).
  #
  # data:: the XML document as a String
  #
  # Returns a two-element array [rss_version, stripped_data]:
  # rss_version is 'rss091n' when the DOCTYPE text contains "netscape"
  # (case-insensitive) and nil otherwise; stripped_data is a new String,
  # the argument itself is not modified.
  def stripDoctype(data)
    # /m (Regexp::MULTILINE) lets "." match newlines, so declarations that
    # span several lines are still matched.
    entity_pattern = /<!ENTITY(.*?)>/m
    doctype_pattern = /<!DOCTYPE(.*?)>/m

    data = data.gsub(entity_pattern, '')

    # Capture group 1 of the first DOCTYPE match, or '' when there is none.
    doctype = data[doctype_pattern, 1] || ''
    version = doctype.downcase.include?('netscape') ? 'rss091n' : nil

    return version, data.sub(doctype_pattern, '')
  end

  # Rewrites relative URIs in an HTML fragment against a base URI.
  #
  # For each pair in RELATIVE_URI_ATTRIBUTES, every matching element whose
  # attribute is present, non-empty and relative (according to
  # ForgivingURI) has that attribute replaced with urljoin(baseURI, value).
  # Hpricot, ForgivingURI and urljoin are defined outside this module.
  #
  # htmlSource:: the HTML to process
  # baseURI::    the URI that relative references are joined to
  # encoding::   not used by this method; kept so existing callers work
  #
  # Returns the document serialised with to_html.
  def resolveRelativeURIs(htmlSource, baseURI, encoding)
    $stderr << "entering resolveRelativeURIs\n" if $debug # FIXME write a decent logger

    h = Hpricot(htmlSource)
    RELATIVE_URI_ATTRIBUTES.each do |ename, eattr|
      h.search(ename).each do |elem|
        euri = elem.attributes[eattr]
        next unless euri && !euri.empty?

        # URI.encode was removed in Ruby 3.0. Use it where it still exists
        # and fall back to the default parser's escape elsewhere.
        # FIXME should the value be escaped before parsing at all?
        escaped = URI.respond_to?(:encode) ? URI.encode(euri) : URI::DEFAULT_PARSER.escape(euri)

        # The original (unescaped) value is what gets joined to the base.
        elem.attributes[eattr] = urljoin(baseURI, euri) if ForgivingURI.parse(escaped).relative?
      end
    end
    return h.to_html
  end
end


Version data entries

3 entries across 3 versions & 2 rubygems

Version Path
rfeedparser-ictv-0.9.931 lib/rfeedparser/markup_helpers.rb
rfeedparser_ictv-0.9.931 lib/rfeedparser/markup_helpers.rb
rfeedparser_ictv-0.9.932 lib/rfeedparser/markup_helpers.rb