#!/usr/bin/ruby class BetterSGMLParserError < Exception; end; class BetterSGMLParser < HTML::SGMLParser # Replaced Tagfind and Charref Regexps with the ones in feedparser.py # This makes things work. Interesting = /[&<]/u Incomplete = Regexp.compile('&([a-zA-Z][a-zA-Z0-9]*|#[0-9]*)?|<([a-zA-Z][^<>]*|/([a-zA-Z][^<>]*)?|![^<>]*)?', 64) # 64 is the unicode flag Entityref = /&([a-zA-Z][-.a-zA-Z0-9]*)[^-.a-zA-Z0-9]/u Charref = /&#(x?[0-9A-Fa-f]+)[^0-9A-Fa-f]/u Shorttagopen = /'<[a-zA-Z][-.a-zA-Z0-9]*/u Shorttag = /'<([a-zA-Z][-.a-zA-Z0-9]*)\/([^\/]*)\//u Endtagopen = /<\//u # Changed the RegExps to match the Python SGMLParser Endbracket = /[<>]/u Declopen = //u Commentopen = /
# NOTE(review): the line above holds the shebang, the BetterSGMLParserError
# class, and the opening of BetterSGMLParser (a subclass of HTML::SGMLParser
# that declares scanner regexps as constants) all on one physical line.
# Because that line begins with "#", Ruby reads the whole of it as a comment,
# so none of these definitions are evaluated as written. The original line
# breaks must be restored before this code can run.
#
# NOTE(review): points to confirm while restoring the line breaks:
#   * BetterSGMLParserError inherits from Exception, so a bare `rescue`
#     (which only rescues StandardError) will not catch it -- confirm that
#     this is intended.
#   * Shorttagopen and Shorttag both begin with a literal "'" before "<";
#     possibly left over from string quoting in feedparser.py -- TODO confirm.
#   * Declopen is the empty pattern (//u) and the Commentopen pattern is cut
#     off at the end of the line; the pattern text appears to have been lost
#     and should be recovered from the original source rather than guessed.