module DespamilatorFilter
  # Filter that raises a subject's score for every HTML tag found in its text.
  class HtmlTags < Despamilator::Filter
    # Score contributed by each detected occurrence of a tag.
    SCORE_PER_TAG = 0.6

    # Tag names to look for. They must stay lowercase: #parse downcases the
    # text once instead of matching case-insensitively for every tag.
    HTML_TAGS = %w[
      !-- !doctype a abbr acronym address applet area b base basefont bdo big
      blockquote body br button caption center cite code col colgroup dd del
      dfn dir div dl dt em fieldset font form frame frameset h1 h2 h3 h4 h5 h6
      head hr html i iframe img input ins isindex kbd label legend li link map
      menu meta noframes noscript object ol optgroup option p param pre q s
      samp select small span strike strong style sub sup table tbody td
      textarea tfoot th thead title tr tt u ul var xmp
    ].freeze

    # Scans the subject's text for each known HTML tag and registers one match
    # per tag that occurs at least once.
    #
    # @param subject [#text, #register_match!] the object under inspection;
    #   its text is read and matches are reported back to it
    # @return [Array<String>] the list of tags that was iterated
    def parse subject
      text = subject.text.downcase

      html_tags.each do |tag|
        name_pattern = Regexp.escape(tag)

        # Core String#count treats its arguments as character sets and raises
        # TypeError for a Regexp, so occurrences are counted with scan instead.
        # NOTE(review): if the subject's text class overrides #count to accept
        # patterns, this is presumably equivalent - confirm against that class.
        opening_count = text.scan(/<\s*#{name_pattern}\W/).size
        # Matches the tag name followed by optional whitespace and "/>".
        closing_count = text.scan(/\W#{name_pattern}\s*\/>/).size

        # Score on the larger of the two counts.
        occurrences = [opening_count, closing_count].max
        next unless occurrences > 0

        subject.register_match!({:score => SCORE_PER_TAG * occurrences, :filter => self})
      end
    end

    # @return [String] human-readable name of this filter
    def name
      'HTML tags'
    end

    # @return [String] short description of what this filter detects
    def description
      'Detects HTML tags in text'
    end

    # @return [Array<String>] the lowercase tag names this filter searches for
    def html_tags
      HTML_TAGS
    end
  end
end