require 'nokogiri' module Boilerpipe::SAX class BoilerpipeHTMLParser def self.parse(text) #script bug - delete script tags text = text.gsub(/\