module Boilerpipe
  module SAX
    # Text-level clean-up applied to raw HTML before it is parsed.
    class Preprocessor
      # A whole <script> element: opening tag (with or without attributes),
      # its body, and the closing tag. Case-insensitive; the body may span
      # several lines and is matched non-greedily so that text between two
      # separate script elements is kept.
      SCRIPT_ELEMENT = %r{<script\b[^>]*>.*?</script\s*>}im.freeze

      # An "&nbsp" entity that lost its terminating semicolon and is
      # directly followed by a space.
      UNTERMINATED_NBSP = /(&nbsp) /.freeze

      private_constant :SCRIPT_ELEMENT, :UNTERMINATED_NBSP

      # Removes script elements and repairs unterminated "&nbsp" entities.
      #
      # @param text [String] raw HTML markup
      # @return [String] a new string; the argument is not modified
      def self.strip(text)
        # script bug - delete script tags
        without_scripts = text.gsub(SCRIPT_ELEMENT, '')

        # nokogiri uses libxml for mri and nekohtml for jruby
        # mri doesn't remove &nbsp when missing the semicolon,
        # so put the semicolon back before parsing
        without_scripts.gsub(UNTERMINATED_NBSP, '\1; ')
      end
    end
  end
end