module Boilerpipe java_import java.net.URL module Extractors class ArticleExtractor java_import 'com.kohlschutter.boilerpipe.extractors.ArticleExtractor' def self.process(doc) ArticleExtractor::INSTANCE.process doc end def self.get_text(s) url = nil begin url = Java::JavaNet::URL.new(s) rescue Java::JavaNet::MalformedURLException => e # not a URL end input = url ? url : s ArticleExtractor::INSTANCE.get_text(input) end class <