lib/boilerpipe/extractors/article_extractor.rb in boilerpipe-ruby-0.1.1 vs lib/boilerpipe/extractors/article_extractor.rb in boilerpipe-ruby-0.2.0

- old
+ new

@@ -1,10 +1,11 @@
 module Boilerpipe::Extractors
   class ArticleExtractor

     def self.text(contents)
       doc = ::Boilerpipe::SAX::BoilerpipeHTMLParser.parse(contents)
       ::Boilerpipe::Extractors::ArticleExtractor.process(doc)
+      doc.content
     end

     def self.process(doc)
       title = doc.title
@@ -44,9 +45,9 @@
       filters::LargeBlockSameTagLevelToContentFilter.process doc

       # Marks nested list-item blocks after the end of the main content as content.
       filters::ListAtEndFilter.process doc

-      doc.content
+      doc
     end
   end
 end