Sha256: 618f5a8c451cbf5c5a030d0ed8deba63e357502d6ef48d07ff3e3b0d3b6062e6
Contents?: true
Size: 481 Bytes
Versions: 5
Compression:
Stored size: 481 Bytes
Contents
# Marks trailing headline TextBlocks (content blocks carrying the label
# :HEADING) as boilerplate. "Trailing" means the block is currently marked as
# content and no non-heading content block follows it in the document.
#
# The namespace is opened with nested module syntax so this file also loads
# when it is required before the top-level Boilerpipe module has been defined.
module Boilerpipe
  module Filters
    class TrailingHeadlineToBoilerplateFilter
      # Walks the document's text blocks from the last one towards the first
      # and flips every content block labelled :HEADING to non-content. The
      # walk stops at the first content block that is not a heading; blocks
      # that are already non-content are skipped without stopping the walk.
      #
      # @param doc [#text_blocks] document whose blocks respond to
      #   +is_content?+, +has_label?+ and +content=+
      # @return [Object] the same +doc+ that was passed in (blocks are mutated
      #   in place)
      def self.process(doc)
        # Trailing headlines sit at the END of the block list, so the scan has
        # to start there. A forward scan would demote leading headlines and
        # stop before ever reaching the trailing ones.
        doc.text_blocks.reverse_each do |tb|
          next unless tb.is_content?
          break unless tb.has_label?(:HEADING)

          tb.content = false
        end

        doc
      end
    end
  end
end
Version data entries
5 entries across 5 versions & 1 rubygems