Sha256: ad598bd6a007d532dd1df025ea608591e46c606cf728ff115d35fa9fae9d3ad4
Contents?: true
Size: 556 Bytes
Versions: 5
Compression:
Stored size: 556 Bytes
Contents
# Removes TextBlocks which have explicitly been marked as "not content".
module Boilerpipe::Filters
  # Filter that drops every text block flagged as non-content, optionally
  # sparing the blocks that carry one designated label.
  class BoilerplateBlockFilter
    # @param label [Symbol, nil] label whose blocks are kept even when they are
    #   flagged as non-content; pass +nil+ to remove every non-content block.
    def initialize(label)
      @label_to_keep = label
    end

    # Shared instance that keeps blocks labelled +:TITLE+.
    INSTANCE_KEEP_TITLE = BoilerplateBlockFilter.new(:TITLE)

    # Removes the non-content blocks from +doc+ and hands the surviving list
    # back to the document.
    #
    # +delete_if+ is used deliberately: it edits the list returned by
    # +doc.text_blocks+ in place (assuming that list is an Array — confirm
    # against the document class), matching the historical behaviour.
    #
    # @param doc [#text_blocks, #replace_text_blocks!] document to filter
    # @return [Object] the same +doc+ that was passed in
    def process(doc)
      remaining = doc.text_blocks.delete_if { |tb| removable?(tb) }
      doc.replace_text_blocks!(remaining)
      doc
    end

    private

    # Tells whether +tb+ must be dropped: it is flagged as non-content and is
    # not protected by the configured label.
    #
    # The check uses the label given to the constructor instead of a
    # hard-coded +:TITLE+, so a filter built with any other label protects
    # that label. Behaviour for +nil+ and +:TITLE+ is unchanged.
    def removable?(tb)
      return false unless tb.is_not_content?

      @label_to_keep.nil? || !tb.has_label?(@label_to_keep)
    end
  end
end
Version data entries
5 entries across 5 versions & 1 rubygems