Sha256: 4f1ccac3fbd751603f660ef41d1c986d4e3cfe17c62b745f51e14fc94491b46f
Contents?: true
Size: 560 Bytes
Versions: 5
Compression:
Stored size: 560 Bytes
Contents
# Removes TextBlocks which have explicitly been marked as "not content".
#
# A filter built with a non-nil label keeps non-content blocks that carry
# that label; a filter built with nil removes every non-content block.
module Boilerpipe::Filters
  class BoilerplateBlockFilter
    # @param label [Object, nil] label whose blocks survive filtering even
    #   when they are marked as not content; nil means "keep nothing extra".
    #   (Presumably a Symbol such as :TITLE - confirm against callers.)
    def initialize(label)
      @label_to_keep = label
    end

    # Shared filter instance that keeps non-content blocks labelled :TITLE.
    INSTANCE_KEEP_TITLE = BoilerplateBlockFilter.new(:TITLE)

    # Drops the removable blocks from doc.text_blocks and hands the remaining
    # list back to the document.
    #
    # @param doc [#text_blocks, #replace_text_blocks!] document to filter
    # @return [Object] the same doc that was passed in
    def process(doc)
      # delete_if mutates the list returned by doc.text_blocks in place and
      # returns that same list, which is then passed to replace_text_blocks!.
      remaining = doc.text_blocks.delete_if { |tb| removable?(tb) }
      doc.replace_text_blocks!(remaining)
      doc
    end

    private

    # True when the block is marked as not content and is not protected by
    # the configured label. Uses @label_to_keep rather than a hard-coded
    # :TITLE so the label given to the constructor is actually honoured.
    def removable?(tb)
      tb.is_not_content? && (@label_to_keep.nil? || !tb.has_label?(@label_to_keep))
    end
  end
end
Version data entries
5 entries across 5 versions & 1 rubygems