Sha256: a19c9768be580663237bd02d2555a3f275ff74f1b83cb05588a2c90f7eafb7c1
Contents?: true
Size: 625 Bytes
Versions: 5
Compression:
Stored size: 625 Bytes
Contents
# Promotes list-item blocks that trail the main content to content.
#
# Intended to run after keep_largest_block_filter: once a block that is
# both content and labelled :VERY_LIKELY_CONTENT has been seen, deeper
# nested, link-free :LI blocks labelled :MIGHT_BE_CONTENT that follow it
# are flagged as content as well.
module Boilerpipe::Filters
  class ListAtEndFilter
    # Sentinel meaning "no anchoring content block is active"; presumably
    # larger than any real tag level, so the depth comparison below cannot
    # succeed while it is in effect.
    MAX = 99_999_999

    # Walks the document's text blocks in order and marks qualifying
    # trailing list items as content.
    #
    # @param doc an object whose +text_blocks+ yields blocks responding to
    #   +is_content?+, +has_label?+, +tag_level+, +link_density+ and
    #   +content=+
    # @return the same +doc+ that was passed in (its blocks may have been
    #   mutated in place)
    def self.process(doc)
      anchor_level = MAX

      doc.text_blocks.each do |block|
        # A confirmed content block becomes the new depth reference.
        if block.is_content? && block.has_label?(:VERY_LIKELY_CONTENT)
          anchor_level = block.tag_level
          next
        end

        # Only blocks nested deeper than the reference, tagged as possible
        # content list items and carrying no links, qualify.
        trailing_list_item =
          block.tag_level > anchor_level &&
          block.has_label?(:MIGHT_BE_CONTENT) &&
          block.has_label?(:LI) &&
          block.link_density == 0

        if trailing_list_item
          block.content = true
        else
          # Anything else ends the run; wait for the next content anchor.
          anchor_level = MAX
        end
      end

      doc
    end
  end
end
Version data entries
5 entries across 5 versions & 1 rubygems