Sha256: 2cdfb060c200a8237f2c94e0ac2ad73e7d078a6e1907e5762ca18f65531ec6fa
Contents?: true
Size: 622 Bytes
Versions: 5
Compression:
Stored size: 622 Bytes
Contents
# Marks nested list-item blocks after the end of the main content as content.
# Used downstream of keep_largest_block_filter.
module Boilerpipe
  module Filters
    class ListAtEndFilter
      # Sentinel tag level used while no qualifying main-content block is
      # being followed. Assumed to exceed any real tag level, so the
      # "nested deeper" comparison can never succeed against it.
      MAX = 99_999_999

      # Walks the document's text blocks in order and promotes list items
      # that trail a main-content block to content.
      #
      # A block that is already content and labelled :VERY_LIKELY_CONTENT
      # records its tag level. Each following block that qualifies as a
      # trailing list item (see trailing_list_item?) gets `content = true`.
      # Any other block resets the recorded level to MAX, so list items that
      # come after it are no longer promoted.
      #
      # @param doc [#text_blocks] document whose blocks respond to
      #   is_content?, has_label?, tag_level, link_density and content=
      # @return [Object] the same doc instance; its blocks are mutated in place
      def self.process(doc)
        tag_level = MAX
        doc.text_blocks.each do |tb|
          if tb.is_content? && tb.has_label?(:VERY_LIKELY_CONTENT)
            tag_level = tb.tag_level
          elsif trailing_list_item?(tb, tag_level)
            tb.content = true
          else
            # Chain broken: stop promoting until the next main-content block.
            tag_level = MAX
          end
        end
        doc
      end

      # True when tb is nested deeper than tag_level, carries both the
      # :MIGHT_BE_CONTENT and :LI labels, and has a link density of zero.
      def self.trailing_list_item?(tb, tag_level)
        tb.tag_level > tag_level &&
          tb.has_label?(:MIGHT_BE_CONTENT) &&
          tb.has_label?(:LI) &&
          tb.link_density == 0
      end
      private_class_method :trailing_list_item?
    end
  end
end
Version data entries
5 entries across 5 versions & 1 rubygems