Sha256: 123060c821e051ec1206a6479f269975430b95d3288088b9b4ee2bf00175f773

Contents?: true

Size: 545 Bytes

Versions: 5

Compression:

Stored size: 545 Bytes

Contents

# Merges consecutive text blocks whose text densities are equal.

module Boilerpipe::Filters
  # Fuses every run of consecutive text blocks that report the same
  # +text_density+ into the first block of that run.
  class SimpleBlockFusionProcessor
    # Walks the document's text blocks once, merging each block into its
    # predecessor when their densities are equal, and then replaces the
    # document's block list with the surviving blocks.
    #
    # @param doc [#text_blocks, #replace_text_blocks!] document to process;
    #   its blocks must respond to +text_density+ and +merge_next+
    # @return [Object] the same +doc+ that was passed in; a document with
    #   fewer than two blocks is returned without being modified
    def self.process(doc)
      tbs = doc.text_blocks
      return doc if tbs.size < 2

      current = tbs.first
      # Survivors are collected directly instead of subtracting the merged
      # blocks afterwards: Array#- matches by hash/eql?, so it would also
      # drop a kept block that merely compares equal to a merged one.
      kept = [current]

      tbs.drop(1).each do |tb|
        # +current.text_density+ is re-read on every iteration, so the
        # comparison sees whatever value +merge_next+ left on the block.
        if current.text_density == tb.text_density
          current.merge_next(tb)
        else
          current = tb
          kept << tb
        end
      end

      doc.replace_text_blocks!(kept)
      doc
    end
  end
end

Version data entries

5 entries across 5 versions & 1 rubygems

Version Path
boilerpipe-ruby-0.5.0 lib/boilerpipe/filters/simple_block_fusion_processor.rb
boilerpipe-ruby-0.4.4 lib/boilerpipe/filters/simple_block_fusion_processor.rb
boilerpipe-ruby-0.4.3 lib/boilerpipe/filters/simple_block_fusion_processor.rb
boilerpipe-ruby-0.4.2 lib/boilerpipe/filters/simple_block_fusion_processor.rb
boilerpipe-ruby-0.4.1 lib/boilerpipe/filters/simple_block_fusion_processor.rb