Sha256: 95b0712a1896e1ac244bf2d07a83aceb429470145cc755958fe53f0e080bc557

Contents?: true

Size: 1.84 KB

Versions: 1

Compression:

Stored size: 1.84 KB

Contents

# coding: utf-8

class PDF::Reader
  # Remove duplicates from a collection of TextRun objects. This can be helpful when a PDF
  # uses slightly offset overlapping characters to achieve a fake 'bold' effect.
  #
  # The filter sweeps a vertical line from left to right across the runs. Each run
  # contributes two event points (its left edge and its right edge); runs whose left edge
  # has been passed but whose right edge has not are "open" and are the only candidates a
  # newly started run is compared against.
  class OverlappingRunsFilter

    # This should be between 0 and 1. If TextRun B obscures this much of TextRun A (and they
    # have identical characters) then one will be discarded
    #
    # NOTE(review): nothing in this class compares the characters of the two runs; confirm
    # whether that check is expected to happen inside TextRun#intersection_area_percent.
    OVERLAPPING_THRESHOLD = 0.5

    # Returns a new array holding the members of +runs+ that are not considered redundant.
    #
    # runs - an Array of objects responding to #x, #endx and #intersection_area_percent.
    #
    # A run is dropped when, at the moment its left edge is reached, it sufficiently
    # overlaps a run that is still open. The run that started first is the one kept.
    # +runs+ itself is not modified.
    def self.exclude_redundant_runs(runs)
      sweep_line_status = []
      to_exclude = []

      event_point_schedule = runs.flat_map do |run|
        [EventPoint.new(run.x, run), EventPoint.new(run.endx, run)]
      end
      event_point_schedule.sort! { |a, b| a.x <=> b.x }

      event_point_schedule.each do |event_point|
        if event_point.start?
          to_exclude << event_point.run if detect_intersection(sweep_line_status, event_point)
          sweep_line_status.push(event_point)
        else
          # The status list holds the *start* event point of each open run, which is a
          # different object from this end event point. Match on the identity of the run
          # so the entry is really removed and closed runs stop being re-examined.
          closing_run = event_point.run
          sweep_line_status.delete_if { |open_point| open_point.run.equal?(closing_run) }
        end
      end

      runs - to_exclude
    end

    # Returns true when the run starting at +event_point+ overlaps one of the open runs by
    # at least OVERLAPPING_THRESHOLD, false otherwise.
    #
    # sweep_line_status - an Array of event points (objects responding to #run) for the
    #                     runs that are currently open.
    # event_point       - the event point marking the left edge of the run being tested.
    def self.detect_intersection(sweep_line_status, event_point)
      sweep_line_status.any? do |point_in_sls|
        open_run = point_in_sls.run

        # Cheap horizontal range test first; the area computation only runs for
        # candidates whose x-extent actually contains the new left edge.
        event_point.x >= open_run.x &&
          event_point.x <= open_run.endx &&
          open_run.intersection_area_percent(event_point.run) >= OVERLAPPING_THRESHOLD
      end
    end
  end

  # Lightweight pairing of an x coordinate with the TextRun it belongs to. Keeping the
  # sweep bookkeeping in these wrapper objects means the TextRun objects themselves are
  # never modified while duplicates are being searched for.
  class EventPoint
    attr_reader :x, :run

    # x   - the horizontal position of this event
    # run - the run the event belongs to; must respond to #x
    def initialize(x, run)
      @x = x
      @run = run
    end

    # True when this event sits on the left edge of its run, i.e. its x equals the run's x.
    def start?
      x == run.x
    end
  end

end

Version data entries

1 entries across 1 versions & 1 rubygems

Version Path
pdf-reader-2.3.0 lib/pdf/reader/overlapping_runs_filter.rb