Sha256: 229ae0bd785be2abe25b78cc0011460a7d49c39efdb93a36ed0cae21b8f683e5

Contents?: true

Size: 1.71 KB

Versions: 4

Compression:

Stored size: 1.71 KB

Contents

# coding: utf-8
# frozen_string_literal: true

class PDF::Reader
  # A value object that represents one or more consecutive characters on a page.
  #
  # A run records where the text starts (x, y), how wide it is, the font size
  # it was drawn with (floored on construction) and the text itself. Runs are
  # Comparable, so collections of them can be sorted into reading order, and
  # two adjacent runs can be joined with #+.
  class TextRun
    include Comparable

    attr_reader :x, :y, :width, :font_size, :text

    alias to_s text

    # @param x [Numeric] horizontal position where the run starts
    # @param y [Numeric] vertical position of the run
    # @param width [Numeric] horizontal extent of the run
    # @param font_size [Numeric] font size; stored floored
    # @param text [String] the characters in the run
    def initialize(x, y, width, font_size, text)
      @x = x
      @y = y
      @width = width
      @font_size = font_size.floor
      @text = text
    end

    # Allows collections of TextRun objects to be sorted. They will be sorted
    # in order of their position on a cartesian plane - Top Left to Bottom Right.
    # A larger y sorts first; runs with the same y are ordered by ascending x.
    #
    # @param other [#x, #y] the object to compare against
    # @return [Integer, nil] -1, 0 or 1, or nil when other exposes no x/y
    #   position (or the positions cannot be ordered)
    def <=>(other)
      # Comparable#== is built on this method; returning nil for foreign
      # objects makes `run == nil` evaluate to false instead of raising.
      return nil unless other.respond_to?(:x) && other.respond_to?(:y)

      if x == other.x && y == other.y
        0
      elsif y < other.y
        1
      elsif y > other.y
        -1
      elsif x < other.x
        -1
      elsif x > other.x
        1
      end
    end

    # @return [Numeric] the horizontal position where the run ends (x + width)
    def endx
      @endx ||= x + width
    end

    # @return [Float] the run width divided by its character count, or 0.0
    #   for a run with no characters (avoids dividing by zero, which would
    #   otherwise yield Infinity or NaN)
    def mean_character_width
      count = character_count
      return 0.0 if count.zero?

      @width / count
    end

    # Two runs can be merged when they share the same integer y position and
    # font size, and other starts within a small window around where this
    # run ends (see #mergable_range).
    #
    # @param other [TextRun]
    # @return [Boolean]
    def mergable?(other)
      y.to_i == other.y.to_i && font_size == other.font_size && mergable_range.include?(other.x)
    end

    # Joins this run with a following run into a new TextRun that spans both.
    # When the gap between the two runs is at least 20% of the font size, a
    # single space is inserted between the texts.
    #
    # @param other [TextRun]
    # @return [TextRun]
    # @raise [ArgumentError] if other is not mergable with this run
    def +(other)
      raise ArgumentError, "#{other} cannot be merged with this run" unless mergable?(other)

      merged_text = if (other.x - endx) < (font_size * 0.2)
                      text + other.text
                    else
                      "#{text} #{other.text}"
                    end

      TextRun.new(x, y, other.endx - x, font_size, merged_text)
    end

    # @return [String] the text followed by the width, font size and position
    def inspect
      "#{text} w:#{width} f:#{font_size} @#{x},#{y}"
    end

    private

    # The span of x positions at which a following run may start and still be
    # merged with this one: from 3 before the end of this run up to one font
    # size past it.
    def mergable_range
      @mergable_range ||= Range.new(endx - 3, endx + font_size)
    end

    # Assume string encoding is marked correctly and we can trust String#size to return a
    # character count
    def character_count
      @text.size.to_f
    end
  end
end

Version data entries

4 entries across 3 versions & 2 rubygems

Version Path
pdf-reader-2.2.1 lib/pdf/reader/text_run.rb
embulk-input-druginfo_interview_form-0.1.0 vendor/bundle/ruby/2.4.0/gems/pdf-reader-2.2.0/lib/pdf/reader/text_run.rb
embulk-input-druginfo_interview_form-0.1.0 vendor/bundle/ruby/2.5.0/gems/pdf-reader-2.2.0/lib/pdf/reader/text_run.rb
pdf-reader-2.2.0 lib/pdf/reader/text_run.rb