# coding: utf-8
# typed: true
# frozen_string_literal: true

require 'pdf/reader/transformation_matrix'

class PDF::Reader
  # Encapsulates logic for tracking graphics state as the instructions for
  # a single page are processed. Most of the public methods correspond
  # directly to PDF operators.
  #
  # Two derived values are memoised because they are read very frequently
  # while walking text: the text rendering matrix and the effective font
  # size. Every method that changes one of their inputs (CTM, text matrix,
  # font size, horizontal scaling, text rise) must call
  # invalidate_text_caches so that the next read recomputes them.
  class PageState

    # The text-related graphics state every page starts with. Frozen because
    # it is only ever used as a template; each page works on its own copy.
    DEFAULT_GRAPHICS_STATE = {
      :char_spacing   => 0,
      :word_spacing   => 0,
      :h_scaling      => 1.0,
      :text_leading   => 0,
      :text_font      => nil,
      :text_font_size => 0,
      :text_mode      => 0,
      :text_rise      => 0,
      :text_knockout  => 0
    }.freeze

    # starting a new page
    #
    # page - the page being processed. It must respond to cache, objects,
    #        fonts, xobjects and color_spaces.
    def initialize(page)
      @page          = page
      @cache         = page.cache
      @objects       = page.objects
      # build_fonts reads @objects, so it has to run after the line above
      @font_stack    = [build_fonts(page.fonts)]
      @xobject_stack = [page.xobjects]
      @cs_stack      = [page.color_spaces]
      @stack         = [DEFAULT_GRAPHICS_STATE.dup]
      state[:ctm]    = identity_matrix

      # These are only valid when inside a `BT` block and we re-initialize them on each
      # `BT`. However, we need the instance variables set so PDFs with the text operators
      # out of order don't trigger NoMethodError when these are nil
      @text_matrix      = identity_matrix
      @text_line_matrix = identity_matrix

      # memoised values, populated lazily by text_rendering_matrix and font_size
      @text_rendering_matrix = nil
      @font_size             = nil
    end

    #####################################################
    # Graphics State Operators
    #####################################################

    # Clones the current graphics state and pushes it onto the top of the stack.
    # Any changes that are subsequently made to the state can then be reversed
    # by calling restore_graphics_state.
    #
    def save_graphics_state
      @stack.push clone_state
    end

    # Restore the state to the previous value on the stack.
    #
    # The bottom-most state is never removed: an unbalanced restore would
    # otherwise leave no current state at all and make every later operator
    # fail with NoMethodError. Returns the state that was discarded, or nil
    # when there was nothing to restore.
    #
    def restore_graphics_state
      discarded = @stack.size > 1 ? @stack.pop : nil
      # the restored state may carry a different CTM, font size, scaling or rise
      invalidate_text_caches
      discarded
    end

    #####################################################
    # Matrix Operators
    #####################################################

    # update the current transformation matrix.
    #
    # If the CTM is currently undefined, just store the new values.
    #
    # If there's an existing CTM, then multiply the existing matrix
    # with the new matrix to form the updated matrix.
    #
    def concatenate_matrix(a, b, c, d, e, f)
      incoming = TransformationMatrix.new(a, b, c, d, e, f)
      current  = state[:ctm]

      state[:ctm] = if current
        incoming.multiply!(
          current.a, current.b,
          current.c, current.d,
          current.e, current.f
        )
      else
        incoming
      end

      # both memoised values are derived from the CTM
      invalidate_text_caches
    end

    #####################################################
    # Text Object Operators
    #####################################################

    # BT: reset the text matrix and text line matrix to the identity matrix.
    def begin_text_object
      @text_matrix      = identity_matrix
      @text_line_matrix = identity_matrix
      invalidate_text_caches
    end

    # ET
    def end_text_object
      # don't need to do anything
    end

    #####################################################
    # Text State Operators
    #####################################################

    def set_character_spacing(char_spacing)
      state[:char_spacing] = char_spacing
    end

    # h_scaling is a percentage; it is stored as a ratio (100 becomes 1.0).
    def set_horizontal_text_scaling(h_scaling)
      invalidate_text_caches # horizontal scaling feeds the text rendering matrix
      state[:h_scaling] = h_scaling / 100.0
    end

    # label is the resource name later resolved via find_font / current_font.
    def set_text_font_and_size(label, size)
      invalidate_text_caches # font size feeds the text rendering matrix
      state[:text_font]      = label
      state[:text_font_size] = size
    end

    # The effective font size: the vertical distance between the points
    # (0,0) and (1,1) after they are mapped through the text rendering
    # matrix. Memoised until something the matrix depends on changes.
    def font_size
      @font_size ||= begin
                       _, zero = trm_transform(0,0)
                       _, one  = trm_transform(1,1)
                       (zero - one).abs
                     end
    end

    def set_text_leading(leading)
      state[:text_leading] = leading
    end

    def set_text_rendering_mode(mode)
      state[:text_mode] = mode
    end

    def set_text_rise(rise)
      invalidate_text_caches # text rise feeds the text rendering matrix
      state[:text_rise] = rise
    end

    def set_word_spacing(word_spacing)
      state[:word_spacing] = word_spacing
    end

    #####################################################
    # Text Positioning Operators
    #####################################################

    # Translate the text line matrix by (x, y) and restart the text matrix
    # from the result.
    def move_text_position(x, y) # Td
      @text_line_matrix = translate(@text_line_matrix, x, y)
      @text_matrix      = @text_line_matrix.dup
      invalidate_text_caches
    end

    def move_text_position_and_set_leading(x, y) # TD
      set_text_leading(-1 * y)
      move_text_position(x, y)
    end

    def set_text_matrix_and_text_line_matrix(a, b, c, d, e, f) # Tm
      @text_matrix = TransformationMatrix.new(
        a, b,
        c, d,
        e, f
      )
      @text_line_matrix = @text_matrix.dup
      invalidate_text_caches
    end

    def move_to_start_of_next_line # T*
      move_text_position(0, -state[:text_leading])
    end

    #####################################################
    # Text Showing Operators
    #####################################################

    def show_text_with_positioning(params) # TJ
      # TODO record position changes in state here
    end

    # Only the line move is tracked here; the string itself is not used.
    def move_to_next_line_and_show_text(str) # '
      move_to_start_of_next_line
    end

    def set_spacing_next_line_show_text(aw, ac, string) # "
      set_word_spacing(aw)
      set_character_spacing(ac)
      move_to_next_line_and_show_text(string)
    end

    #####################################################
    # XObjects
    #####################################################

    # Look up the XObject named label and yield it to the optional block
    # with the graphics state saved around the call.
    #
    # For Form XObjects a FormXObject wrapper is yielded instead and the
    # form's own fonts and xobjects take lookup precedence while the block
    # runs. The graphics state and resource stacks are unwound even when the
    # block raises, so a failure inside a form cannot corrupt later
    # processing of the page.
    #
    # Raises MalformedPDFError when no XObject with that label exists; this
    # is checked before any state is pushed.
    #
    def invoke_xobject(label)
      xobject = find_xobject(label)
      raise MalformedPDFError, "XObject #{label} not found" if xobject.nil?

      save_graphics_state
      begin
        matrix = xobject.hash[:Matrix]
        concatenate_matrix(*matrix) if matrix

        if xobject.hash[:Subtype] == :Form
          form = PDF::Reader::FormXObject.new(@page, xobject, :cache => @cache)
          # resolve both resource sets before touching either stack so a
          # failure here leaves the stacks balanced
          form_fonts    = form.font_objects
          form_xobjects = form.xobjects
          @font_stack.unshift(form_fonts)
          @xobject_stack.unshift(form_xobjects)
          begin
            yield form if block_given?
          ensure
            @font_stack.shift
            @xobject_stack.shift
          end
        else
          yield xobject if block_given?
        end
      ensure
        restore_graphics_state
      end
    end

    #####################################################
    # Public Visible State
    #####################################################

    # transform x and y co-ordinates from the current user space to the
    # underlying device space.
    #
    # Returns a two element array: [x, y]
    #
    def ctm_transform(x, y)
      [
        (ctm.a * x) + (ctm.c * y) + (ctm.e),
        (ctm.b * x) + (ctm.d * y) + (ctm.f)
      ]
    end

    # transform x and y co-ordinates from the current text space to the
    # underlying device space.
    #
    # transforming (0,0) is a really common case, so optimise for it to
    # avoid unnecessary object allocations
    #
    # Returns a two element array: [x, y]
    #
    def trm_transform(x, y)
      trm = text_rendering_matrix
      if x == 0 && y == 0
        [trm.e, trm.f]
      else
        [
          (trm.a * x) + (trm.c * y) + (trm.e),
          (trm.b * x) + (trm.d * y) + (trm.f)
        ]
      end
    end

    # The font selected by the most recent set_text_font_and_size, or nil
    # when the label cannot be resolved.
    def current_font
      find_font(state[:text_font])
    end

    # Each find_* method searches its resource stack from the innermost
    # scope outwards and returns nil when the label is unknown.
    def find_font(label)
      lookup_resource(@font_stack, label)
    end

    def find_color_space(label)
      lookup_resource(@cs_stack, label)
    end

    def find_xobject(label)
      lookup_resource(@xobject_stack, label)
    end

    # The number of graphics states currently on the stack.
    #
    def stack_depth
      @stack.size
    end

    # when save_graphics_state is called, we need to push a new copy of the
    # current state onto the stack. That way any modifications to the state
    # will be undone once restore_graphics_state is called.
    #
    # This returns a deep clone of the current state, ensuring changes are
    # kept separate from earlier states.
    #
    # Marshal is used to round-trip the state through a string to easily
    # perform the deep clone. Kinda hacky, but effective. The data being
    # loaded was dumped on the same line, so it never comes from outside
    # this process.
    #
    def clone_state
      if @stack.empty?
        {}
      else
        Marshal.load Marshal.dump(@stack.last)
      end
    end

    # after each glyph is painted onto the page the text matrix must be
    # modified. There's no defined operator for this, but depending on
    # the use case some receivers may need to mutate the state with this
    # while walking a page.
    #
    # NOTE: some of the variable names in this method are obscure because
    #       they mirror variable names from the PDF spec
    #
    # NOTE: see Section 9.4.4, PDF 32000-1:2008, pp 252
    #
    # Arguments:
    #
    # w0 - the glyph width in *text space*. This generally means the width
    #      in glyph space should be divided by 1000 before being passed to
    #      this function
    # tj - any kerning that should be applied to the text matrix before the
    #      following glyph is painted. This is usually the numeric arguments
    #      in the array passed to a TJ operator
    # word_boundary - a boolean indicating if a word boundary was just
    #                 reached. Depending on the current state extra space
    #                 may need to be added
    #
    def process_glyph_displacement(w0, tj, word_boundary)
      fs = state[:text_font_size]
      tc = state[:char_spacing]
      tw = word_boundary ? state[:word_spacing] : 0
      th = state[:h_scaling]

      # optimise the common path to reduce Float allocations
      if th == 1 && tj == 0 && tc == 0 && tw == 0
        tx = w0 * fs
      elsif tj != 0
        # don't apply spacing to TJ displacement
        tx = (w0 - (tj/1000.0)) * fs * th
      else
        # scale the glyph width by the font size, add character and word
        # spacing, then apply horizontal scaling to the total
        tx = ((w0 * fs) + tc + tw) * th
      end

      # TODO: support ty > 0
      ty = 0

      @text_matrix = translate(@text_matrix, tx, ty)
      invalidate_text_caches
    end

    private

    # used for many and varied text positioning calculations. We potentially
    # need to access the results of this method many times when working with
    # text, so memoize it
    #
    # The result is a matrix built from the text state (font size,
    # horizontal scaling, rise), multiplied by the text matrix and then by
    # the CTM.
    #
    def text_rendering_matrix
      @text_rendering_matrix ||= begin
        state_matrix = TransformationMatrix.new(
          state[:text_font_size] * state[:h_scaling], 0,
          0, state[:text_font_size],
          0, state[:text_rise]
        )
        state_matrix.multiply!(
          @text_matrix.a, @text_matrix.b,
          @text_matrix.c, @text_matrix.d,
          @text_matrix.e, @text_matrix.f
        )
        state_matrix.multiply!(
          ctm.a, ctm.b,
          ctm.c, ctm.d,
          ctm.e, ctm.f
        )
      end
    end

    # Forget the memoised text rendering matrix and font size so they are
    # recomputed on next use. Always returns nil.
    #
    def invalidate_text_caches
      @font_size = @text_rendering_matrix = nil
    end

    # Build a new matrix: a translation by (tx, ty) multiplied by the six
    # values of matrix. The matrix passed in is left untouched.
    #
    def translate(matrix, tx, ty)
      TransformationMatrix.new(1, 0, 0, 1, tx, ty).multiply!(
        matrix.a, matrix.b,
        matrix.c, matrix.d,
        matrix.e, matrix.f
      )
    end

    # Return the value stored under label in the first dictionary on the
    # stack that has that key, or nil when none does.
    #
    def lookup_resource(stack, label)
      dict = stack.find { |resources| resources.has_key?(label) }
      dict ? dict[label] : nil
    end

    # return the current transformation matrix
    #
    def ctm
      state[:ctm]
    end

    # the graphics state currently in effect (top of the stack)
    #
    def state
      @stack.last
    end

    # wrap the raw PDF Font objects in handy ruby Font objects.
    #
    def build_fonts(raw_fonts)
      raw_fonts.map { |label, font|
        [label, PDF::Reader::Font.new(@objects, @objects.deref_hash(font) || {})]
      }.to_h
    end

    #####################################################
    # Low-level Matrix Operations
    #####################################################

    # Transformations are represented by TransformationMatrix objects, which
    # are constructed from six values (a, b, c, d, e, f). The identity
    # transformation is (1, 0, 0, 1, 0, 0).
    #
    def identity_matrix
      TransformationMatrix.new(1, 0, 0, 1, 0, 0)
    end

  end
end