# coding: utf-8
# frozen_string_literal: true

require 'forwardable'
require 'pdf/reader/page_layout'

module PDF
  class Reader

    # Receiver for content stream operators that collects the text drawn on
    # one page. Every glyph that is shown is recorded as a TextRun, and
    # #content hands those runs to PageLayout to produce the final string.
    #
    class PageTextReceiver
      extend Forwardable

      # The string a font must decode a glyph to for it to count as a space.
      SPACE = " "

      attr_reader :state, :options

      ########## BEGIN FORWARDERS ##########
      # The operators below are passed straight through to the current
      # page state object (@state).

      # Graphics State Operators
      def_delegators :@state,
                     :save_graphics_state,
                     :restore_graphics_state

      # Matrix Operators
      def_delegators :@state,
                     :concatenate_matrix

      # Text Object Operators
      def_delegators :@state,
                     :begin_text_object,
                     :end_text_object

      # Text State Operators
      def_delegators :@state,
                     :set_character_spacing,
                     :set_horizontal_text_scaling,
                     :set_text_font_and_size,
                     :font_size,
                     :set_text_leading,
                     :set_text_rendering_mode,
                     :set_text_rise,
                     :set_word_spacing

      # Text Positioning Operators
      def_delegators :@state,
                     :move_text_position,
                     :move_text_position_and_set_leading,
                     :set_text_matrix_and_text_line_matrix,
                     :move_to_start_of_next_line
      ########## END FORWARDERS ##########

      # Called when a new page starts: builds a fresh PageState for the page,
      # empties the collected characters and resolves the page's MediaBox
      # through the page's object store.
      def page=(page)
        @state = PageState.new(page)
        @characters = []
        @content = []
        @mediabox = page.objects.deref(page.attributes[:MediaBox])
      end

      # Returns the page text, produced by laying out every recorded
      # character run inside the page's MediaBox.
      def content
        layout = PageLayout.new(@characters, @mediabox)
        layout.to_s
      end

      #####################################################
      # Text Showing Operators
      #####################################################

      # Tj (AWAY)
      #
      # Records a string that is drawn on the page.
      def show_text(string)
        internal_show_text(string)
      end

      # TJ [(A) 120 (WA) 20 (Y)]
      #
      # Walks the operand array: String entries are shown as text, every
      # other entry is passed to the state as a glyph displacement.
      def show_text_with_positioning(params)
        params.each do |item|
          case item
          when String then internal_show_text(item)
          else @state.process_glyph_displacement(0, item, false)
          end
        end
      end

      # '
      #
      # Moves to the start of the next line, then shows the string.
      def move_to_next_line_and_show_text(str)
        @state.move_to_start_of_next_line
        show_text(str)
      end

      # "
      #
      # Sets the word spacing (aw) and the character spacing (ac), then moves
      # to the next line and shows the string.
      def set_spacing_next_line_show_text(aw, ac, string)
        @state.set_word_spacing(aw)
        @state.set_character_spacing(ac)
        move_to_next_line_and_show_text(string)
      end

      #####################################################
      # XObjects
      #####################################################

      # Asks the state to invoke the named XObject. Form XObjects are walked
      # with this receiver; any other kind of XObject is ignored.
      def invoke_xobject(label)
        @state.invoke_xobject(label) do |xobj|
          xobj.walk(self) if xobj.is_a?(PDF::Reader::FormXObject)
        end
      end

      private

      # Decodes the string into glyph codes with the current font, records a
      # TextRun for every glyph that is not a space and advances the text
      # position after each glyph.
      #
      # Raises PDF::Reader::MalformedPDFError when no current font is set.
      def internal_show_text(string)
        raise PDF::Reader::MalformedPDFError, "current font is invalid" if @state.current_font.nil?

        codes = @state.current_font.unpack(string)
        codes.each do |code|
          # where the current glyph is painted
          x, y = @state.trm_transform(0, 0)
          text = @state.current_font.to_utf8(code)

          # the glyph width is needed both for the recorded run and for the
          # displacement that positions the following glyph.
          # presumably the font reports widths in thousandths of a text
          # space unit, hence the division - TODO confirm
          width = @state.current_font.glyph_width(code) / 1000.0

          # NOTE(review): th is a constant 1 here; it looks like a
          # placeholder for horizontal scaling - confirm
          th = 1
          run_width = width * @state.font_size * th

          is_space = (text == SPACE)

          # spaces only move the text position, they are never recorded
          @characters << TextRun.new(x, y, run_width, @state.font_size, text) unless is_space

          @state.process_glyph_displacement(width, 0, is_space)
        end
      end

    end
  end
end