#!/usr/bin/env ruby
# encoding: ascii-8bit
#
# Rpdf2txt -- PDF to Text Parser
# Copyright (C) 2003 Andreas Schrafl, Hannes Wyss
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
#
# ywesee - intellectual capital connected, Winterthurerstrasse 52, CH-8006 Zürich, Switzerland
# hwyss@ywesee.com, aschrafl@ywesee.com
#
# TestTextState -- Rpdf2txt -- 29.11.2002 -- aschrafl@ywesee.com

$LOAD_PATH << File.expand_path('../lib', File.dirname(__FILE__))
$LOAD_PATH << File.dirname(__FILE__)

require 'test/unit'
require 'rpdf2txt/text_state'
require 'rpdf2txt/object'
require 'flexmock'

module Rpdf2txt
  # Reopened for the tests only: exposes the word width (+w+) and the
  # character spacing so that assertions can read them directly.
  class TextState
    attr_accessor :w, :char_spacing
  end
end

# Unit tests for Rpdf2txt::TextState: coordinate handling, leading,
# ordering, line/word grouping, glyph widths and text transcoding.
class TestTextState < Test::Unit::TestCase
  include FlexMock::TestCase

  # Tolerance for comparing Float glyph widths.
  FLOAT_TOLERANCE = 1e-9

  # Builds a TextState targeting 'latin1' and attaches a Type1 font whose
  # /Widths table covers the character codes 32..240 (MacRoman encoding).
  def setup
    font_src = <<-EOS
580 0 obj
<<
/Type /Font
/Subtype /Type1
/FirstChar 32
/LastChar 240
/Widths [ 278 389 500 556 556 1000 722 278 333 333 556 600 278 389 278 278
556 556 556 556 556 556 556 556 556 556 278 278 600 600 600 500
800 722 611 611 722 556 500 722 722 278 389 667 500 944 722 778
556 778 611 556 556 722 667 1000 667 667 556 389 278 389 600 500
278 556 611 444 611 556 389 611 611 278 278 556 278 889 611 611
611 611 389 444 389 611 556 889 556 556 500 333 222 333 600 278
0 0 0 0 0 0 0 0 0 0 0 0 0 0 556 0 0 0 0 0 0 0 0 0 0 0 0 0 0 611 0 0 0 0 556 556 0 0 0 0 0 800 0 0 0 278 0 0 278 600 278 278 0 611
278 278 278 278 278 0 0 278 0 0 0 0 0 278 0 278 278 0 0 0 278 0 0 0 0 0 0 0 0 0 0 0 0 278 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 278 ]
/Encoding /MacRomanEncoding
/BaseFont /Frutiger-BoldItalic
/FontDescriptor 579 0 R
>>
endobj
    EOS
    @text_state = Rpdf2txt::TextState.new('latin1')
    @font = Rpdf2txt::Font.new(font_src)
    @text_state.set_font(@font)
  end

  # Absolute coordinates passed as strings are readable through x/y once
  # update! has been called.
  def test_set
    @text_state.set_x("42.7953")
    @text_state.set_y("670.6528")
    @text_state.update!
    assert_equal(427953, (@text_state.x * 10000).round)
    assert_equal(6706528, (@text_state.y * 10000).round)
  end

  # update_x/update_y apply relative offsets; applying the negated offsets
  # afterwards restores the starting position.
  def test_update_td
    @text_state.set_x("100.1234")
    @text_state.set_y("200.5678")
    @text_state.update_x("1.2345")
    @text_state.update_y("6.7890")
    @text_state.update!
    assert_equal(1013579, (@text_state.x * 10000).to_i)
    assert_equal(2073568, (@text_state.y * 10000).to_i)

    @text_state.update_x("-1.2345")
    @text_state.update_y("-6.7890")
    @text_state.update!
    assert_equal(1001234, (@text_state.x * 10000).to_i)
    assert_equal(2005678, (@text_state.y * 10000).to_i)
  end

  # Same as test_update_td, but the leading is set alongside the offsets.
  def test_update_tD
    @text_state.set_x("100.1234")
    @text_state.set_y("200.5678")
    @text_state.set_lead("-6.7890")
    @text_state.update_x("1.2345")
    @text_state.update_y("6.7890")
    @text_state.update!
    assert_equal(-6789.0, @text_state.lead * 1000)
    assert_equal(1013579, (@text_state.x * 10000).to_i)
    assert_equal(2073568, (@text_state.y * 10000).to_i)

    # The leading is set twice on purpose; the assertion below expects the
    # second value to replace the first.
    @text_state.set_lead(-1.2345)
    @text_state.set_lead(6.7890)
    @text_state.update_x("-1.2345")
    @text_state.update_y("-6.7890")
    @text_state.update!
    assert_equal(6789.0, @text_state.lead * 1000)
    assert_equal(1001234, (@text_state.x * 10000).to_i)
    assert_equal(2005678, (@text_state.y * 10000).to_i)
  end

  # set_lead accepts positive and negative string values.
  def test_set_lead_tl
    @text_state.set_lead("1.2345")
    assert_equal(1234.5, @text_state.lead * 1000)
    @text_state.set_lead("-6.7890")
    assert_equal(-6789.0, @text_state.lead * 1000)
  end

  # After advance_x the x position differs from the value that was set;
  # step then shifts y by the leading (200.1234 - 3.4567) and x is back at
  # the value that was set.
  def test_step
    @text_state.set_lead(-3.4567)
    @text_state.set_y(200.1234)
    @text_state.set_x(400.5678)
    @text_state.set_txt('foo')
    @text_state.advance_x
    @text_state.update!
    assert_equal(2001234, (@text_state.y * 10000).to_i)
    assert_not_equal(4005678, (@text_state.x * 10000).to_i)

    @text_state.step
    @text_state.update!
    assert_equal(1966667, (@text_state.y * 10000).to_i)
    assert_equal(4005678, (@text_state.x * 10000).to_i)
  end

  # With font size 0.0, the state with the larger y is expected to compare
  # as greater.
  def test_compare1
    @text_state.set_font_size(0.0)
    @text_state.set_x(100.1234)
    @text_state.set_y(200.5678)
    @text_state.update!

    other = Rpdf2txt::TextState.new
    other.set_font_size(0.0)
    other.set_x(88.9012)
    other.set_y(250.3456)
    other.update!

    # The failure message must be a String (an Integer message is rejected
    # by test-unit), so the <=> result is interpolated.
    cmp = other <=> @text_state
    assert(other > @text_state,
           "expected other > @text_state, <=> returned #{cmp.inspect}")
  end

  # With equal y and equal font size, the state with the smaller x is
  # expected to compare as smaller.
  def test_compare2
    @text_state.set_font_size(10)
    @text_state.set_x(100.1234)
    @text_state.set_y(200.5678)
    @text_state.update!

    other = Rpdf2txt::TextState.new
    other.set_font_size(10)
    other.set_x(88.9012)
    other.set_y(200.5678)
    other.update!

    # See test_compare1: the message has to be a String.
    cmp = other <=> @text_state
    assert(other < @text_state,
           "expected other < @text_state, <=> returned #{cmp.inspect}")
  end

  # States sharing the same y are reported as the same word; moving one of
  # them to a different y breaks that relation with all the others.
  def test_same_word
    # NOTE(review): every p_n is assigned twice, so the instance that
    # received set_x is discarded and the states used below only carry the
    # y value. Kept as is because same_word's dependence on x is not
    # visible from this file -- TODO confirm the intent before merging the
    # two calls onto a single instance.
    (p1 = Rpdf2txt::TextState.new).set_x(-10000)
    (p1 = Rpdf2txt::TextState.new).set_y(10000)
    (p2 = Rpdf2txt::TextState.new).set_x(-5000)
    (p2 = Rpdf2txt::TextState.new).set_y(10000)
    (p3 = Rpdf2txt::TextState.new).set_x(5000)
    (p3 = Rpdf2txt::TextState.new).set_y(10000)
    (p4 = Rpdf2txt::TextState.new).set_x(10000)
    (p4 = Rpdf2txt::TextState.new).set_y(10000)

    p1.set_font_size(10)
    p2.set_font_size(10)
    p3.set_font_size(10)
    p4.set_font_size(10)
    p1.update!
    p2.update!
    p3.update!
    p4.update!

    assert_equal(true, p1.same_word(p1))
    assert_equal(true, p1.same_word(p2))
    assert_equal(true, p1.same_word(p3))
    assert_equal(true, p1.same_word(p4))
    assert_equal(true, p2.same_word(p2))
    assert_equal(true, p2.same_word(p3))
    assert_equal(true, p2.same_word(p4))
    assert_equal(true, p3.same_word(p3))
    assert_equal(true, p3.same_word(p4))
    assert_equal(true, p4.same_word(p4))

    p1.set_y(-10)
    p1.update!
    assert_equal(true, p1.same_word(p1))
    assert_equal(false, p1.same_word(p2))
    assert_equal(false, p1.same_word(p3))
    assert_equal(false, p1.same_word(p4))
  end

  # same_line is symmetric and is expected to hold whenever the vertical
  # extents (y down to y2 = y - font size) of the two states overlap.
  def test_same_line
    ts1 = Rpdf2txt::TextState.new
    ts1.set_y(210)
    ts1.set_font_size(10)
    ts1.update!
    assert_equal(210.000, ts1.y)
    assert_equal(200.000, ts1.y2)

    ts2 = Rpdf2txt::TextState.new
    ts2.set_y(200)
    ts2.set_font_size(10)
    ts2.update!
    assert_equal(200.000, ts2.y)
    assert_equal(190.000, ts2.y2)

    # -----
    #  ts1
    # -----  -----    => not same line
    #         ts2
    #        -----
    assert_equal(false, ts1.same_line(ts2))
    assert_equal(false, ts2.same_line(ts1))

    ts3 = Rpdf2txt::TextState.new
    ts3.set_y(205)
    ts3.set_font_size(10)
    ts3.update!
    assert_equal(205.000, ts3.y)
    assert_equal(195.000, ts3.y2)

    # -----
    #  ts1   -----
    # -----   ts3     => same line
    #        -----
    assert_equal(true, ts1.same_line(ts3))
    assert_equal(true, ts3.same_line(ts1))

    #        -----
    # -----   ts3     => same line
    #  ts2   -----
    # -----
    assert_equal(true, ts2.same_line(ts3))
    assert_equal(true, ts3.same_line(ts2))

    ts4 = Rpdf2txt::TextState.new
    ts4.set_y(210)
    ts4.set_font_size(30)
    ts4.update!
    assert_equal(210.000, ts4.y)
    assert_equal(180.000, ts4.y2)

    #        -----
    # -----
    #  ts1    ts4     => same line
    # -----
    #        -----
    assert_equal(true, ts1.same_line(ts4))
    assert_equal(true, ts4.same_line(ts1))
  end

  # The width of "Hello World" is the sum of the glyph widths from the
  # font's /Widths table divided by 1000 (5612 / 1000).
  def test_set_txt
    @text_state.set_txt("Hello World")
    assert_in_delta(5.612, @text_state.w, 0.001)
  end

  # Character spacing starts at 0; the numeric part of a "<value> Tc"
  # operand is stored multiplied by 1000.
  def test_set_char_spacing
    assert_equal(0, @text_state.char_spacing)
    @text_state.set_char_spacing('-0.456 Tc')
    assert_equal(-456, @text_state.char_spacing)
    @text_state.set_char_spacing('0.789 Tc')
    assert_equal(789, @text_state.char_spacing)
  end

  # Like test_same_word, but every state also has a font and a text
  # ("abc", "bcd", "cde", "def") assigned before update!.
  def test_same_word2
    # NOTE(review): as in test_same_word, each p_n is assigned twice, so
    # the x value never reaches the instance that is used afterwards --
    # TODO confirm the intent.
    (p1 = Rpdf2txt::TextState.new).set_x(-10000)
    (p1 = Rpdf2txt::TextState.new).set_y(10000)
    (p2 = Rpdf2txt::TextState.new).set_x(-5000)
    (p2 = Rpdf2txt::TextState.new).set_y(10000)
    (p3 = Rpdf2txt::TextState.new).set_x(5000)
    (p3 = Rpdf2txt::TextState.new).set_y(10000)
    (p4 = Rpdf2txt::TextState.new).set_x(10000)
    (p4 = Rpdf2txt::TextState.new).set_y(10000)

    p1.set_font_size(10.0)
    p2.set_font_size(10.0)
    p3.set_font_size(10.0)
    p4.set_font_size(10.0)

    chars = %w(a b c)
    [p1, p2, p3, p4].each do |text_state|
      text_state.set_font(@font)
      text_state.set_txt(chars.join)
      text_state.update!
      # Shift every letter by one so the next state gets a different text.
      chars = chars.map(&:next)
    end

    assert_equal(true, p1.same_word(p1))
    assert_equal(true, p1.same_word(p2))
    assert_equal(true, p1.same_word(p3))
    assert_equal(true, p1.same_word(p4))
    assert_equal(true, p2.same_word(p2))
    assert_equal(true, p2.same_word(p3))
    assert_equal(true, p2.same_word(p4))
    assert_equal(true, p3.same_word(p3))
    assert_equal(true, p3.same_word(p4))
    assert_equal(true, p4.same_word(p4))

    p1.set_y(-10)
    p1.update!
    assert_equal(true, p1.same_word(p1))
    assert_equal(false, p1.same_word(p2))
    assert_equal(false, p1.same_word(p3))
    assert_equal(false, p1.same_word(p4))
  end

  # char_width is the glyph width / 1000, plus the character spacing for
  # every character, plus the word spacing for the space character only.
  # Widths are Floats, so they are compared with a tolerance instead of ==.
  def test_char_width
    assert_in_delta(0.556, @text_state.char_width('a'), FLOAT_TOLERANCE)
    assert_in_delta(0.278, @text_state.char_width(' '), FLOAT_TOLERANCE)

    @text_state.set_char_spacing('0.023')
    assert_in_delta(0.579, @text_state.char_width('a'), FLOAT_TOLERANCE)
    assert_in_delta(0.301, @text_state.char_width(' '), FLOAT_TOLERANCE)

    @text_state.set_word_spacing('0.012')
    assert_in_delta(0.579, @text_state.char_width('a'), FLOAT_TOLERANCE)
    assert_in_delta(0.313, @text_state.char_width(' '), FLOAT_TOLERANCE)
  end

  # Text set through a font that reports the 'mac' encoding is expected to
  # come back from txt transcoded to the state's target encoding (latin1).
  def test_txt
    font = flexmock('font')
    # The non-ASCII bytes are written as escapes so the test does not
    # depend on how this source file is stored or decoded:
    # 0x9A is o-umlaut in MacRoman, 0xF6 is o-umlaut in ISO-8859-1.
    input = "Anwendung: Bei nerv\x9Asen Herzbeschwerden"
    font.should_receive(:encoding).and_return('mac')
    font.should_receive(:attributes).and_return({})
    font.should_ignore_missing
    @text_state.set_font(font)
    @text_state.set_txt(input)
    expected = "Anwendung: Bei nerv\xF6sen Herzbeschwerden"
    assert_equal(expected, @text_state.txt)
  end
end