#!/usr/bin/env ruby # # Rpdf2txt -- PDF to Text Parser # Copyright (C) 2003 Andreas Schrafl, Hannes Wyss, Masaomi Hatakeyama # # This library is free software; you can redistribute it and/or # modify it under the terms of the GNU Lesser General Public # License as published by the Free Software Foundation; either # version 2.1 of the License, or (at your option) any later version. # # This library is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # Lesser General Public License for more details. # # You should have received a copy of the GNU Lesser General Public # License along with this library; if not, write to the Free Software # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA # # ywesee - intellectual capital connected, Winterthurerstrasse 52, CH-8006 Zürich, Switzerland # zdvatz@ywesee.com, mhatakeyama@ywesee.com # # TestPdfObject -- Rpdf2txt -- 21.11.2002 -- aschrafl@ywesee.com $KCODE = 'u' $: << File.expand_path('../lib', File.dirname(__FILE__)) require 'test/unit' require 'tempfile' require 'rpdf2txt/object' require 'rpdf2txt/default_handler' module Rpdf2txt class PdfObject attr_accessor :attributes end class TrailerDictionary public attr_accessor :attributes end class PageLeaf < TreeNode attr_accessor :contents, :resources public :join_snippets end class CMap < Stream public :extract_bfchar, :extract_bfrange end class TestCmap < Test::Unit::TestCase def setup @input_bfchar = <<-EOS 25 0 obj <> stream /CIDInit /ProcSet findresource begin 12 dict begin begincmap /CIDSystemInfo << /Registry (TT11+0) /Ordering (T42UV) /Supplement 0 >> def /CMapName /TT11+0 def /CMapType 2 def 1 begincodespacerange <004a> <0074> endcodespacerange 3 beginbfchar <004a> <03B3> <0064> <2264> <0074> <2265> endbfchar endcmap CMapName currentdict /CMap defineresource pop end end endstream endobj EOS @input_bfrange = <<-EOS 75 0 obj <> stream /CIDInit /ProcSet findresource begin 12 dict begin begincmap /CIDSystemInfo << /Registry (TT11+0) /Ordering (T42UV) /Supplement 0 >> def /CMapName /TT11+0 def /CMapType 2 def 1 begincodespacerange <0044> <0045> endcodespacerange 1 beginbfrange <0044> <0045> <03B1> endbfrange endcmap CMapName currentdict /CMap defineresource pop end end endstream endobj EOS end def test_parser_grammar_bfchar cmap = Rpdf2txt::CMap.new(@input_bfchar) assert_nothing_raised{ ast= Rpdf2txt.cmap_parser.parse(cmap.extract_bfchar) } end def test_extract_attributes_bfchar cmap = Rpdf2txt::CMap.new(@input_bfchar) expected = {:length => "357"} assert_equal(expected, cmap.attributes) end def test_cmap_bfchar cmap = Rpdf2txt::CMap.new(@input_bfchar) assert_equal(8805, cmap.map[116]) assert_equal(8804, cmap.map[100]) assert_equal(947, cmap.map[74]) end def test_parser_grammar_bfrange cmap = Rpdf2txt::CMap.new(@input_bfrange) assert_nothing_raised{ ast= Rpdf2txt.cmap_range_parser.parse(cmap.extract_bfrange) } end def test_cmap_bfrange cmap = Rpdf2txt::CMap.new(@input_bfrange) assert_equal(945, cmap.map[68]) assert_equal(946, cmap.map[69]) end def test_cmap_bfrange_array input_bfrange = <<-EOS 75 0 obj <> stream /CIDInit /ProcSet findresource begin 12 dict begin begincmap /CIDSystemInfo << /Registry (TT11+0) /Ordering (T42UV) /Supplement 0 >> def /CMapName /TT11+0 def /CMapType 2 def 1 begincodespacerange <0094> <0095> endcodespacerange 1 beginbfrange <0094> <0095> [ <2264> <2265> ] <0024> <0025> [ <2224> <2225> ] endbfrange endcmap CMapName currentdict /CMap defineresource pop end end endstream endobj EOS cmap = Rpdf2txt::CMap.new(input_bfrange) assert_equal(8804, cmap.map[148]) assert_equal(8805, cmap.map[149]) assert_equal(8740, cmap.map[36]) assert_equal(8741, cmap.map[37]) end end class TestPdfObject < Test::Unit::TestCase def setup input = '3 0 obj << /Type /Page /Parent 2 0 R /Contents 6 0 R >> endobj' @tree_node = Rpdf2txt::TreeNode.new(input) end def test_tree_node1 input = '4 0 obj << /Type /Pages /Kids [ 7 0 R 8 0 R ] /Count 2 >> endobj' node = Rpdf2txt::TreeNode.new(input) assert_equal(4, node.oid) assert_equal(["7 0 R", "8 0 R"], node.attributes[:kids]) assert_equal(nil, node.attributes[:contents]) assert_equal(nil, node.attributes[:parent]) assert_equal(true, node.root?) end def test_tree_node2 input = '3 0 obj << /Type /Page /Parent 2 0 R /Contents 6 0 R >> endobj' node = Rpdf2txt::TreeNode.new(input) assert_equal(3, node.oid) assert_equal(nil, node.attributes[:kids]) assert_equal('6 0 R', node.attributes[:contents]) assert_equal('2 0 R', node.attributes[:parent]) assert_equal(false, node.root?) end def test_tree_node3 input = '3 0 obj << /Type /Page /Parent 2 0 R / 2 0 R >> endobj' node = nil assert_nothing_raised { node = Rpdf2txt::TreeNode.new(input) } end def test_tree_node4 src = ' 400 0 obj << /Title (οê\)ÃìÂÞ\\žåPÕT#/ûØ-&Ÿ®;Sü“O®A) /Parent 399 0 R /A 436 0 R /Next 433 0 R >> endobj ' node = Rpdf2txt::TreeNode.new(src) assert_equal(400, node.oid) assert_equal('433 0 R', node.attributes[:next]) end def test_tree_node5 src = ' 124 0 obj << /Type /Font /Subtype /CIDFontType2 /BaseFont /HAGNPN+SymbolMT /FontDescriptor 122 0 R /CIDSystemInfo << /Registry (yÃ>á€)/Ordering (qÃ4í‘6ZB)/Supplement 0 >> /DW 1000 /W [ 74 [ 411 ] 100 [ 548 ] 116 [ 548 ] ] >> endobj ' node = Rpdf2txt::TreeNode.new(src) assert_equal(124, node.oid) end def test_tree_node6 src = ' 198 0 obj << /S /Standard#20#28Web#29 /C /Standard#20#28Web#29 /Pg 11 0 R /P 346 0 R /K [ 13 << /Type /MCR /Pg 21 0 R /MCID 0 >> ] >> endobj ' node = Rpdf2txt::TreeNode.new(src) assert_equal(198, node.oid) end def test_tree_node7 src = ' 345 0 obj << /S /Standard /C /Standard /Pg 111 0 R /K 17 /P 346 0 R >> endobj' node = Rpdf2txt::TreeNode.new(src) assert_equal(345, node.oid) end def test_tree_node8 src = ' 346 0 obj << /S /Sect /P 396 0 R /K [ 347 0 R 143 0 R 144 0 R 352 0 R 149 0 R 150 0 R 151 0 R 153 0 R 154 0 R 155 0 R 156 0 R 157 0 R 158 0 R 159 0 R 160 0 R 161 0 R 162 0 R 163 0 R 164 0 R 165 0 R 166 0 R 167 0 R 168 0 R 169 0 R 170 0 R 171 0 R 172 0 R 173 0 R 174 0 R 175 0 R 176 0 R 177 0 R 178 0 R 179 0 R 180 0 R 181 0 R 182 0 R 183 0 R 184 0 R 185 0 R 186 0 R 187 0 R 188 0 R 189 0 R 190 0 R 191 0 R 192 0 R 193 0 R 194 0 R 195 0 R 196 0 R 197 0 R 198 0 R 199 0 R 200 0 R 201 0 R 202 0 R 203 0 R 204 0 R 205 0 R 206 0 R 207 0 R 208 0 R 209 0 R 210 0 R 211 0 R 212 0 R 213 0 R 214 0 R 215 0 R 216 0 R 217 0 R 218 0 R 219 0 R 220 0 R 221 0 R 222 0 R 223 0 R 224 0 R 225 0 R 226 0 R 227 0 R 228 0 R 229 0 R 230 0 R 231 0 R 232 0 R 233 0 R 234 0 R 235 0 R 236 0 R 237 0 R 238 0 R 239 0 R 240 0 R 241 0 R 242 0 R 243 0 R 244 0 R 245 0 R 246 0 R 247 0 R 248 0 R 249 0 R 250 0 R 251 0 R 252 0 R 253 0 R 254 0 R 255 0 R 256 0 R 257 0 R 258 0 R 259 0 R 260 0 R 261 0 R 262 0 R 263 0 R 264 0 R 265 0 R 266 0 R 267 0 R 268 0 R 269 0 R 270 0 R 271 0 R 272 0 R 273 0 R 274 0 R 275 0 R 276 0 R 277 0 R 278 0 R 279 0 R 280 0 R 281 0 R 282 0 R 283 0 R 284 0 R 285 0 R 286 0 R 287 0 R 288 0 R 289 0 R 290 0 R 291 0 R 292 0 R 293 0 R 294 0 R 295 0 R 296 0 R 297 0 R 298 0 R 299 0 R 300 0 R 301 0 R 302 0 R 303 0 R 304 0 R 305 0 R 306 0 R 307 0 R 308 0 R 309 0 R 310 0 R 311 0 R 312 0 R 313 0 R 314 0 R 315 0 R 316 0 R 317 0 R 318 0 R 319 0 R 320 0 R 321 0 R 322 0 R 324 0 R 325 0 R 326 0 R 327 0 R 328 0 R 329 0 R 330 0 R 331 0 R 332 0 R 333 0 R 334 0 R 335 0 R 336 0 R 337 0 R 338 0 R 339 0 R 340 0 R 360 0 R 344 0 R 345 0 R ] >> endobj' node = Rpdf2txt::TreeNode.new(src) assert_equal(346, node.oid) end def test_tree_node9 src = ' 346 0 obj << /S /Sect /P 396 0 R /K [ 155 0 R 156 0 R 157 0 R 158 0 R 159 0 R 160 0 R 161 0 R 162 0 R 155 0 R 156 0 R 157 0 R 158 0 R 159 0 R 160 0 R 161 0 R 162 0 R 163 0 R 164 0 R 165 0 R 166 0 R 167 0 R 168 0 R 169 0 R 170 0 R 163 0 R 164 0 R 165 0 R 166 0 R 167 0 R 168 0 R 169 0 R 170 0 R 190 ] >> endobj' node = Rpdf2txt::TreeNode.new(src) assert_equal(346, node.oid) end def test_tree_node10 src = ' 198 0 obj << /S /Standard#20#28Web#29 /C /Standard#20#28Web#29 /Pg 11 0 R /P 346 0 R /K [ ] >> endobj' node = Rpdf2txt::TreeNode.new(src) assert_equal(198, node.oid) end def test_extract_oids input = '6 0 R' assert_equal(@tree_node.extract_oids(input), [6]) input = ["7 0 R", "8 0 R"] assert_equal(@tree_node.extract_oids(input), [7,8]) end def test_parse_content_from_complex_attributes src = <<-ENDOFSRC 46 0 obj << /Type /Page /Parent 543 0 R /Resources << /Font << /F2 575 0 R /T1_0 504 0 R /F4 573 0 R /T1_4 512 0 R /T1_3 511 0 R >> /Shading << /S12 508 0 R >> /XObject << /Im4 51 0 R >> /ExtGState 47 0 R /ProcSet [ /PDF /Text /ImageB ] /ColorSpace 534 0 R >> /Contents 48 0 R /BleedBox [ 0 0 651 898 ] /MediaBox [ 0 0 651 898 ] /TrimBox [ 28 28 623 870 ] /CropBox [ 28 28 623 870 ] /ArtBox [ 28 28 623 870 ] /LastModified (D:20021210105029+01') /Rotate 0 >> endobj ENDOFSRC obj = Rpdf2txt::PdfObject.new(src) attributes = obj.attributes assert_equal(Hash, attributes.class) assert_equal(11, attributes.size) assert_equal(0, obj.revision_id) end def test_parse_content_from_complex_attributes2 src = <<-ENDOFSRC 568 0 obj << /Linearized 1 /O 570 /H [ 1049 1249 ] /L 910845 /E 169588 /N 108 /T 899366 >> endobj ENDOFSRC obj = Rpdf2txt::PdfObject.new(src) attributes = obj.attributes assert_equal(Hash, attributes.class) assert_equal(7, attributes.size) end def test_parse_pantone src = <<-ENDOFSRC 2 0 obj << /JT 150 0 R /AGFA_NORN_V (ES15.101 V03) /AGFA_PSE_V (Apogee Norm PSE 1.1 23 ) /AGFA_CMYKCCN << /PANTONE#20379#20CV [ 0.08501 0 0.60001 0 ] /PANTONE#20192#20CV [ 0 0.94 0.64999 0 ] /PANTONE#20199#20CV [ 0 1 0.64999 0 ] /PANTONE#20383#20CV [ 0.185 0 1 0.185 ] /PANTONE#20375#20CV [ 0.42999 0 0.78999 0 ] /PANTONE#20100#20CV [ 0 0 0.50999 0 ] /PANTONE#20281#20CV [ 1 0.72 0 0.38 ] /PANTONE#20185#20CV [ 0 0.91 0.75999 0 ] /PANTONE#20377#20CV [ 0.42999 0 1 0.235 ] /PANTONE#203015#20CV [ 1 0.235 0 0.185 ] /PANTONE#20195#20CV [ 0 0.75999 0.56 0.56 ] /PANTONE#20381#20CV [ 0.185 0 0.91 0 ] /PANTONE#20Cl#20Gy#207#20CV [ 0 0 0 0.47 ] /PANTONE#20137#20CV [ 0 0.34 0.91 0 ] /PANTONE#20397#20CV [ 0.11501 0 1 0.11501 ] /PANTONE#20322#20CV [ 1 0 0.38 0.30499 ] /PANTONE#20382#20CV [ 0.30499 0 0.94 0 ] /PANTONE#20376#20CV [ 0.56 0 1 0 ] >> /Type /Catalog /Pages 55 0 R /Outlines 15 1 R >> endobj ENDOFSRC obj = Rpdf2txt::PdfObject.new(src) attributes = obj.attributes assert_equal(Hash, attributes.class) assert_equal(7, attributes.size) end def test_parse_escaped src = '<< /O (foo\\) >>' obj = nil assert_nothing_raised { obj = Rpdf2txt::PdfObject.new(src) } assert_equal({:o => 'foo\\'}, obj.attributes) end def test_parse_limits src = <<-EOS 31 0 obj << /Limits [ ] /Names [ 141 0 R ] >> endobj EOS obj = nil assert_nothing_raised { obj = Rpdf2txt::PdfObject.new(src) } expected = { :names => ["", "141 0 R"], :limits => ["", ""]} assert_equal expected, obj.attributes end end class TestText < Test::Unit::TestCase def test_get_font font_src = <<-EOS 580 0 obj << /Type /Font /Subtype /Type1 /FirstChar 32 /LastChar 240 /Widths [ 278 389 500 556 556 1000 722 278 333 333 556 600 278 389 278 278 556 556 556 556 556 556 556 556 556 556 278 278 600 600 600 500 800 722 611 611 722 556 500 722 722 278 389 667 500 944 722 778 556 778 611 556 556 722 667 1000 667 667 556 389 278 389 600 500 278 556 611 444 611 556 389 611 611 278 278 556 278 889 611 611 611 611 389 444 389 611 556 889 556 556 500 333 222 333 600 278 0 0 0 0 0 0 0 0 0 0 0 0 0 0 556 0 0 0 0 0 0 0 0 0 0 0 0 0 0 611 0 0 0 0 556 556 0 0 0 0 0 800 0 0 0 278 0 0 278 600 278 278 0 611 278 278 278 278 278 0 0 278 0 0 0 0 0 278 0 278 278 0 0 0 278 0 0 0 0 0 0 0 0 0 0 0 0 278 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 278 ] /Encoding /MacRomanEncoding /BaseFont /Frutiger-BoldItalic /FontDescriptor 579 0 R >> endobj EOS page_src = <<-EOS 570 0 obj << /Type /Page /Parent 540 0 R /Resources 571 0 R /Contents 576 0 R /BleedBox [ 0 0 651 898 ] /MediaBox [ 0 0 651 898 ] /TrimBox [ 28 28 623 870 ] /CropBox [ 28 28 623 870 ] /ArtBox [ 28 28 623 870 ] /Rotate 0 >> endobj EOS rsrc_src = <<-EOS 571 0 obj << /ProcSet [ /PDF /Text /ImageC ] /Font << /F1 580 0 R /F2 575 0 R /F3 578 0 R /F4 573 0 R >> /XObject << /Im1 587 0 R >> /ExtGState << /GS2 585 0 R /GS3 584 0 R >> /Shading << /Sh1 590 0 R >> >> endobj EOS font = Rpdf2txt::Font.new(font_src) page = Rpdf2txt::PageLeaf.new(page_src) rsrc = Rpdf2txt::Resource.new(rsrc_src) page.build_tree({580=>font,571=>rsrc}) text = Rpdf2txt::Text.new("(Hello World)") text.current_page = page get_font = text.get_font("F1") assert_equal(Rpdf2txt::Font, get_font.class) assert_equal(font, get_font) assert_equal(true, font.bold?) assert_equal(true, font.italic?) assert_equal("/Frutiger-BoldItalic", font.basefont_name) end def test_font_no_width font_src = <<-EOS 327 0 obj << /Type /Font /Subtype /Type1 /Encoding 370 0 R /BaseFont /Symbol >> endobj EOS page_src = <<-EOS 10 0 obj << /Type /Page /Parent 390 0 R /Resources 11 0 R /Contents 12 0 R /MediaBox [ 0 0 595 841 ] /CropBox [ 0 0 595 841 ] /Rotate 0 >> endobj EOS rsrc_src = <<-EOS 11 0 obj << /ProcSet [ /PDF /Text ] /Font << /F1 416 0 R /F2 408 0 R /F4 410 0 R /F6 325 0 R /F8 327 0 R >> /ExtGState << /GS1 422 0 R >> >> endobj EOS txt_src = <<-EOS BT /F8 1 Tf (Hello World) Tj ET EOS font = Rpdf2txt::Font.new(font_src) page = Rpdf2txt::PageLeaf.new(page_src) rsrc = Rpdf2txt::Resource.new(rsrc_src) page.build_tree({327=>font,11=>rsrc}) text = Rpdf2txt::Text.new(txt_src) text.current_page = page assert_nothing_raised { text.scan } end end class TestEncrypt < Test::Unit::TestCase def setup file = File.expand_path('./data/encrypt_string', File.dirname(__FILE__)) src_encrypt_obj = File.read(file) @encrypt = Rpdf2txt::PdfEncrypt.new(src_encrypt_obj) @encrypt.file_id = '8664e6986751f2a49dccc9a4b40a4f18' end def test_decrypt file = File.expand_path('./data/working_obj', File.dirname(__FILE__)) input = File.read(file) pdf_obj = Rpdf2txt::Stream.new(input) assert_equal("dc08b36009e48618f99c", @encrypt.decrypt_key(pdf_obj).unpack('h*').first) #if the stream could be inflated, the decryption is ok! assert_nothing_raised{ Zlib::Inflate.inflate(@encrypt.decrypt(pdf_obj)) Zlib::Inflate.inflate(@encrypt.decrypt(pdf_obj)) } end def test_decrypt2 file = File.expand_path('./data/90_obj', File.dirname(__FILE__)) input = File.read(file) pdf_obj = Rpdf2txt::Stream.new(input) assert_equal("7617ca1ac5babcf09cdf", @encrypt.decrypt_key(pdf_obj).unpack('h*').first) #if the stream could be inflated, the decryption is ok! assert_nothing_raised{ Zlib::Inflate.inflate(@encrypt.decrypt(pdf_obj)) Zlib::Inflate.inflate(@encrypt.decrypt(pdf_obj)) } end def test_decrypt3 file = File.expand_path('./data/working_obj2', File.dirname(__FILE__)) input = File.read(file) pdf_obj = Rpdf2txt::Stream.new(input) assert_equal("a9a666959bd64a96551b", @encrypt.decrypt_key(pdf_obj).unpack('h*').first) #if the stream could be inflated, the decryption is ok! assert_nothing_raised{ Zlib::Inflate.inflate(@encrypt.decrypt(pdf_obj)) Zlib::Inflate.inflate(@encrypt.decrypt(pdf_obj)) } end def test_decrypt5 file = File.expand_path('./data/458_obj', File.dirname(__FILE__)) input = File.read(file) pdf_obj = Rpdf2txt::Stream.new(input) #assert_equal("1aaeedd5d5304b79709b", @encrypt.decrypt_key(pdf_obj).unpack('h*').first) #if the stream could be inflated, the decryption is ok! assert_nothing_raised{ Zlib::Inflate.inflate(@encrypt.decrypt(pdf_obj)) Zlib::Inflate.inflate(@encrypt.decrypt(pdf_obj)) } end def test_decrypt6 file = File.expand_path('./data/450_obj', File.dirname(__FILE__)) input = File.read(file) pdf_obj = Rpdf2txt::Stream.new(input) #assert_equal("1aaeedd5d5304b79709b", @encrypt.decrypt_key(pdf_obj).unpack('h*').first) #if the stream could be inflated, the decryption is ok! assert_nothing_raised{ Zlib::Inflate.inflate(@encrypt.decrypt(pdf_obj)) Zlib::Inflate.inflate(@encrypt.decrypt(pdf_obj)) } end def test_decrypt7 file = File.expand_path('./data/465_obj', File.dirname(__FILE__)) input = File.read(file) pdf_obj = Rpdf2txt::Stream.new(input) #assert_equal("1aaeedd5d5304b79709b", @encrypt.decrypt_key(pdf_obj).unpack('h*').first) #if the stream could be inflated, the decryption is ok! assert_nothing_raised{ Zlib::Inflate.inflate(@encrypt.decrypt(pdf_obj)) Zlib::Inflate.inflate(@encrypt.decrypt(pdf_obj)) } end def test_decrypt_key file = File.expand_path('./data/encrypt_obj', File.dirname(__FILE__)) src = File.read(file) #byte position important! do not indent these lines!!! obj_src = <<-EOS 473 0 obj << /N 3 /Alternate /DeviceRGB /Length 2575 /Filter /FlateDecode >> endobj EOS pdf_obj = Rpdf2txt::Stream.new(obj_src) encrypt = Rpdf2txt::PdfEncrypt.new(src) encrypt.file_id = '8664e6986751f2a49dccc9a4b40a4f18' #puts encrypt.decrypt_key(pdf_obj) assert_equal("dc08b36009e48618f99c", encrypt.decrypt_key(pdf_obj).unpack('h*').first) end def test_inflate_obj file = File.expand_path('./data/90_obj_comp', File.dirname(__FILE__)) input = File.read(file) input = [input].pack('H*') # puts input assert_nothing_raised{ Zlib::Inflate.inflate(input) # Zlib::Inflate.inflate(@encrypt.decrypt(pdf_obj)) } end def test_parse_encrypt file = File.expand_path('./data/encrypt_obj', File.dirname(__FILE__)) src = File.read(file) encrypt = Rpdf2txt::PdfEncrypt.new(src) encrypt.file_id = '8664e6986751f2a49dccc9a4b40a4f18' assert_equal("00ecc7a7bf8d68c564a21b98258b1dbff2aaf8d24bfdbaa74a9a073467d896b6", encrypt.user_key.unpack("H*").first) assert_equal("2055c756c72e1ad702608e8196acad447ad32d17cff583235f6dd15fed7dab67", encrypt.owner_key.unpack("H*").first) assert_nothing_raised{ encrypt.encryption_key } end def test_endianess file = File.expand_path('./data/encrypt_obj', File.dirname(__FILE__)) src = File.read(file) encrypt = Rpdf2txt::PdfEncrypt.new(src) encrypt.big_endian? end end class TestEncrypt128bit < Test::Unit::TestCase def setup file = File.expand_path('./data/encrypt_string_128bit', File.dirname(__FILE__)) src_encrypt_obj = File.read(file) @encrypt = Rpdf2txt::PdfEncrypt.new(src_encrypt_obj) @encrypt.file_id = 'D816A5E838D50653C19DB62504229EB6' end def test_decrypt8 file = File.expand_path('./data/3392_obj', File.dirname(__FILE__)) input = File.read(file) pdf_obj = Rpdf2txt::Stream.new(input) #if the stream could be inflated, the decryption is ok! assert_nothing_raised{ Zlib::Inflate.inflate(@encrypt.decrypt(pdf_obj)) Zlib::Inflate.inflate(@encrypt.decrypt(pdf_obj)) } end end class TestTrailer < Test::Unit::TestCase def test_parse_trail src =' trailer << /Size 476 /Info 388 0 R /Encrypt 395 0 R /Root 394 0 R /Prev 203754 /ID[<8664e6986751f2a49dccc9a4b40a4f18v>] >> startxref' expected = "8664e6986751f2a49dccc9a4b40a4f18v" node = Rpdf2txt::TrailerDictionary.new(src) assert_equal(expected, node.file_id) end end class TestFont < Test::Unit::TestCase def test_encoding src = <<-EOS 580 0 obj << /Type /Font /Subtype /Type1 /FirstChar 32 /LastChar 240 /Widths [ 278 389 500 556 556 1000 722 278 333 333 556 600 278 389 278 278 556 556 556 556 556 556 556 556 556 556 278 278 600 600 600 500 800 722 611 611 722 556 500 722 722 278 389 667 500 944 722 778 556 778 611 556 556 722 667 1000 667 667 556 389 278 389 600 500 278 556 611 444 611 556 389 611 611 278 278 556 278 889 611 611 611 611 389 444 389 611 556 889 556 556 500 333 222 333 600 278 0 0 0 0 0 0 0 0 0 0 0 0 0 0 556 0 0 0 0 0 0 0 0 0 0 0 0 0 0 611 0 0 0 0 556 556 0 0 0 0 0 800 0 0 0 278 0 0 278 600 278 278 0 611 278 278 278 278 278 0 0 278 0 0 0 0 0 278 0 278 278 0 0 0 278 0 0 0 0 0 0 0 0 0 0 0 0 278 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 278 ] /Encoding /MacRomanEncoding /BaseFont /Frutiger-BoldItalic /FontDescriptor 579 0 R >> endobj EOS font = Rpdf2txt::Font.new(src) assert_equal("mac", font.encoding) end def test_width src = <<-EOS 580 0 obj << /Type /Font /Subtype /Type1 /FirstChar 32 /LastChar 240 /Widths [ 278 389 500 556 556 1000 722 278 333 333 556 600 278 389 278 278 556 556 556 556 556 556 556 556 556 556 278 278 600 600 600 500 800 722 611 611 722 556 500 722 722 278 389 667 500 944 722 778 556 778 611 556 556 722 667 1000 667 667 556 389 278 389 600 500 278 556 611 444 611 556 389 611 611 278 278 556 278 889 611 611 611 611 389 444 389 611 556 889 556 556 500 333 222 333 600 278 0 0 0 0 0 0 0 0 0 0 0 0 0 0 556 0 0 0 0 0 0 0 0 0 0 0 0 0 0 611 0 0 0 0 556 556 0 0 0 0 0 800 0 0 0 278 0 0 278 600 278 278 0 611 278 278 278 278 278 0 0 278 0 0 0 0 0 278 0 278 278 0 0 0 278 0 0 0 0 0 0 0 0 0 0 0 0 278 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 278 ] /Encoding /MacRomanEncoding /BaseFont /Frutiger-BoldItalic /FontDescriptor 579 0 R >> endobj EOS font = Rpdf2txt::Font.new(src) assert_equal(278, font.width(' ')) assert_equal(556, font.width('a')) end def test_width__builtin src = <<-EOS 580 0 obj << /Type /Font /Subtype /Type1 /FirstChar 32 /LastChar 240 /Encoding /MacRomanEncoding /BaseFont /Symbol /FontDescriptor 579 0 R >> endobj EOS font = Rpdf2txt::Font.new(src) assert_equal(250, font.width(' ')) assert_equal(763, font.width(70)) assert_equal(631, font.width('alpha')) end def test_width__differences encoding = <<-EOS 252 0 obj << /Type /Encoding /Differences [ 1 /space /beta /alpha ] >> endobj EOS font = <<-EOS 219 0 obj << /Type /Font /Subtype /Type1 /Encoding 252 0 R /BaseFont /Symbol /ToUnicode 253 0 R >> endobj EOS tounicode = <<-EOS 253 0 obj << /Filter /FlateDecode /Length 227 >> stream H‰TP»nÄ ìùŠ-ï”°[‹"—ÆÅ%§8Iam!a\øï9)#v–™–^Æ·Ñ™ô½š0ÁbœŽ¸ù=*„Wã€w J­ª¨¬ @³x:¶„vt‹‡a ô37·8M‡ýý…~DѸN_üû'ÓÂ-º „ ¡—« ïÒbn7iåyè5nA*ŒÒ­ã"C/þß#ýC1/²=­À:&2Ñ•{…þµ+DßÌ ÁÉžêb_þúL§ös𺚹¤4Ÿ; >”På_Eoæ endstream endobj EOS font = Rpdf2txt::Font.new(font) font.attributes[:encoding] = Rpdf2txt::Encoding.new(encoding) font.attributes[:to_unicode] = Rpdf2txt::Stream.new(tounicode).to_cmap assert_equal(631, font.width('alpha')) assert_equal(631, font.width(3)) end end class TestPageLeaf < Test::Unit::TestCase def test_text_cm stream = Stream.new stream.decoded_stream = <<-'EOS' q 1 0 0 -1 70.866 841.89 cm 0 J 1 1 1 RG q -1.5 -1.5 m 455.043 -1.5 l 452.043 1.5 l 1.5 1.5 l W* n -0.5 0 m 454.043 0 l S Q q 455.043 -1.5 m 455.043 32.5 l 452.043 29.5 l 452.043 1.5 l W* n 453.543 -0.5 m 453.543 31.5 l S Q q 455.043 32.5 m -1.5 32.5 l 1.5 29.5 l 452.043 29.5 l W* n 454.043 31 m -0.5 31 l S Q q -1.5 32.5 m -1.5 -1.5 l 1.5 1.5 l 1.5 29.5 l W* n 0 31.5 m 0 -0.5 l S Q 1 0 0 1 0.5 31.5 cm 0 0 0 rg BT /F0 8 Tf 1 0 0 -1 232.336 7.573 Tm [(1. position: 7.573 offset: 31.5)] TJ ET 1 0 0 1 -0.5 -0.5 cm q 455.043 -1.5 m 455.043 12.1 l 452.043 9.1 l 452.043 1.5 l W* n 453.543 -0.5 m 453.543 11.1 l S Q q 455.043 12.1 m -1.5 12.1 l 1.5 9.1 l 452.043 9.1 l W* n 454.043 10.6 m -0.5 10.6 l S Q q -1.5 12.1 m -1.5 -1.5 l 1.5 1.5 l 1.5 9.1 l W* n 0 11.1 m 0 -0.5 l S Q 1 0 0 1 0 39.866 cm BT /F1 16 Tf 1 0 0 -1 0 14.347 Tm (2. position: 14.347 offset: 39.866) Tj 0 0 0 RG ET 0 30.173 m 453.543 30.173 l S 1 0 0 1 0 32.2 cm BT /F2 11 Tf 1 0 0 -1 314.813 10.413 Tm (3. position: 10.413 offset: 32.2) Tj ET 1 0 0 1 0 -32.2 cm 0 46.7 m 453.543 46.7 l S BT /F2 8 Tf 1 0 0 -1 0 62.573 Tm (4. position: 62.573 offset:-32.2) Tj /F3 9 Tf 0 -14.547 Td (5. moved by: -14.547) Tj /F2 8 Tf 0 -15.853 Td (6. moved by: -15.853) Tj ET EOS page = PageLeaf.new page.resources = Resource.new handler = SimpleHandler.new page.contents = [stream] page.text(handler) ## a+b a-b # 1. 39.073 -23.927 # 2. 54.213 -25.519 # 3. 42.613 -21.787 ## fonts # 1. F0 8 # 2. F1 16 # 3. F2 11 ## a+b+f a+b-f a-b-f a-b+f # 1. 47.073 31.073 -31.927 -15.926 # 2. 70.213 38.213 -41.519 - 9.519 # 3. 53.613 31.613 -32.787 -10.787 ## a+f a-f # 1. 15.573 -0.427 # 2. 30.347 -1.653 # 3. 21.413 -0.587 ## 5 hrs -> 5 x newline expected = <<-EOS 1. position: 7.573 offset: 31.5 2. position: 14.347 offset: 39.866 3. position: 10.413 offset: 32.2 4. position: 62.573 offset:-32.2 5. moved by: -14.547 6. moved by: -15.853 EOS assert_equal(expected.strip, handler.out.strip) end def test_text__fixed_double_lead_bug stream = Stream.new stream.decoded_stream = <<-'EOS' q 1 i 0.059998 34.407 618 -34.5 re W* n 0 864.567 617.94 -864.54 re W* n /GS1 gs q 324.71994 0 0 25.199999 -0.720012 10.166975 cm /Im112 Do Q Q q 1 i 617.04 11.127 0.89996 0.23999 re W n /GS1 gs q 1.44 0 0 0.24 617.039978 11.126974 cm /Im17 Do Q Q q 1 i 0.059998 34.407 618 -34.5 re W* n 0 864.567 617.94 -864.54 re W* n /GS1 gs q 1.44 0 0 0.24 0.239988 10.886974 cm /Im18 Do Q q 27.359999 0 0 0.24 295.679962 10.886974 cm /Im16 Do Q Q q 1 i 617.28 10.887 0.65997 0.24002 re W n /GS1 gs q 0.96 0 0 0.24 617.279968 10.886974 cm /Im14 Do Q Q q 1 i 0.059998 34.407 618 -34.5 re W* n 0 864.567 617.94 -864.54 re W* n /GS1 gs q 597.599976 0 0 12.719999 10.319989 -0.873026 cm /Im113 Do Q Q q 1 i 11.28 0.026978 0.47998 0.059998 re W n /GS1 gs q 0.48 0 0 0.24 11.279988 -0.153026 cm /Im2 Do Q Q q 1 i 606.48 0.026978 0.47998 0.059998 re W n /GS1 gs q 0.48 0 0 0.24 606.47998 -0.153026 cm /Im2 Do Q Q q 1 i 0 864.567 617.94 -50.94 re W* n /GS1 gs q 608.399963 0 0 13.200012 -0.960012 852.326965 cm /Im93 Do Q Q q 1 i 616.08 853.287 1.86 0.23999 re W n /GS1 gs q 1.92 0 0 0.24 616.079956 853.286987 cm /Im44 Do Q Q q 1 i 0 864.567 617.94 -50.94 re W* n /GS1 gs q 1.92 0 0 0.24 -0.000012 853.046936 cm /Im87 Do Q q 240 0 0 0.24 7.439988 853.046936 cm /Im85 Do Q Q q 1 i 615.84 853.047 2.1 0.24005 re W n /GS1 gs q 2.4 0 0 0.24 615.839966 853.046936 cm /Im59 Do Q Q q 1 i 0 864.567 617.94 -50.94 re W* n /GS1 gs q 241.920013 0 0 41.040039 6.479988 812.966919 cm /Im88 Do Q q 572.639954 0 0 0.24 39.359989 813.686951 cm /Im109 Do Q q 572.639954 0 0 0.24 39.359989 813.44696 cm /Im109 Do Q Q /GS1 gs BT /F1 1 Tf 10.02 0 0 10.02 48.24 821.187 Tm 0 g -0.0006 Tc -0.002 Tw [(Arzneimittel Nachrichten )5.9(/ Médicamen)5.6(t)-0.8(s )]TJ /F2 1 Tf 7.02 0 0 7.02 87.9 24.987 Tm 0.0023 Tc 0.0017 Tw [(S)6.6(w)6.2(iss)6.7(m)2.4(ed)6.5(ic)10.4( Jo)6.5(u)6.5(r)-1.9(n)6.5(a)11.3(l 03)11.3(/200)11.3(6)11.3( )]TJ 1 g 30.6752 0 TD -0.0004 Tc 0 Tw (226)Tj 0 g 1.6667 0 TD 0 Tc ( )Tj /F1 1 Tf 11.52 0 0 11.52 96.42 773.3669 Tm -0.0006 Tc -0.0014 Tw [(Autorisa)3.3(tion d’un médicament co)6.2(ntenant un)6.2( nouveau principe actif: )]TJ 0 -1.125 TD 0 Tc 0 Tw (M)Tj /F2 1 Tf 10.02 0 0 10.02 96.42 742.527 Tm 0 Tc 0 Tw ( )Tj /F1 1 Tf 0 -1.1976 TD -0.0002 Tc 0.1054 Tw [(En février 2)4.9(006, la prép)6(aration Mac)6.7(ugen)6(®)-2.6(, une )]TJ T* -0.0004 Tc 0.1116 Tw [(solution injectable conte)4.7(n)-0.2(ant un nou)5.8(veau prin)5.8(-)]TJ T* -0.0002 Tc 0.1893 Tw [(cipe actif, le pegapta)4.9(n)0(ib, a été autor)5.6(i)2.4(sée dans)6.7( )]TJ T* -0.0005 Tc 0.1357 Tw [(l’indication suivante )137.8(: « )137.8(T)4.6(r)-0.7(aitement )6(d)5.7(e)-1.4( la form)8.3(e )]TJ T* -0.0007 Tc 0.3514 Tw [(néovasc)6.2(u)-0.5(laire \(h)5.5(umide\) )5.9(de la d)5.5(é)-1.6(générescence )]TJ T* -0.0019 Tw (maculaire liée à l’âge». )Tj T* -0.0001 Tc 0.0395 Tw [(La dos)6.8(e auto)6.1(risée de 0,3 )47.9(mg de pegaptanib doit)5.7( )]TJ T* -0.0003 Tc 0.4409 Tw [(être administrée par )-6(i)8.2(n)-0.1(jection )-6(int)5.5(r)-0.5(avitré)4.8(enne)4.8( )]TJ T* -0.0005 Tc -0.002 Tw [(toutes les six semaines \(9 )6(injections par an\). )]TJ T* 0 Tc 0 Tw ( )Tj /F2 1 Tf T* -0.001 Tc 0.4595 Tw [(Le )6(pegaptanib sodique est)4.8( un oli)7.5(gonucléide )]TJ T* -0.0014 Tc 0.0767 Tw [(modifié pégy)-4.4(lé qui )6(se lie à l’isoforme VEGF)]TJ -21.4132 -1.1976 TD -0.001 Tc 0.3158 Tw [(facteur)4.8( de c)5.9(r)-1.2(oissance de l’endothéli)7.5(um vascu-)]TJ T* -0.0004 Tc 0.1595 Tw [(laire \(VEGF\) )6(et inhibe so)5.8(n activité. Le VEGF est)5.4( )]TJ T* -0.0006 Tc 0.1957 Tw [(une prot)5.2(éin)5.6(e)-1.5( qui induit une angiog)5.6(enèse, un)5.6(e )]TJ T* -0.0009 Tc 0.4295 Tw [(perméabilité v)-3.9(a)-1.8(sculaire )6(et une inflammation. )]TJ T* ( )Tj ET EOS page = PageLeaf.new page.resources = Resource.new handler = SimpleHandler.new page.contents = [stream] page.text(handler) expected = <<-EOS.strip Arzneimittel Nachrichten / M\351dicaments Autorisation d\222un m\351dicament contenant un nouveau principe actif: M En f\351vrier 2006, la pr\351paration Macugen\256, une solution injectable contenant un nouveau prin- cipe actif, le pegaptanib, a \351t\351 autoris\351e dans l\222indication suivante : \253 Traitement de la forme n\351ovasculaire (humide) de la d\351g\351n\351rescence maculaire li\351e \340 l\222\342ge\273. La dose autoris\351e de 0,3 mg de pegaptanib doit \352tre administr\351e par injection intravitr\351enne toutes les six semaines (9 injections par an). Le pegaptanib sodique est un oligonucl\351ide modifi\351 p\351gyl\351 qui se lie \340 l\222isoforme VEGF facteur de croissance de l\222endoth\351lium vascu- laire (VEGF) et inhibe son activit\351. Le VEGF est une prot\351ine qui induit une angiogen\350se, une perm\351abilit\351 vasculaire et une inflammation. Swissmedic Journal 03/2006 226 EOS result = handler.out.strip =begin [expected.size, result.size].max.times do |idx| unless result[idx] == expected[idx] flunk "unexpected result: (#{result[idx]}/#{expected[idx]} at #{idx}) ...#{expected[idx-10,20].inspect}..." end end =end assert_equal(expected, result) end def test_text_landscape stream = Stream.new stream.decoded_stream = <<-'EOS' /GS1 gs BT /TT2 1 Tf 0 14.0053 -13.9999 0 59.64 43.2505 Tm /Cs6 cs 0 0 0 scn -0.0002 Tc 0.0008 Tw (Zuzahlungsbefreite Arzneimittel nach § 31 Abs. 3 Satz 4 SGB V)Tj /TT4 1 Tf 0 9.0035 -9 0 117 176.4505 Tm 0.0009 Tc 0 Tw (PZN)Tj -14.7942 0 TD -0.0016 Tc [(Arzneimit)-3.7(t)-3.7(e)1.4(lname)]TJ 59.8165 0 TD 0.0016 Tc [(D)4(a)11.3(rrei)10.5(c)1.8(hu)4.6(n)11.3(g)-2(sf)6.2(orm)]TJ -39.9843 0 TD -0.0013 Tc [(He)8.4(rst)-10(e)8.4(l)-5.7(l)7.6(e)-4.9(r)]TJ 52.2861 0 TD 0.0001 Tc [(Apo)9.8(t)-8.6(h)9.8(e)-3.5(ke)9.8(nverka)9.8(ufspre)9.8(is)]TJ 3.1321 -1.14 TD -0.0006 Tc 0.0027 Tw [( in)-4.2(kl)8.3(.)-9.3(M)-0.6(w)21.8(S)0(t)]TJ ET 129.3 42.531 1.98 751.68 re f BT 0 9.0035 -9 0 117 519.6505 Tm -0.0017 Tc 0 Tw (Packungs-)Tj 1.0662 -1.14 TD -0.0022 Tc [(größ)-4.4(e)]TJ -20.6119 1.14 TD 0 Tc [(Wirkstoff)-8.7(\()-0.2(e)9.7(\))-5598(Wirkstärke)]TJ ET q 1 i 108.9 440.091 9.96 53.46 re W n BT 0 9.0035 -9 0 117 482.5705 Tm (\()Tj ET Q BT 0 9.0035 -9 0 117 485.5705 Tm (n)Tj ET q 1 i 108.9 440.091 9.96 53.46 re W n BT 0 9.0035 -9 0 117 490.5505 Tm (\))Tj ET Q BT /TT2 1 Tf 0 14.0053 -13.9999 0 79.5 43.2505 Tm -0.0008 Tc (Produktstand)Tj 0 -1.2129 TD -0.0001 Tc 0.0007 Tw (sortiert nach Arzneimittelname)Tj 7.2915 1.2129 TD 0.0001 Tc 0 Tw [(01)-94.9(.)-231.9(0)-0.7(8)-35(.)-0.5(2009)]TJ /TT4 1 Tf 0 8.003 -7.9999 0 144.36 176.4505 Tm -0.0014 Tc (4000741)Tj -16.6438 0 TD -0.0006 Tc 0.0002 Tw (ABSEAMED 10000I.E./1ML)Tj 67.3249 0 TD -0.0008 Tc 0 Tw [(Fertigspritzen)-12242.4(611,53)]TJ -45.0132 0 TD 0.0045 Tc -0.0049 Tw [(M)12.8(E)-3.3(D)14.4(I)-2.6(C)6.9(E)4.2( AR)6.9(Z)15.6(N)6.9(.G)17.6(MB)11.7(H&)11.7(CO)17.6(.K)11.7(G)-23621.1(6)-1.6(X1)-1298.7(m)5.3(l)]TJ 15.2268 0 TD -0.0006 Tc 0.0077 Tw [(E)-8.4(p)0.8(o)8.3(e)0.8(t)-7.7(i)-3.3(n)8.3( alf)7.3(a)-8118.7(10000)-831.4(I.E.)]TJ ET 169.56 42.531 0.48 748.26 re f BT 0 8.003 -7.9999 0 178.32 176.4505 Tm -0.0014 Tc 0 Tw (4000646)Tj -16.6438 0 TD -0.0006 Tc 0.0002 Tw (ABSEAMED 1000I.E./0.5ML)Tj 67.3249 0 TD 0 Tw [(Fertigspritzen)-12519.7(64,20)]TJ -45.0132 0 TD 0.0045 Tc -0.0049 Tw [(M)12.8(E)-3.3(D)14.4(I)-2.6(C)6.9(E)4.2( AR)6.9(Z)15.6(N)6.9(.G)17.6(MB)11.7(H&)11.7(CO)17.6(.K)11.7(G)-22781.4(6)5.9(X0)5.9(.)4.9(5)-1313.6(m)5.3(l)]TJ 15.2268 0 TD -0.0005 Tc 0.0076 Tw [(E)-8.3(p)0.9(o)8.4(e)0.9(t)-7.6(i)-3.2(n)8.4( alf)7.4(a)-8673.4(1000)-831.3(I.E.)]TJ ET 203.46 42.531 0.54001 748.26 re f BT 0 8.003 -7.9999 0 212.34 176.4505 Tm -0.0014 Tc 0 Tw (4000652)Tj -16.6438 0 TD -0.0006 Tc 0.0002 Tw (ABSEAMED 2000I.E./1ML)Tj 67.3249 0 TD -0.0008 Tc 0 Tw [(Fertigspritzen)-12242.4(119,04)]TJ -45.0132 0 TD 0.0045 Tc -0.0049 Tw [(M)12.8(E)-3.3(D)14.4(I)-2.6(C)6.9(E)4.2( AR)6.9(Z)15.6(N)6.9(.G)17.6(MB)11.7(H&)11.7(CO)17.6(.K)11.7(G)-23621.1(6)-1.6(X1)-1298.7(m)5.3(l)]TJ 15.2268 0 TD -0.0005 Tc 0.0076 Tw [(E)-8.3(p)0.9(o)8.4(e)0.9(t)-7.6(i)-3.2(n)8.4( alf)7.4(a)-8673.4(2000)-831.3(I.E.)]TJ ET 237.48 42.531 0.53999 748.26 re f BT 0 8.003 -7.9999 0 246.36 176.4505 Tm -0.0014 Tc 0 Tw (4000669)Tj -16.6438 0 TD -0.0006 Tc 0.0002 Tw (ABSEAMED 3000I.E./0.3ML)Tj 67.3249 0 TD -0.0008 Tc 0 Tw [(Fertigspritzen)-12242.4(173,94)]TJ -45.0132 0 TD 0.0045 Tc -0.0049 Tw [(M)12.8(E)-3.3(D)14.4(I)-2.6(C)6.9(E)4.2( AR)6.9(Z)15.6(N)6.9(.G)17.6(MB)11.7(H&)11.7(CO)17.6(.K)11.7(G)-22781.4(6)5.9(X0)5.9(.)4.9(3)-1313.6(m)5.3(l)]TJ 15.2268 0 TD -0.0005 Tc 0.0076 Tw [(E)-8.3(p)0.9(o)8.4(e)0.9(t)-7.6(i)-3.2(n)8.4( alf)7.4(a)-8673.4(3000)-831.3(I.E.)]TJ ET 271.56 42.531 0.48001 748.26 re f BT 0 8.003 -7.9999 0 280.32 176.4505 Tm -0.0014 Tc 0 Tw (4000681)Tj -16.6438 0 TD -0.0006 Tc 0.0002 Tw (ABSEAMED 4000I.E./0.4ML)Tj 67.3249 0 TD -0.0008 Tc 0 Tw [(Fertigspritzen)-12242.4(228,83)]TJ -45.0132 0 TD 0.0045 Tc -0.0049 Tw [(M)12.8(E)-3.3(D)14.4(I)-2.6(C)6.9(E)4.2( AR)6.9(Z)15.6(N)6.9(.G)17.6(MB)11.7(H&)11.7(CO)17.6(.K)11.7(G)-22781.4(6)5.9(X0)5.9(.)4.9(4)-1313.6(m)5.3(l)]TJ 15.2268 0 TD -0.0005 Tc 0.0076 Tw [(E)-8.3(p)0.9(o)8.4(e)0.9(t)-7.6(i)-3.2(n)8.4( alf)7.4(a)-8673.4(4000)-831.3(I.E.)]TJ ET 305.46 42.531 0.53998 748.26 re f BT 0 8.003 -7.9999 0 314.34 176.4505 Tm -0.0014 Tc 0 Tw (4000698)Tj -16.6438 0 TD -0.0006 Tc 0.0002 Tw (ABSEAMED 5000I.E./0.5ML)Tj 67.3249 0 TD -0.0008 Tc 0 Tw [(Fertigspritzen)-12242.4(283,70)]TJ -45.0132 0 TD 0.0045 Tc -0.0049 Tw [(M)12.8(E)-3.3(D)14.4(I)-2.6(C)6.9(E)4.2( AR)6.9(Z)15.6(N)6.9(.G)17.6(MB)11.7(H&)11.7(CO)17.6(.K)11.7(G)-22781.4(6)5.9(X0)5.9(.)4.9(5)-1313.6(m)5.3(l)]TJ 15.2268 0 TD -0.0005 Tc 0.0076 Tw [(E)-8.3(p)0.9(o)8.4(e)0.9(t)-7.6(i)-3.2(n)8.4( alf)7.4(a)-8673.4(5000)-831.3(I.E.)]TJ ET 339.48 42.531 0.54001 748.26 re f BT 0 8.003 -7.9999 0 348.36 176.4505 Tm -0.0014 Tc 0 Tw (4000729)Tj -16.6438 0 TD -0.0006 Tc 0.0002 Tw (ABSEAMED 6000I.E./0.6ML)Tj 67.3249 0 TD -0.0008 Tc 0 Tw [(Fertigspritzen)-12242.4(338,57)]TJ -45.0132 0 TD 0.0045 Tc -0.0049 Tw [(M)12.8(E)-3.3(D)14.4(I)-2.6(C)6.9(E)4.2( AR)6.9(Z)15.6(N)6.9(.G)17.6(MB)11.7(H&)11.7(CO)17.6(.K)11.7(G)-22781.4(6)5.9(X0)5.9(.)4.9(6)-1313.6(m)5.3(l)]TJ 15.2268 0 TD -0.0005 Tc 0.0076 Tw [(E)-8.3(p)0.9(o)8.4(e)0.9(t)-7.6(i)-3.2(n)8.4( alf)7.4(a)-8673.4(6000)-831.3(I.E.)]TJ ET 373.56 42.531 0.47998 748.26 re f BT 0 8.003 -7.9999 0 382.32 176.4505 Tm -0.0014 Tc 0 Tw (4000735)Tj -16.6438 0 TD -0.0006 Tc 0.0002 Tw (ABSEAMED 8000I.E./0.8ML)Tj 67.3249 0 TD -0.0008 Tc 0 Tw [(Fertigspritzen)-12242.4(448,34)]TJ -45.0132 0 TD 0.0045 Tc -0.0049 Tw [(M)12.8(E)-3.3(D)14.4(I)-2.6(C)6.9(E)4.2( AR)6.9(Z)15.6(N)6.9(.G)17.6(MB)11.7(H&)11.7(CO)17.6(.K)11.7(G)-22781.4(6)5.9(X0)5.9(.)4.9(8)-1313.6(m)5.3(l)]TJ 15.2268 0 TD -0.0005 Tc 0.0076 Tw [(E)-8.3(p)0.9(o)8.4(e)0.9(t)-7.6(i)-3.2(n)8.4( alf)7.4(a)-8673.4(8000)-831.3(I.E.)]TJ ET 407.46 42.531 0.54001 748.26 re f BT 0 8.003 -7.9999 0 416.34 176.4505 Tm -0.0014 Tc 0 Tw (3867219)Tj -16.6438 0 TD -0.0016 Tc 0.0012 Tw (ACC 200)Tj 67.3249 0 TD -0.0008 Tc 0 Tw [(Brausetabletten)-11575.1(12,74)]TJ -45.0132 0 TD 0.0003 Tc 0.0068 Tw [(H)10.2(E)-7.5(XAL)9.2( AG)]TJ 38.2882 0 TD -0.0006 Tc 0 Tw [(50)-1333.7(St)]TJ -23.0614 0 TD -0.0021 Tc [(A)-9.9(c)3.1(ety)10.6(l)-4.9(c)-11.9(y)10.6(st)-9.2(e)6.8(i)-4.8(n)-8690(2)-8.2(00)-825.4(mg)]TJ ET 441.48 42.531 0.54001 748.26 re f BT 0 8.003 -7.9999 0 450.36 176.4505 Tm -0.0014 Tc (3867225)Tj -16.6438 0 TD -0.0016 Tc 0.0012 Tw (ACC 200)Tj 67.3249 0 TD -0.0008 Tc 0 Tw [(Brausetabletten)-11575.1(15,42)]TJ -45.0132 0 TD 0.0003 Tc 0.0068 Tw [(H)10.2(E)-7.5(XAL)9.2( AG)]TJ 37.7335 0 TD -0.0007 Tc 0 Tw [(100)-1333.8(St)]TJ -22.5066 0 TD -0.0021 Tc [(A)-9.9(c)3.1(ety)10.6(l)-4.9(c)-11.9(y)10.6(st)-9.2(e)6.8(i)-4.8(n)-8690(2)-8.2(00)-825.4(mg)]TJ ET 475.56 42.531 0.47998 748.26 re f BT 0 8.003 -7.9999 0 484.32 176.4505 Tm -0.0014 Tc (4789763)Tj -16.6438 0 TD -0.0016 Tc 0.0012 Tw (ACC 200)Tj 67.3249 0 TD -0.0008 Tc 0 Tw [(Brausetabletten)-11575.1(11,01)]TJ -45.0132 0 TD 0.0003 Tc 0.0068 Tw [(H)10.2(E)-7.5(XAL)9.2( AG)]TJ 38.2882 0 TD -0.0006 Tc 0 Tw [(20)-1333.7(St)]TJ -23.0614 0 TD -0.0021 Tc [(A)-9.9(c)3.1(ety)10.6(l)-4.9(c)-11.9(y)10.6(st)-9.2(e)6.8(i)-4.8(n)-8690(2)-8.2(00)-825.4(mg)]TJ ET 509.46 42.531 0.53998 748.26 re f BT 0 8.003 -7.9999 0 548.8199 376.6105 Tm -0.0007 Tc -0.0072 Tw [(Seite 1)-6.8( v)-10.5(o)0.7(n)8.2( )-15(10)8.2(83)]TJ ET EOS page = PageLeaf.new page.attributes.store :rotate, '90' page.resources = Resource.new handler = SimpleHandler.new page.contents = [stream] page.text(handler) expected = <<-EOS Zuzahlungsbefreite Arzneimittel nach \247 31 Abs. 3 Satz 4 SGB V Produktstand 01.08.2009 sortiert nach Arzneimittelname Arzneimittelname PZN Hersteller Wirkstoff(e) Wirkstärke (n) Packungs- Darreichungsform Apothekenverkaufspreis größe inkl.MwSt ABSEAMED 10000I.E./1ML 4000741 MEDICE ARZN.GMBH&CO.KG Epoetin alfa 10000 I.E.6X1 ml Fertigspritzen 611,53 ABSEAMED 1000I.E./0.5ML 4000646 MEDICE ARZN.GMBH&CO.KG Epoetin alfa 1000 6I.E.X0.5 ml Fertigspritzen 64,20 ABSEAMED 2000I.E./1ML 4000652 MEDICE ARZN.GMBH&CO.KG Epoetin alfa 2000 I.E.6X1 ml Fertigspritzen 119,04 ABSEAMED 3000I.E./0.3ML 4000669 MEDICE ARZN.GMBH&CO.KG Epoetin alfa 3000 6I.E.X0.3 ml Fertigspritzen 173,94 ABSEAMED 4000I.E./0.4ML 4000681 MEDICE ARZN.GMBH&CO.KG Epoetin alfa 4000 6I.E.X0.4 ml Fertigspritzen 228,83 ABSEAMED 5000I.E./0.5ML 4000698 MEDICE ARZN.GMBH&CO.KG Epoetin alfa 5000 6I.E.X0.5 ml Fertigspritzen 283,70 ABSEAMED 6000I.E./0.6ML 4000729 MEDICE ARZN.GMBH&CO.KG Epoetin alfa 6000 6I.E.X0.6 ml Fertigspritzen 338,57 ABSEAMED 8000I.E./0.8ML 4000735 MEDICE ARZN.GMBH&CO.KG Epoetin alfa 8000 6I.E.X0.8 ml Fertigspritzen 448,34 ACC 200 3867219 HEXAL AG Acetylcystein 200 mg 50 St Brausetabletten 12,74 ACC 200 3867225 HEXAL AG Acetylcystein 200 mg 100 St Brausetabletten 15,42 ACC 200 4789763 HEXAL AG Acetylcystein 200 mg 20 St Brausetabletten 11,01 Seite 1 von 1083 EOS assert_equal(expected.strip, handler.out.strip) end def test_text_kerning_bug stream = Stream.new path = File.expand_path('data/stream_kerning_bug.txt', File.dirname(__FILE__)) stream.decoded_stream = File.read path page = PageLeaf.new page.attributes.store :rotate, '90' page.resources = Resource.new handler = SimpleHandler.new page.contents = [stream] page.text(handler) expected = "RATIOPHARM GMBH 20 St" assert_equal(expected.strip, handler.out.strip) end def test_text_kerning_bug2 stream = Stream.new path = File.expand_path('data/stream_kerning_bug2.txt', File.dirname(__FILE__)) stream.decoded_stream = File.read path page = PageLeaf.new page.attributes.store :rotate, '90' page.resources = Resource.new handler = SimpleHandler.new page.contents = [stream] page.text(handler) expected = "HEUMANN PH GMBH&CO. KG 20 St" assert_equal(expected.strip, handler.out.strip) end =begin def test_text_space_bug2 stream = Stream.new path = File.expand_path('data/space_bug_stream2.txt', File.dirname(__FILE__)) fontsrc15 = <<-EOS 327 0 obj <> endobj EOS font15 = Font.new(fontsrc15) stream.decoded_stream = File.read path page = PageLeaf.new page.resources = resource = Resource.new resource.instance_variable_get('@fonts').store(:r15, font15) handler = SimpleHandler.new page.contents = [stream] page.text(handler) expected = "Inhalt / Table des mati\303\250res" assert_equal(expected.strip, handler.out.strip[0,28]) expected = '10 mg, 20 mg und 40 mg' assert_equal(expected.strip, handler.out.strip[346,22]) end =end end class TestEncoding < Test::Unit::TestCase def setup src = <<-EOS 252 0 obj << /Type /Encoding /Differences [ 1 /space /beta /alpha ] >> endobj EOS @encoding = Rpdf2txt::Encoding.new(src) end def test_differences expected = { 1 => 'space', 2 => 'beta', 3 => 'alpha', } assert_equal(expected, @encoding.differences) end def test_convert_symbol txt = "\003" assert_equal("a", @encoding.convert_symbol(txt)) assert_equal("\003", txt) end end class TestImage < Test::Unit::TestCase def test_png path = File.expand_path('data/png.pdfobj', File.dirname(__FILE__)) src = File.read(path) obj = Image.new(src) assert_nothing_raised { obj.image } path = File.expand_path('data/logo.png', File.dirname(__FILE__)) good, = Magick::Image.read path assert_equal(good, obj.image) end def test_indexed path = File.expand_path('data/index.pdfobj', File.dirname(__FILE__)) src = File.read(path) index = Stream.new(src) path = File.expand_path('data/indexed.pdfobj', File.dirname(__FILE__)) src = File.read(path) obj = Image.new(src) obj.build_tree(51 => index) assert_nothing_raised { obj.image } path = File.expand_path('data/pdf_50.png', File.dirname(__FILE__)) good, = Magick::Image.read path assert_equal(good, obj.image) end def test_indexed_2bit path = File.expand_path('data/index_2bit.pdfobj', File.dirname(__FILE__)) src = File.read(path) index = Stream.new(src) path = File.expand_path('data/indexed_2bit.pdfobj', File.dirname(__FILE__)) src = File.read(path) obj = Image.new(src) obj.build_tree(21 => index) assert_nothing_raised { obj.image } path = File.expand_path('data/pdf_20.png', File.dirname(__FILE__)) good, = Magick::Image.read path assert_equal(good, obj.image) end def test_indexed_masked path = File.expand_path('data/index_masked.pdfobj', File.dirname(__FILE__)) src = File.read(path) index = Stream.new(src) path = File.expand_path('data/indexed_masked.pdfobj', File.dirname(__FILE__)) src = File.read(path) obj = Image.new(src) obj.build_tree(21 => index) assert_nothing_raised { obj.image } path = File.expand_path('data/pdf_21.png', File.dirname(__FILE__)) good, = Magick::Image.read path assert_equal(good, obj.image) end def test_lzw_decode ## from the PDF-Manual data = "\x80\x0B\x60\x50\x22\x0C\x0C\x85\x01" stream = Stream.new(data) expected = "-----A---B" assert_equal(expected, stream.lzw_decode(data)) end def test_lzw_image path = File.expand_path('data/lzw_index.pdfobj', File.dirname(__FILE__)) src = File.read(path) index = Stream.new(src) path = File.expand_path('data/lzw.pdfobj', File.dirname(__FILE__)) src = File.read(path) obj = Image.new(src) obj.build_tree(21 => index) assert_nothing_raised { obj.image } path = File.expand_path('data/pdf_22.png', File.dirname(__FILE__)) good, = Magick::Image.read path assert_equal(good, obj.image) end end class TestInlineImage < Test::Unit::TestCase def test_inline_img attrs = <<-EOS /W 113 /CS /DeviceGray /BPC 8 /DP << /Predictor 15 /Columns 113 >> /F /Fl /H 1 EOS data = "x\234cd\2407\000\000\000\344\000\002" obj = InlineImage.new(attrs, data) assert_nothing_raised { obj.image } path = File.expand_path('data/inline.png', File.dirname(__FILE__)) good = Magick::Image.read path tmp_path = Tempfile.new('test').path + '.png' obj.image.write tmp_path tmp = Magick::Image.read tmp_path assert_equal(good, tmp) rescue StandardError => e p e ensure File.delete tmp_path if File.exist? tmp_path end end end