lib/podoff.rb in podoff-1.1.0 vs lib/podoff.rb in podoff-1.1.1

- old
+ new

@@ -21,17 +21,18 @@ # THE SOFTWARE. # # Made in Japan. #++ +require 'zlib' require 'strscan' require 'stringio' module Podoff - VERSION = '1.1.0' + VERSION = '1.1.1' def self.load(path, encoding='iso-8859-1') Podoff::Document.load(path, encoding) end @@ -51,11 +52,11 @@ def self.parse(s) Podoff::Document.new(s) end - attr_reader :source + attr_reader :scanner attr_reader :version attr_reader :xref attr_reader :objs attr_reader :obj_counters attr_reader :root @@ -65,51 +66,55 @@ def initialize(s) fail ArgumentError.new('not a PDF file') \ unless s.match(/\A%PDF-\d+\.\d+\s/) - @source = s + @scanner = ::StringScanner.new(s) @version = nil @xref = nil @objs = {} @obj_counters = {} @root = nil @additions = {} - sca = ::StringScanner.new(s) - @version = sca.scan(/%PDF-\d+\.\d+/) + @version = @scanner.scan(/%PDF-\d+\.\d+/) loop do - i = sca.skip_until( + i = @scanner.skip_until( /(startxref\s+\d+|\d+\s+\d+\s+obj|\/Root\s+\d+\s+\d+\s+R)/) - m = sca.matched + m = @scanner.matched break unless m if m[0] == 's' @xref = m.split(' ').last.to_i elsif m[0] == '/' @root = extract_ref(m) else - obj = Podoff::Obj.extract(self, sca) + obj = Podoff::Obj.extract(self) @objs[obj.ref] = obj @obj_counters[obj.ref] = (@obj_counters[obj.ref] || 0) + 1 end end if @root == nil - sca.pos = 0 + @scanner.pos = 0 loop do - i = sca.skip_until(/\/Root\s+\d+\s+\d+\s+R/) - break unless sca.matched - @root = extract_ref(sca.matched) + i = @scanner.skip_until(/\/Root\s+\d+\s+\d+\s+R/) + break unless @scanner.matched + @root = extract_ref(@scanner.matched) end end end + def source + + @scanner.string + end + def extract_ref(s) s.gsub(/\s+/, ' ').gsub(/[^0-9 ]+/, '').strip end @@ -122,11 +127,11 @@ o = self self.class.allocate.instance_eval do - @source = o.source + @scanner = ::StringScanner.new(o.source) @xref = o.xref @objs = o.objs.inject({}) { |h, (k, v)| h[k] = v.dup(self); h } @obj_counters = o.obj_counters.dup @@ -180,39 +185,36 @@ def add_base_font(name) name = name[1..-1] if name[0] == '/' - ref = new_ref + r = new_ref + s = "#{r} obj <</Type /Font /Subtype /Type1 /BaseFont /#{name}>> endobj" - add( - Obj.create( - self, - ref, - [ - "#{ref} obj", - "<< /Type /Font /Subtype /Type1 /BaseFont /#{name} >>", - "endobj" - ].join(' '))) + add(Obj.new(self, r, source: s)) end - def add_stream(s=nil, &block) + def add_stream(src=nil, &block) ref = new_ref - s = s || make_stream(&block) + src = + src && + [ + "#{ref} obj", + "<< /Length #{src.size} >>\nstream\n#{src}\nendstream", + "endobj" + ].join("\n") - s = [ - "#{ref} obj", - "<< /Length #{s.length} >>", - "stream\n#{s}\nendstream", - "endobj" - ].join("\n") if s.is_a?(String) + str = + src ? + nil : + make_stream(&block) - o = add(Obj.create(self, ref, s)) + obj = add(Obj.new(self, ref, source: src, stream: str)) - s.is_a?(Podoff::Stream) ? s : o + str || obj end def re_add(obj_or_ref) obj = obj_or_ref.is_a?(String) ? @objs[obj_or_ref] : obj_or_ref @@ -229,40 +231,27 @@ when :string, '-' then StringIO.new when String then File.open(path, 'wb') else path end - f.write(@source) + f.write(source) if @additions.any? pointers = {} @additions.values.each do |o| f.write("\n") - pointers[o.ref] = f.pos + 1 - if o.source.is_a?(String) - f.write(o.source) - else # Stream - s = o.source.to_s - f.write("#{o.ref} obj\n<< /Length #{s.length} >>\n") - f.write("stream\n#{s}\nendstream\nendobj") - end + pointers[o.ref.split(' ').first.to_i] = f.pos + 1 + f.write(o.to_s) end f.write("\n\n") xref = f.pos + 1 - f.write("xref\n") - f.write("0 1\n") - f.write("0000000000 65535 f\n") + write_xref(f, pointers) - pointers.each do |k, v| - f.write("#{k.split(' ').first} 1\n") - f.write(sprintf("%010d 00000 n\n", v)) - end - f.write("trailer\n") f.write("<<\n") f.write("/Prev #{self.xref}\n") f.write("/Size #{objs.size}\n") f.write("/Root #{root} R\n") @@ -287,47 +276,22 @@ v = source.match(/%PDF-\d+\.\d+/)[0] f.write(v) f.write("\n") - ptrs = {} + pointers = {} objs.keys.sort.each do |k| - ptrs[k] = f.pos + 1 + pointers[k.split(' ').first.to_i] = f.pos + 1 f.write(objs[k].source) f.write("\n") end xref = f.pos + 1 - max = objs.keys.inject(-1) { |i, k| [ i, k.split(' ')[0].to_i ].max } - #f.write("xref\n0 #{max}\n0000000000 65535 f\n") - f.write("xref\n0 1\n0000000000 65535 f\n") + write_xref(f, pointers) - partitions = [ [] ] - # - (1..max).each do |i| - k = "#{i} 0" - last = partitions.last - if ptrs.has_key?(k) - last << i - else - partitions << [] unless last == [] - end - end - # - partitions.each do |part| - - f.write("#{part.first} #{part.size}\n") - - part.each do |i| - k = "#{i} 0" - #f.write(sprintf("%010d 00000 n %% %s\n", ptrs[k], k)) - f.write(sprintf("%010d 00000 n\n", ptrs[k])) - end - end - f.write("trailer\n") f.write("<<\n") f.write("/Size #{objs.size}\n") f.write("/Root #{root} R\n") f.write(">>\n") @@ -337,12 +301,32 @@ f.close if path.is_a?(String) || path.is_a?(Symbol) f.is_a?(StringIO) ? f.string : nil end - private + protected + def write_xref(f, pointers) + + f.write("xref\n") + f.write("0 1\n") + f.write("0000000000 65535 f\n") + + pointers + .keys + .sort + .inject([ [] ]) { |ps, k| + ps << [] if ps.last != [] && k > ps.last.last + 1 + ps.last << k + ps + } + .each { |part| + f.write("#{part.first} #{part.size}\n") + part.each { |k| f.write(sprintf("%010d 00000 n\n", pointers[k])) } + } + end + def make_stream(&block) s = Stream.new s.instance_exec(&block) if block @@ -353,72 +337,72 @@ class Obj ATTRIBUTES = { type: 'Type', contents: 'Contents', pagenum: 'pdftk_PageNum' } - def self.extract(doc, sca) + def self.extract(doc) + sca = doc.scanner + re = sca.matched[0..-4].strip st = sca.pos - sca.matched.length i = sca.skip_until(/endobj/); return nil unless i en = sca.pos - 1 - atts = {} - ATTRIBUTES.each do |k, v| - sca.pos = st - i = sca.skip_until(/\/#{v}\b/); next unless i - next if sca.pos > en - atts[k] = sca.scan(/ *\/?[^\n\r\/>]+/).strip - end - - sca.pos = en - - Podoff::Obj.new(doc, re, st, en, atts) + Podoff::Obj.new(doc, re, start_index: st, end_index: en) end attr_reader :document attr_reader :ref attr_reader :start_index, :end_index + attr_reader :stream attr_reader :attributes - def initialize(doc, ref, st, en, atts, source=nil) + def initialize(doc, ref, opts={}) @document = doc @ref = ref - @start_index = st - @end_index = en - @attributes = atts - @source = source - recompute_attributes if @source.is_a?(String) - @source.obj = self if @source.is_a?(Podoff::Stream) - end + @start_index = opts[:start_index] + @end_index = opts[:end_index] + @attributes = nil + @source = opts[:source] - def dup(new_doc) + @stream = opts[:stream] + @stream.obj = self if @stream - self.class.new(new_doc, ref, start_index, end_index, attributes.dup) + recompute_attributes + #@source.obj = self if @source.is_a?(Podoff::Stream) + + @document.scanner.pos = @end_index if @document.scanner && @end_index end - def self.create(doc, ref, source) + def dup(new_doc) - self.new(doc, ref, nil, nil, nil, source) + self.class.new( + new_doc, ref, + start_index: start_index, end_index: end_index) end + #def self.create(doc, ref, source) + # self.new(doc, ref, nil, nil, nil, source) + #end + def replicate - self.class.create(document, ref, source.dup) + self.class.new(document, ref, source: source.dup) end def to_a [ @ref, @start_index, @end_index, @attributes ] end def source - @source || @document.source[@start_index..@end_index] + @source || (@start_index && @document.source[@start_index..@end_index]) end def replica? @source != nil @@ -461,18 +445,33 @@ add_to_attribute(:contents, re) end alias :insert_content :insert_contents + def to_s + + source || stream.to_s + end + protected def recompute_attributes + st, en, sca = + if @start_index + [ @start_index, @end_index, @document.scanner ] + elsif @source + [ 0, @source.length, ::StringScanner.new(@source) ] + end + + return unless sca + @attributes = ATTRIBUTES.inject({}) do |h, (k, v)| - m = @source.match(/\/#{v}\s+(\/?[^\/\n<>]+)/) - h[k] = m[1].strip if m + sca.pos = st + i = sca.skip_until(/\/#{v}\b/) + h[k] = sca.scan(/ *\/?[^\n\r\/>]+/).strip if i && sca.pos < en h end end def concat(refs, ref) @@ -502,12 +501,13 @@ class Stream attr_accessor :obj - def initialize + def initialize(obj=nil) + @obj = obj @font = nil @content = StringIO.new end def tf(font_name, font_size) @@ -533,10 +533,19 @@ @content.write(text) end def to_s - @content.string + s = @content.string + f = '' + if s.length > 98 + f = ' /Filter /FlateDecode' + s = Zlib::Deflate.deflate(s) + end + + "#{obj.ref} obj\n" + + "<</Length #{s.size}#{f}>>\nstream\n#{s}\nendstream\n" + + "endobj" end protected def escape(s)