lib/podoff.rb in podoff-1.1.1 vs lib/podoff.rb in podoff-1.2.0

- old
+ new

@@ -28,48 +28,55 @@ require 'stringio' module Podoff - VERSION = '1.1.1' + VERSION = '1.2.0' - def self.load(path, encoding='iso-8859-1') + def self.load(path, encoding) Podoff::Document.load(path, encoding) end - def self.parse(s) + def self.parse(s, encoding) - Podoff::Document.new(s) + Podoff::Document.new(s, encoding) end class Document - def self.load(path, encoding='iso-8859-1') + def self.load(path, encoding) - Podoff::Document.new(File.open(path, 'r:' + encoding) { |f| f.read }) + Podoff::Document.new( + File.open(path, 'r:' + encoding) { |f| f.read }, + encoding + ) end def self.parse(s) Podoff::Document.new(s) end + attr_reader :encoding + attr_reader :scanner attr_reader :version attr_reader :xref attr_reader :objs attr_reader :obj_counters attr_reader :root # attr_reader :additions - def initialize(s) + def initialize(s, encoding) fail ArgumentError.new('not a PDF file') \ unless s.match(/\A%PDF-\d+\.\d+\s/) + @encoding = encoding + @scanner = ::StringScanner.new(s) @version = nil @xref = nil @objs = {} @obj_counters = {} @@ -111,15 +118,10 @@ def source @scanner.string end - def extract_ref(s) - - s.gsub(/\s+/, ' ').gsub(/[^0-9 ]+/, '').strip - end - def updated? @additions.any? end @@ -127,10 +129,12 @@ o = self self.class.allocate.instance_eval do + @encoding = o.encoding + @scanner = ::StringScanner.new(o.source) @xref = o.xref @objs = o.objs.inject({}) { |h, (k, v)| h[k] = v.dup(self); h } @obj_counters = o.obj_counters.dup @@ -144,30 +148,27 @@ end end def pages - @objs.values.select { |o| o.type == '/Page' } + #@objs.values.select { |o| o.type == '/Page' } + + ps = @objs.values.find { |o| o.type == '/Pages' } + return nil unless ps + + extract_refs(ps.attributes[:kids]).collect { |r| @objs[r] } end def page(index) - return nil if index == 0 - - pas = pages - return nil if pas.empty? - - return ( - index > 0 ? pas.at(index - 1) : pas.at(index) - ) unless pas.first.attributes[:pagenum] - if index < 0 - max = pas.inject(0) { |n, pa| [ n, pa.page_number ].max } - index = max + 1 + index + pages[index] + elsif index == 0 + nil + else + pages[index - 1] end - - pas.find { |pa| pa.page_number == index } end def new_ref "#{ @@ -222,40 +223,44 @@ obj = obj.replicate unless obj.replica? add(obj) end - def write(path) + def write(path=:string, encoding=nil) + encoding ||= @encoding + f = case path when :string, '-' then StringIO.new when String then File.open(path, 'wb') else path end + f.set_encoding(encoding) # internal encoding: nil + #f.set_encoding(encoding, encoding) f.write(source) if @additions.any? pointers = {} @additions.values.each do |o| f.write("\n") - pointers[o.ref.split(' ').first.to_i] = f.pos + 1 - f.write(o.to_s) + pointers[o.ref.split(' ').first.to_i] = f.pos + f.write(o.to_s.force_encoding(encoding)) end f.write("\n\n") - xref = f.pos + 1 + xref = f.pos write_xref(f, pointers) f.write("trailer\n") f.write("<<\n") f.write("/Prev #{self.xref}\n") - f.write("/Size #{objs.size}\n") + f.write("/Size #{objs.size + 1}\n") f.write("/Root #{root} R\n") f.write(">>\n") f.write("startxref #{xref}\n") f.write("%%EOF\n") end @@ -263,38 +268,41 @@ f.close if path.is_a?(String) || path.is_a?(Symbol) f.is_a?(StringIO) ? f.string : nil end - def rewrite(path=:string) + def rewrite(path=:string, encoding=nil) + encoding ||= @encoding + f = case path when :string, '-' then StringIO.new when String then File.open(path, 'wb') else path end + f.set_encoding(encoding) v = source.match(/%PDF-\d+\.\d+/)[0] f.write(v) f.write("\n") pointers = {} objs.keys.sort.each do |k| - pointers[k.split(' ').first.to_i] = f.pos + 1 - f.write(objs[k].source) + pointers[k.split(' ').first.to_i] = f.pos + f.write(objs[k].source.force_encoding(encoding)) f.write("\n") end - xref = f.pos + 1 + xref = f.pos write_xref(f, pointers) f.write("trailer\n") f.write("<<\n") - f.write("/Size #{objs.size}\n") + f.write("/Size #{objs.size + 1}\n") f.write("/Root #{root} R\n") f.write(">>\n") f.write("startxref #{xref}\n") f.write("%%EOF\n") @@ -307,11 +315,11 @@ def write_xref(f, pointers) f.write("xref\n") f.write("0 1\n") - f.write("0000000000 65535 f\n") + f.write("0000000000 65535 f \n") pointers .keys .sort .inject([ [] ]) { |ps, k| @@ -319,27 +327,36 @@ ps.last << k ps } .each { |part| f.write("#{part.first} #{part.size}\n") - part.each { |k| f.write(sprintf("%010d 00000 n\n", pointers[k])) } + part.each { |k| f.write(sprintf("%010d 00000 n \n", pointers[k])) } } end def make_stream(&block) s = Stream.new s.instance_exec(&block) if block s end + + def extract_ref(s) + + s.gsub(/\s+/, ' ').gsub(/[^0-9 ]+/, '').strip + end + + def extract_refs(s) + + s.gsub(/\s+/, ' ').scan(/(\d+ \d+) R/).collect(&:first) + end end class Obj - ATTRIBUTES = - { type: 'Type', contents: 'Contents', pagenum: 'pdftk_PageNum' } + ATTRIBUTES = { type: 'Type', contents: 'Contents', kids: 'Kids' } def self.extract(doc) sca = doc.scanner @@ -409,15 +426,9 @@ end def type @attributes && @attributes[:type] - end - - def page_number - - r = @attributes && @attributes[:pagenum] - r ? r.to_i : nil end def insert_font(nick, obj_or_ref) fail ArgumentError.new("target '#{ref}' not a replica") \