lib/podoff.rb in podoff-1.1.1 vs lib/podoff.rb in podoff-1.2.0
- old
+ new
@@ -28,48 +28,55 @@
require 'stringio'
module Podoff
- VERSION = '1.1.1'
+ VERSION = '1.2.0'
- def self.load(path, encoding='iso-8859-1')
+ def self.load(path, encoding)
Podoff::Document.load(path, encoding)
end
- def self.parse(s)
+ def self.parse(s, encoding)
- Podoff::Document.new(s)
+ Podoff::Document.new(s, encoding)
end
class Document
- def self.load(path, encoding='iso-8859-1')
+ def self.load(path, encoding)
- Podoff::Document.new(File.open(path, 'r:' + encoding) { |f| f.read })
+ Podoff::Document.new(
+ File.open(path, 'r:' + encoding) { |f| f.read },
+ encoding
+ )
end
# Builds a Document from +s+, a string holding the PDF source.
#
# +encoding+ is optional. When omitted, the name of the string's own
# encoding is used, so that callers written as +Document.parse(s)+ keep
# working now that the constructor takes an encoding as second argument.
#
# Returns the new Podoff::Document; whatever the constructor raises
# (for instance ArgumentError on non-PDF input) propagates unchanged.
def self.parse(s, encoding=nil)
  Podoff::Document.new(s, encoding || s.encoding.name)
end
+ attr_reader :encoding
+
attr_reader :scanner
attr_reader :version
attr_reader :xref
attr_reader :objs
attr_reader :obj_counters
attr_reader :root
#
attr_reader :additions
- def initialize(s)
+ def initialize(s, encoding)
fail ArgumentError.new('not a PDF file') \
unless s.match(/\A%PDF-\d+\.\d+\s/)
+ @encoding = encoding
+
@scanner = ::StringScanner.new(s)
@version = nil
@xref = nil
@objs = {}
@obj_counters = {}
@@ -111,15 +118,10 @@
# Returns the string held by this document's scanner, i.e. the text the
# StringScanner in @scanner was created with.
def source
@scanner.string
end
- def extract_ref(s)
-
- s.gsub(/\s+/, ' ').gsub(/[^0-9 ]+/, '').strip
- end
-
# Tells whether anything has been added to this document: true when
# @additions reports at least one entry via #any?, false otherwise.
def updated?
@additions.any?
end
@@ -127,10 +129,12 @@
o = self
self.class.allocate.instance_eval do
+ @encoding = o.encoding
+
@scanner = ::StringScanner.new(o.source)
@xref = o.xref
@objs = o.objs.inject({}) { |h, (k, v)| h[k] = v.dup(self); h }
@obj_counters = o.obj_counters.dup
@@ -144,30 +148,27 @@
end
end
def pages
- @objs.values.select { |o| o.type == '/Page' }
+ #@objs.values.select { |o| o.type == '/Page' }
+
+ ps = @objs.values.find { |o| o.type == '/Pages' }
+ return nil unless ps
+
+ extract_refs(ps.attributes[:kids]).collect { |r| @objs[r] }
end
def page(index)
- return nil if index == 0
-
- pas = pages
- return nil if pas.empty?
-
- return (
- index > 0 ? pas.at(index - 1) : pas.at(index)
- ) unless pas.first.attributes[:pagenum]
-
if index < 0
- max = pas.inject(0) { |n, pa| [ n, pa.page_number ].max }
- index = max + 1 + index
+ pages[index]
+ elsif index == 0
+ nil
+ else
+ pages[index - 1]
end
-
- pas.find { |pa| pa.page_number == index }
end
def new_ref
"#{
@@ -222,40 +223,44 @@
obj = obj.replicate unless obj.replica?
add(obj)
end
- def write(path)
+ def write(path=:string, encoding=nil)
+ encoding ||= @encoding
+
f =
case path
when :string, '-' then StringIO.new
when String then File.open(path, 'wb')
else path
end
+ f.set_encoding(encoding) # internal encoding: nil
+ #f.set_encoding(encoding, encoding)
f.write(source)
if @additions.any?
pointers = {}
@additions.values.each do |o|
f.write("\n")
- pointers[o.ref.split(' ').first.to_i] = f.pos + 1
- f.write(o.to_s)
+ pointers[o.ref.split(' ').first.to_i] = f.pos
+ f.write(o.to_s.force_encoding(encoding))
end
f.write("\n\n")
- xref = f.pos + 1
+ xref = f.pos
write_xref(f, pointers)
f.write("trailer\n")
f.write("<<\n")
f.write("/Prev #{self.xref}\n")
- f.write("/Size #{objs.size}\n")
+ f.write("/Size #{objs.size + 1}\n")
f.write("/Root #{root} R\n")
f.write(">>\n")
f.write("startxref #{xref}\n")
f.write("%%EOF\n")
end
@@ -263,38 +268,41 @@
f.close if path.is_a?(String) || path.is_a?(Symbol)
f.is_a?(StringIO) ? f.string : nil
end
- def rewrite(path=:string)
+ def rewrite(path=:string, encoding=nil)
+ encoding ||= @encoding
+
f =
case path
when :string, '-' then StringIO.new
when String then File.open(path, 'wb')
else path
end
+ f.set_encoding(encoding)
v = source.match(/%PDF-\d+\.\d+/)[0]
f.write(v)
f.write("\n")
pointers = {}
objs.keys.sort.each do |k|
- pointers[k.split(' ').first.to_i] = f.pos + 1
- f.write(objs[k].source)
+ pointers[k.split(' ').first.to_i] = f.pos
+ f.write(objs[k].source.force_encoding(encoding))
f.write("\n")
end
- xref = f.pos + 1
+ xref = f.pos
write_xref(f, pointers)
f.write("trailer\n")
f.write("<<\n")
- f.write("/Size #{objs.size}\n")
+ f.write("/Size #{objs.size + 1}\n")
f.write("/Root #{root} R\n")
f.write(">>\n")
f.write("startxref #{xref}\n")
f.write("%%EOF\n")
@@ -307,11 +315,11 @@
def write_xref(f, pointers)
f.write("xref\n")
f.write("0 1\n")
- f.write("0000000000 65535 f\n")
+ f.write("0000000000 65535 f \n")
pointers
.keys
.sort
.inject([ [] ]) { |ps, k|
@@ -319,27 +327,36 @@
ps.last << k
ps
}
.each { |part|
f.write("#{part.first} #{part.size}\n")
- part.each { |k| f.write(sprintf("%010d 00000 n\n", pointers[k])) }
+ part.each { |k| f.write(sprintf("%010d 00000 n \n", pointers[k])) }
}
end
# Creates a fresh Stream and returns it.
#
# When a block is passed, it is evaluated with the new stream as +self+
# (via instance_exec) before the stream is handed back; without a block
# the untouched stream is returned.
def make_stream(&block)
  Stream.new.tap do |stream|
    stream.instance_exec(&block) if block
  end
end
+
+ def extract_ref(s)
+
+ s.gsub(/\s+/, ' ').gsub(/[^0-9 ]+/, '').strip
+ end
+
+ def extract_refs(s)
+
+ s.gsub(/\s+/, ' ').scan(/(\d+ \d+) R/).collect(&:first)
+ end
end
class Obj
- ATTRIBUTES =
- { type: 'Type', contents: 'Contents', pagenum: 'pdftk_PageNum' }
+ ATTRIBUTES = { type: 'Type', contents: 'Contents', kids: 'Kids' }
def self.extract(doc)
sca = doc.scanner
@@ -409,15 +426,9 @@
end
def type
@attributes && @attributes[:type]
- end
-
- def page_number
-
- r = @attributes && @attributes[:pagenum]
- r ? r.to_i : nil
end
def insert_font(nick, obj_or_ref)
fail ArgumentError.new("target '#{ref}' not a replica") \