lib/podoff.rb in podoff-1.1.0 vs lib/podoff.rb in podoff-1.1.1
- old
+ new
@@ -21,17 +21,18 @@
# THE SOFTWARE.
#
# Made in Japan.
#++
+require 'zlib'
require 'strscan'
require 'stringio'
module Podoff
- VERSION = '1.1.0'
+ VERSION = '1.1.1'
# Loads a PDF document from +path+.
#
# Hands both arguments straight to Podoff::Document.load and returns
# whatever that call returns. +encoding+ defaults to 'iso-8859-1'.
def self.load(path, encoding='iso-8859-1')
  Podoff::Document.load(path, encoding)
end
@@ -51,11 +52,11 @@
# Builds a document from a string.
#
# +s+ is passed unchanged to Podoff::Document.new and the freshly
# constructed instance is returned.
def self.parse(s)
  Podoff::Document.new(s)
end
- attr_reader :source
+ attr_reader :scanner
attr_reader :version
attr_reader :xref
attr_reader :objs
attr_reader :obj_counters
attr_reader :root
@@ -65,51 +66,55 @@
def initialize(s)
fail ArgumentError.new('not a PDF file') \
unless s.match(/\A%PDF-\d+\.\d+\s/)
- @source = s
+ @scanner = ::StringScanner.new(s)
@version = nil
@xref = nil
@objs = {}
@obj_counters = {}
@root = nil
@additions = {}
- sca = ::StringScanner.new(s)
- @version = sca.scan(/%PDF-\d+\.\d+/)
+ @version = @scanner.scan(/%PDF-\d+\.\d+/)
loop do
- i = sca.skip_until(
+ i = @scanner.skip_until(
/(startxref\s+\d+|\d+\s+\d+\s+obj|\/Root\s+\d+\s+\d+\s+R)/)
- m = sca.matched
+ m = @scanner.matched
break unless m
if m[0] == 's'
@xref = m.split(' ').last.to_i
elsif m[0] == '/'
@root = extract_ref(m)
else
- obj = Podoff::Obj.extract(self, sca)
+ obj = Podoff::Obj.extract(self)
@objs[obj.ref] = obj
@obj_counters[obj.ref] = (@obj_counters[obj.ref] || 0) + 1
end
end
if @root == nil
- sca.pos = 0
+ @scanner.pos = 0
loop do
- i = sca.skip_until(/\/Root\s+\d+\s+\d+\s+R/)
- break unless sca.matched
- @root = extract_ref(sca.matched)
+ i = @scanner.skip_until(/\/Root\s+\d+\s+\d+\s+R/)
+ break unless @scanner.matched
+ @root = extract_ref(@scanner.matched)
end
end
end
+ def source
+
+ @scanner.string
+ end
+
# Reduces a matched fragment such as "/Root 1 0 R" to its numeric
# part ("1 0").
#
# Steps: every run of whitespace becomes a single space, every character
# that is neither an ASCII digit nor a space is dropped, and the result
# is stripped of leading/trailing whitespace.
def extract_ref(s)
  normalized = s.gsub(/\s+/, ' ')
  digits_and_spaces = normalized.delete('^0-9 ')

  digits_and_spaces.strip
end
@@ -122,11 +127,11 @@
o = self
self.class.allocate.instance_eval do
- @source = o.source
+ @scanner = ::StringScanner.new(o.source)
@xref = o.xref
@objs = o.objs.inject({}) { |h, (k, v)| h[k] = v.dup(self); h }
@obj_counters = o.obj_counters.dup
@@ -180,39 +185,36 @@
def add_base_font(name)
name = name[1..-1] if name[0] == '/'
- ref = new_ref
+ r = new_ref
+ s = "#{r} obj <</Type /Font /Subtype /Type1 /BaseFont /#{name}>> endobj"
- add(
- Obj.create(
- self,
- ref,
- [
- "#{ref} obj",
- "<< /Type /Font /Subtype /Type1 /BaseFont /#{name} >>",
- "endobj"
- ].join(' ')))
+ add(Obj.new(self, r, source: s))
end
- def add_stream(s=nil, &block)
+ def add_stream(src=nil, &block)
ref = new_ref
- s = s || make_stream(&block)
+ src =
+ src &&
+ [
+ "#{ref} obj",
+ "<< /Length #{src.size} >>\nstream\n#{src}\nendstream",
+ "endobj"
+ ].join("\n")
- s = [
- "#{ref} obj",
- "<< /Length #{s.length} >>",
- "stream\n#{s}\nendstream",
- "endobj"
- ].join("\n") if s.is_a?(String)
+ str =
+ src ?
+ nil :
+ make_stream(&block)
- o = add(Obj.create(self, ref, s))
+ obj = add(Obj.new(self, ref, source: src, stream: str))
- s.is_a?(Podoff::Stream) ? s : o
+ str || obj
end
def re_add(obj_or_ref)
obj = obj_or_ref.is_a?(String) ? @objs[obj_or_ref] : obj_or_ref
@@ -229,40 +231,27 @@
when :string, '-' then StringIO.new
when String then File.open(path, 'wb')
else path
end
- f.write(@source)
+ f.write(source)
if @additions.any?
pointers = {}
@additions.values.each do |o|
f.write("\n")
- pointers[o.ref] = f.pos + 1
- if o.source.is_a?(String)
- f.write(o.source)
- else # Stream
- s = o.source.to_s
- f.write("#{o.ref} obj\n<< /Length #{s.length} >>\n")
- f.write("stream\n#{s}\nendstream\nendobj")
- end
+ pointers[o.ref.split(' ').first.to_i] = f.pos + 1
+ f.write(o.to_s)
end
f.write("\n\n")
xref = f.pos + 1
- f.write("xref\n")
- f.write("0 1\n")
- f.write("0000000000 65535 f\n")
+ write_xref(f, pointers)
- pointers.each do |k, v|
- f.write("#{k.split(' ').first} 1\n")
- f.write(sprintf("%010d 00000 n\n", v))
- end
-
f.write("trailer\n")
f.write("<<\n")
f.write("/Prev #{self.xref}\n")
f.write("/Size #{objs.size}\n")
f.write("/Root #{root} R\n")
@@ -287,47 +276,22 @@
v = source.match(/%PDF-\d+\.\d+/)[0]
f.write(v)
f.write("\n")
- ptrs = {}
+ pointers = {}
objs.keys.sort.each do |k|
- ptrs[k] = f.pos + 1
+ pointers[k.split(' ').first.to_i] = f.pos + 1
f.write(objs[k].source)
f.write("\n")
end
xref = f.pos + 1
- max = objs.keys.inject(-1) { |i, k| [ i, k.split(' ')[0].to_i ].max }
- #f.write("xref\n0 #{max}\n0000000000 65535 f\n")
- f.write("xref\n0 1\n0000000000 65535 f\n")
+ write_xref(f, pointers)
- partitions = [ [] ]
- #
- (1..max).each do |i|
- k = "#{i} 0"
- last = partitions.last
- if ptrs.has_key?(k)
- last << i
- else
- partitions << [] unless last == []
- end
- end
- #
- partitions.each do |part|
-
- f.write("#{part.first} #{part.size}\n")
-
- part.each do |i|
- k = "#{i} 0"
- #f.write(sprintf("%010d 00000 n %% %s\n", ptrs[k], k))
- f.write(sprintf("%010d 00000 n\n", ptrs[k]))
- end
- end
-
f.write("trailer\n")
f.write("<<\n")
f.write("/Size #{objs.size}\n")
f.write("/Root #{root} R\n")
f.write(">>\n")
@@ -337,12 +301,32 @@
f.close if path.is_a?(String) || path.is_a?(Symbol)
f.is_a?(StringIO) ? f.string : nil
end
- private
+ protected
+ def write_xref(f, pointers)
+
+ f.write("xref\n")
+ f.write("0 1\n")
+ f.write("0000000000 65535 f\n")
+
+ pointers
+ .keys
+ .sort
+ .inject([ [] ]) { |ps, k|
+ ps << [] if ps.last != [] && k > ps.last.last + 1
+ ps.last << k
+ ps
+ }
+ .each { |part|
+ f.write("#{part.first} #{part.size}\n")
+ part.each { |k| f.write(sprintf("%010d 00000 n\n", pointers[k])) }
+ }
+ end
+
def make_stream(&block)
s = Stream.new
s.instance_exec(&block) if block
@@ -353,72 +337,72 @@
class Obj
ATTRIBUTES =
{ type: 'Type', contents: 'Contents', pagenum: 'pdftk_PageNum' }
- def self.extract(doc, sca)
+ def self.extract(doc)
+ sca = doc.scanner
+
re = sca.matched[0..-4].strip
st = sca.pos - sca.matched.length
i = sca.skip_until(/endobj/); return nil unless i
en = sca.pos - 1
- atts = {}
- ATTRIBUTES.each do |k, v|
- sca.pos = st
- i = sca.skip_until(/\/#{v}\b/); next unless i
- next if sca.pos > en
- atts[k] = sca.scan(/ *\/?[^\n\r\/>]+/).strip
- end
-
- sca.pos = en
-
- Podoff::Obj.new(doc, re, st, en, atts)
+ Podoff::Obj.new(doc, re, start_index: st, end_index: en)
end
attr_reader :document
attr_reader :ref
attr_reader :start_index, :end_index
+ attr_reader :stream
attr_reader :attributes
- def initialize(doc, ref, st, en, atts, source=nil)
+ def initialize(doc, ref, opts={})
@document = doc
@ref = ref
- @start_index = st
- @end_index = en
- @attributes = atts
- @source = source
- recompute_attributes if @source.is_a?(String)
- @source.obj = self if @source.is_a?(Podoff::Stream)
- end
+ @start_index = opts[:start_index]
+ @end_index = opts[:end_index]
+ @attributes = nil
+ @source = opts[:source]
- def dup(new_doc)
+ @stream = opts[:stream]
+ @stream.obj = self if @stream
- self.class.new(new_doc, ref, start_index, end_index, attributes.dup)
+ recompute_attributes
+ #@source.obj = self if @source.is_a?(Podoff::Stream)
+
+ @document.scanner.pos = @end_index if @document.scanner && @end_index
end
- def self.create(doc, ref, source)
+ def dup(new_doc)
- self.new(doc, ref, nil, nil, nil, source)
+ self.class.new(
+ new_doc, ref,
+ start_index: start_index, end_index: end_index)
end
+ #def self.create(doc, ref, source)
+ # self.new(doc, ref, nil, nil, nil, source)
+ #end
+
def replicate
- self.class.create(document, ref, source.dup)
+ self.class.new(document, ref, source: source.dup)
end
# Array form of this object: its ref, its start and end indexes, and
# its attributes, in that order.
def to_a
  [@ref, @start_index, @end_index, @attributes]
end
def source
- @source || @document.source[@start_index..@end_index]
+ @source || (@start_index && @document.source[@start_index..@end_index])
end
def replica?
@source != nil
@@ -461,18 +445,33 @@
add_to_attribute(:contents, re)
end
alias :insert_content :insert_contents
+ def to_s
+
+ source || stream.to_s
+ end
+
protected
def recompute_attributes
+ st, en, sca =
+ if @start_index
+ [ @start_index, @end_index, @document.scanner ]
+ elsif @source
+ [ 0, @source.length, ::StringScanner.new(@source) ]
+ end
+
+ return unless sca
+
@attributes =
ATTRIBUTES.inject({}) do |h, (k, v)|
- m = @source.match(/\/#{v}\s+(\/?[^\/\n<>]+)/)
- h[k] = m[1].strip if m
+ sca.pos = st
+ i = sca.skip_until(/\/#{v}\b/)
+ h[k] = sca.scan(/ *\/?[^\n\r\/>]+/).strip if i && sca.pos < en
h
end
end
def concat(refs, ref)
@@ -502,12 +501,13 @@
class Stream
attr_accessor :obj
- def initialize
+ def initialize(obj=nil)
+ @obj = obj
@font = nil
@content = StringIO.new
end
def tf(font_name, font_size)
@@ -533,10 +533,19 @@
@content.write(text)
end
def to_s
- @content.string
+ s = @content.string
+ f = ''
+ if s.length > 98
+ f = ' /Filter /FlateDecode'
+ s = Zlib::Deflate.deflate(s)
+ end
+
+ "#{obj.ref} obj\n" +
+ "<</Length #{s.size}#{f}>>\nstream\n#{s}\nendstream\n" +
+ "endobj"
end
protected
def escape(s)