lib/podoff.rb in podoff-1.0.0 vs lib/podoff.rb in podoff-1.1.0
- old
+ new
@@ -21,14 +21,17 @@
# THE SOFTWARE.
#
# Made in Japan.
#++
+require 'strscan'
+require 'stringio'
+
module Podoff
- VERSION = '1.0.0'
+ VERSION = '1.1.0'
def self.load(path, encoding='iso-8859-1')
Podoff::Document.load(path, encoding)
end
@@ -36,17 +39,10 @@
def self.parse(s)
Podoff::Document.new(s)
end
- #OBJ_ATTRIBUTES =
- # { type: 'Type', subtype: 'Subtype',
- # parent: 'Parent', kids: 'Kids', contents: 'Contents', annots: 'Annots',
- # pagenum: 'pdftk_PageNum' }
- OBJ_ATTRIBUTES =
- { type: 'Type', contents: 'Contents', pagenum: 'pdftk_PageNum' }
-
class Document
def self.load(path, encoding='iso-8859-1')
Podoff::Document.new(File.open(path, 'r:' + encoding) { |f| f.read })
@@ -56,73 +52,69 @@
Podoff::Document.new(s)
end
attr_reader :source
+ attr_reader :version
attr_reader :xref
attr_reader :objs
attr_reader :obj_counters
attr_reader :root
#
attr_reader :additions
def initialize(s) # s: the whole PDF as a String; raises ArgumentError unless it opens with a "%PDF-x.y" header
fail ArgumentError.new('not a PDF file') \
- unless s.match(/\A%PDF-\d+\.\d+\n/)
+ unless s.match(/\A%PDF-\d+\.\d+\s/) # any whitespace may follow the header, not only "\n"
@source = s
+ @version = nil
@xref = nil
@objs = {}
@obj_counters = {}
@root = nil
@additions = {}
- index = 0
- matches = {}
- #
+ sca = ::StringScanner.new(s)
+ @version = sca.scan(/%PDF-\d+\.\d+/) # the header itself, e.g. "%PDF-1.4"
+
loop do
- matches[:obj] ||= s.match(/^(\d+ \d+) obj\b/, index)
- matches[:endobj] ||= s.match(/\bendobj\b/, index)
- #
- OBJ_ATTRIBUTES.each do |k, v|
- matches[k] ||= s.match(/\/#{v} (\/?[^\/\n<>]+)/, index)
- end
- #
- matches[:startxref] ||= s.match(/\bstartxref\s+(\d+)\s*%%EOF/, index)
+ i = sca.skip_until( # jump to the next token of interest; the count returned in i is not used
+ /(startxref\s+\d+|\d+\s+\d+\s+obj|\/Root\s+\d+\s+\d+\s+R)/)
- objm = matches[:obj]
- sxrm = matches[:startxref]
+ m = sca.matched
+ break unless m # nothing left to match, scanning is over
- break unless sxrm || objm
-
- fail ArgumentError.new('failed to find "startxref"') unless sxrm
-
- @root = nil if @root && index > @root.offset(0).last
- @root ||= s.match(/\/Root (\d+ \d+) R\b/, index)
-
- sxri = sxrm.offset(0).first
- obji = objm ? objm.offset(0).first : sxri + 1
-
- if obji < sxri
- obj = Podoff::Obj.extract(self, matches)
+ if m[0] == 's' # "startxref N": keep N, a later startxref overwrites an earlier one
+ @xref = m.split(' ').last.to_i
+ elsif m[0] == '/' # "/Root N G R": keep the "N G" reference
+ @root = extract_ref(m)
+ else # "N G obj": an object header, Obj.extract consumes up to its "endobj"
+ obj = Podoff::Obj.extract(self, sca)
@objs[obj.ref] = obj # NOTE(review): Obj.extract returns nil when no "endobj" follows, obj.ref would then raise NoMethodError
@obj_counters[obj.ref] = (@obj_counters[obj.ref] || 0) + 1 # how many times this ref has been met so far
- index = obj.end_index + 1
- else
- @xref = sxrm[1].to_i
- index = sxrm.offset(0).last + 1
- matches.delete(:startxref)
end
end
- fail ArgumentError.new('found no /Root') unless @root
- @root = @root[1]
+ if @root == nil # the main loop met no /Root: rescan the whole source for one
+ sca.pos = 0
+ loop do
+ i = sca.skip_until(/\/Root\s+\d+\s+\d+\s+R/)
+ break unless sca.matched
+ @root = extract_ref(sca.matched) # the loop goes on, so the last /Root in the source wins
+ end
+ end
end
+ def extract_ref(s)
+
+ s.gsub(/\s+/, ' ').gsub(/[^0-9 ]+/, '').strip
+ end
+
def updated?
@additions.any?
end
@@ -230,11 +222,16 @@
add(obj)
end
def write(path)
- f = (path == :string) ? StringIO.new : File.open(path, 'wb')
+ f =
+ case path
+ when :string, '-' then StringIO.new
+ when String then File.open(path, 'wb')
+ else path
+ end
f.write(@source)
if @additions.any?
@@ -272,15 +269,78 @@
f.write(">>\n")
f.write("startxref #{xref}\n")
f.write("%%EOF\n")
end
- f.close
+ f.close if path.is_a?(String) || path.is_a?(Symbol)
- path == :string ? f.string : nil
+ f.is_a?(StringIO) ? f.string : nil
end
+ def rewrite(path=:string)
+
+ f =
+ case path
+ when :string, '-' then StringIO.new
+ when String then File.open(path, 'wb')
+ else path
+ end
+
+ v = source.match(/%PDF-\d+\.\d+/)[0]
+ f.write(v)
+ f.write("\n")
+
+ ptrs = {}
+
+ objs.keys.sort.each do |k|
+ ptrs[k] = f.pos + 1
+ f.write(objs[k].source)
+ f.write("\n")
+ end
+
+ xref = f.pos + 1
+ max = objs.keys.inject(-1) { |i, k| [ i, k.split(' ')[0].to_i ].max }
+
+ #f.write("xref\n0 #{max}\n0000000000 65535 f\n")
+ f.write("xref\n0 1\n0000000000 65535 f\n")
+
+ partitions = [ [] ]
+ #
+ (1..max).each do |i|
+ k = "#{i} 0"
+ last = partitions.last
+ if ptrs.has_key?(k)
+ last << i
+ else
+ partitions << [] unless last == []
+ end
+ end
+ #
+ partitions.each do |part|
+
+ f.write("#{part.first} #{part.size}\n")
+
+ part.each do |i|
+ k = "#{i} 0"
+ #f.write(sprintf("%010d 00000 n %% %s\n", ptrs[k], k))
+ f.write(sprintf("%010d 00000 n\n", ptrs[k]))
+ end
+ end
+
+ f.write("trailer\n")
+ f.write("<<\n")
+ f.write("/Size #{objs.size}\n")
+ f.write("/Root #{root} R\n")
+ f.write(">>\n")
+ f.write("startxref #{xref}\n")
+ f.write("%%EOF\n")
+
+ f.close if path.is_a?(String) || path.is_a?(Symbol)
+
+ f.is_a?(StringIO) ? f.string : nil
+ end
+
private
def make_stream(&block)
s = Stream.new
@@ -290,28 +350,30 @@
end
end
class Obj
- def self.extract(doc, matches)
+ ATTRIBUTES =
+ { type: 'Type', contents: 'Contents', pagenum: 'pdftk_PageNum' }
- re = matches[:obj][1]
- st = matches[:obj].offset(0).first
- en = matches[:endobj].offset(0).last - 1
+ def self.extract(doc, sca)
- atts = {}
+ re = sca.matched[0..-4].strip
+ st = sca.pos - sca.matched.length
- OBJ_ATTRIBUTES.keys.each do |k|
- m = matches[k]
- if m && m.offset(0).last < en
- atts[k] = m[1].strip
- matches.delete(k)
- end
+ i = sca.skip_until(/endobj/); return nil unless i
+ en = sca.pos - 1
+
+ atts = {}
+ ATTRIBUTES.each do |k, v|
+ sca.pos = st
+ i = sca.skip_until(/\/#{v}\b/); next unless i
+ next if sca.pos > en
+ atts[k] = sca.scan(/ *\/?[^\n\r\/>]+/).strip
end
- matches.delete(:obj)
- matches.delete(:endobj)
+ sca.pos = en
Podoff::Obj.new(doc, re, st, en, atts)
end
attr_reader :document
@@ -371,66 +433,10 @@
r = @attributes && @attributes[:pagenum]
r ? r.to_i : nil
end
-# def parent
-#
-# r = @attributes[:parent]
-# r ? r[0..-2].strip : nil
-# end
-#
-# def kids
-#
-# r = @attributes[:kids]
-# (r || '').split(/[\[\]R]/).collect(&:strip).reject(&:empty?)
-# end
-#
-# def contents
-#
-# r = @attributes[:contents]
-# (r || '').split(/[\[\]R]/).collect(&:strip).reject(&:empty?)
-# end
-
-# def add_annotation(ref)
-#
-# if annots = @attributes[:annots]
-# fail "implement me!"
-# else
-# i = @source.index('/Type ')
-# @source.insert(i, "/Annots [#{ref} R]\n")
-# end
-# recompute_attributes
-# end
-
-# def add_free_text(x, y, text, font, size)
-#
-# fail ArgumentError.new('target is not a page') unless type == '/Page'
-#
-# nref = document.new_ref
-#
-# s = [
-# "#{nref} obj <<",
-# "/Type /Annot",
-# "/Subtype /FreeText",
-# "/Da (/F1 70 Tf 0 100 Td)",
-# "/Rect [0 0 500 600]",
-# "/Contents (#{text})",
-# ">>",
-# "endobj"
-# ].join("\n")
-# anno = Obj.create(document, nref, s)
-#
-# page = self.replicate
-# page.add_annotation(nref)
-#
-# document.add(anno)
-# document.add(page)
-#
-# anno
-# end
-
def insert_font(nick, obj_or_ref)
fail ArgumentError.new("target '#{ref}' not a replica") \
unless @source
@@ -460,13 +466,13 @@
protected
def recompute_attributes
@attributes =
- OBJ_ATTRIBUTES.inject({}) do |h, (k, v)|
- m = @source.match(/\/#{v} (\/?[^\/\n<>]+)/)
- h[k] = m[1] if m
+ ATTRIBUTES.inject({}) do |h, (k, v)|
+ m = @source.match(/\/#{v}\s+(\/?[^\/\n<>]+)/)
+ h[k] = m[1].strip if m
h
end
end
def concat(refs, ref)
@@ -479,11 +485,11 @@
def add_to_attribute(key, ref)
fail ArgumentError.new("obj not replicated") unless @source
- pkey = OBJ_ATTRIBUTES[key]
+ pkey = ATTRIBUTES[key]
if v = @attributes[key]
v = concat(v, ref)
@source = @source.gsub(/#{pkey} ([\[\]0-9 R]+)/, "#{pkey} #{v}")
else
@@ -501,13 +507,9 @@
def initialize
@font = nil
@content = StringIO.new
end
-
- #def document; obj.document; end
- #def ref; obj.ref; end
- #def source; self; end
def tf(font_name, font_size)
n = font_name[0] == '/' ? font_name[1..-1] : font_name