lib/podoff.rb in podoff-0.9.0 vs lib/podoff.rb in podoff-1.0.0
- old
+ new
@@ -24,253 +24,524 @@
#++
module Podoff
- VERSION = '0.9.0'
+ VERSION = '1.0.0'
- def self.load(path)
+ def self.load(path, encoding='iso-8859-1')
- Podoff::Document.new(
- File.open(path, 'r:iso8859-1') { |f| f.read })
+ Podoff::Document.load(path, encoding)
end
+ def self.parse(s)
+
+ Podoff::Document.new(s)
+ end
+
+ #OBJ_ATTRIBUTES =
+ # { type: 'Type', subtype: 'Subtype',
+ # parent: 'Parent', kids: 'Kids', contents: 'Contents', annots: 'Annots',
+ # pagenum: 'pdftk_PageNum' }
+ OBJ_ATTRIBUTES =
+ { type: 'Type', contents: 'Contents', pagenum: 'pdftk_PageNum' }
+
class Document
- attr_reader :header
+ def self.load(path, encoding='iso-8859-1')
+
+ Podoff::Document.new(File.open(path, 'r:' + encoding) { |f| f.read })
+ end
+
+ def self.parse(s)
+
+ Podoff::Document.new(s)
+ end
+
+ attr_reader :source
+ attr_reader :xref
attr_reader :objs
- attr_reader :footer
+ attr_reader :obj_counters
+ attr_reader :root
+ #
+ attr_reader :additions
def initialize(s)
fail ArgumentError.new('not a PDF file') \
unless s.match(/\A%PDF-\d+\.\d+\n/)
- @header = []
- #
+ @source = s
+ @xref = nil
@objs = {}
- cur = nil
+ @obj_counters = {}
+ @root = nil
+
+ @additions = {}
+
+ index = 0
+ matches = {}
#
- @footer = nil
+ loop do
- s.split("\n").each do |l|
+ matches[:obj] ||= s.match(/^(\d+ \d+) obj\b/, index)
+ matches[:endobj] ||= s.match(/\bendobj\b/, index)
+ #
+ OBJ_ATTRIBUTES.each do |k, v|
+ matches[k] ||= s.match(/\/#{v} (\/?[^\/\n<>]+)/, index)
+ end
+ #
+ matches[:startxref] ||= s.match(/\bstartxref\s+(\d+)\s*%%EOF/, index)
- if @footer
- @footer << l
- elsif m = /^(\d+ \d+) obj\b/.match(l)
- cur = (@objs[m[1]] = Obj.new(self, m[1]))
- cur << l
- elsif m = /^xref\b/.match(l)
- @footer = []
- @footer << l
- elsif cur
- cur << l
+ objm = matches[:obj]
+ sxrm = matches[:startxref]
+
+ break unless sxrm || objm
+
+ fail ArgumentError.new('failed to find "startxref"') unless sxrm
+
+ @root = nil if @root && index > @root.offset(0).last
+ @root ||= s.match(/\/Root (\d+ \d+) R\b/, index)
+
+ sxri = sxrm.offset(0).first
+ obji = objm ? objm.offset(0).first : sxri + 1
+
+ if obji < sxri
+ obj = Podoff::Obj.extract(self, matches)
+ @objs[obj.ref] = obj
+ @obj_counters[obj.ref] = (@obj_counters[obj.ref] || 0) + 1
+ index = obj.end_index + 1
else
- @header << l
+ @xref = sxrm[1].to_i
+ index = sxrm.offset(0).last + 1
+ matches.delete(:startxref)
end
end
+
+ fail ArgumentError.new('found no /Root') unless @root
+ @root = @root[1]
end
- def fonts; @objs.values.select(&:is_font?); end
- def pages; @objs.values.select(&:is_page?); end
+ def updated?
- def page(i)
-
- i < 1 ? nil : @objs.values.find { |o| o.page_number == i }
+ @additions.any?
end
def dup
- d0 = self
+ o = self
- d = d0.class.allocate
+ self.class.allocate.instance_eval do
- d.instance_eval do
- @header = d0.header.dup
- @footer = d0.footer.dup
- @objs = d0.objs.values.inject({}) { |h, v| h[v.ref] = v.dup(d); h }
+ @source = o.source
+ @xref = o.xref
+
+ @objs = o.objs.inject({}) { |h, (k, v)| h[k] = v.dup(self); h }
+ @obj_counters = o.obj_counters.dup
+
+ @root = o.root
+
+ @additions =
+ o.additions.inject({}) { |h, (k, v)| h[k] = v.dup(self); h }
+
+ self
end
+ end
- d
+ def pages
+
+ @objs.values.select { |o| o.type == '/Page' }
end
+ def page(index)
+
+ return nil if index == 0
+
+ pas = pages
+ return nil if pas.empty?
+
+ return (
+ index > 0 ? pas.at(index - 1) : pas.at(index)
+ ) unless pas.first.attributes[:pagenum]
+
+ if index < 0
+ max = pas.inject(0) { |n, pa| [ n, pa.page_number ].max }
+ index = max + 1 + index
+ end
+
+ pas.find { |pa| pa.page_number == index }
+ end
+
+ def new_ref
+
+ "#{
+ @objs.keys.inject(-1) { |i, r| [ i, r.split(' ').first.to_i ].max } + 1
+ } 0"
+ end
+
+ def add(obj)
+
+ @objs[obj.ref] = obj
+ @additions[obj.ref] = obj
+
+ obj
+ end
+
+ def add_base_font(name)
+
+ name = name[1..-1] if name[0] == '/'
+
+ ref = new_ref
+
+ add(
+ Obj.create(
+ self,
+ ref,
+ [
+ "#{ref} obj",
+ "<< /Type /Font /Subtype /Type1 /BaseFont /#{name} >>",
+ "endobj"
+ ].join(' ')))
+ end
+
+ def add_stream(s=nil, &block)
+
+ ref = new_ref
+
+ s = s || make_stream(&block)
+
+ s = [
+ "#{ref} obj",
+ "<< /Length #{s.length} >>",
+ "stream\n#{s}\nendstream",
+ "endobj"
+ ].join("\n") if s.is_a?(String)
+
+ o = add(Obj.create(self, ref, s))
+
+ s.is_a?(Podoff::Stream) ? s : o
+ end
+
+ def re_add(obj_or_ref)
+
+ obj = obj_or_ref.is_a?(String) ? @objs[obj_or_ref] : obj_or_ref
+
+ obj = obj.replicate unless obj.replica?
+
+ add(obj)
+ end
+
def write(path)
- File.open(path, 'wb') do |f|
+ f = (path == :string) ? StringIO.new : File.open(path, 'wb')
- @header.each { |l| f.print(l); f.print("\n") }
+ f.write(@source)
- @objs.values.each do |o|
- o.lines.each { |l| f.print(l); f.print("\n") }
+ if @additions.any?
+
+ pointers = {}
+
+ @additions.values.each do |o|
+ f.write("\n")
+ pointers[o.ref] = f.pos + 1
+ if o.source.is_a?(String)
+ f.write(o.source)
+ else # Stream
+ s = o.source.to_s
+ f.write("#{o.ref} obj\n<< /Length #{s.length} >>\n")
+ f.write("stream\n#{s}\nendstream\nendobj")
+ end
end
+ f.write("\n\n")
- @footer.each { |l| f.print(l); f.print("\n") }
+ xref = f.pos + 1
+
+ f.write("xref\n")
+ f.write("0 1\n")
+ f.write("0000000000 65535 f\n")
+
+ pointers.each do |k, v|
+ f.write("#{k.split(' ').first} 1\n")
+ f.write(sprintf("%010d 00000 n\n", v))
+ end
+
+ f.write("trailer\n")
+ f.write("<<\n")
+ f.write("/Prev #{self.xref}\n")
+ f.write("/Size #{objs.size}\n")
+ f.write("/Root #{root} R\n")
+ f.write(">>\n")
+ f.write("startxref #{xref}\n")
+ f.write("%%EOF\n")
end
+
+ f.close
+
+ path == :string ? f.string : nil
end
+
+ private
+
+ def make_stream(&block)
+
+ s = Stream.new
+ s.instance_exec(&block) if block
+
+ s
+ end
end
class Obj
+ def self.extract(doc, matches)
+
+ re = matches[:obj][1]
+ st = matches[:obj].offset(0).first
+ en = matches[:endobj].offset(0).last - 1
+
+ atts = {}
+
+ OBJ_ATTRIBUTES.keys.each do |k|
+ m = matches[k]
+ if m && m.offset(0).last < en
+ atts[k] = m[1].strip
+ matches.delete(k)
+ end
+ end
+
+ matches.delete(:obj)
+ matches.delete(:endobj)
+
+ Podoff::Obj.new(doc, re, st, en, atts)
+ end
+
attr_reader :document
attr_reader :ref
- attr_reader :lines
+ attr_reader :start_index, :end_index
+ attr_reader :attributes
- def initialize(doc, ref)
+ def initialize(doc, ref, st, en, atts, source=nil)
@document = doc
@ref = ref
- @lines = []
+ @start_index = st
+ @end_index = en
+ @attributes = atts
+ @source = source
+
+ recompute_attributes if @source.is_a?(String)
+ @source.obj = self if @source.is_a?(Podoff::Stream)
end
- def <<(l)
+ def dup(new_doc)
- @lines << l
+ self.class.new(new_doc, ref, start_index, end_index, attributes.dup)
end
- def lookup(k)
+ def self.create(doc, ref, source)
- @lines.each do |l|
+ self.new(doc, ref, nil, nil, nil, source)
+ end
- m = l.match(/^\/#{k} (.*)$/)
- return m[1] if m
- end
+ def replicate
- nil
+ self.class.create(document, ref, source.dup)
end
- def index(o, start=0)
+ def to_a
- @lines[start..-1].each_with_index do |l, i|
+ [ @ref, @start_index, @end_index, @attributes ]
+ end
- if o.is_a?(String)
- return start + i if l == o
- else
- return start + i if l.match(o)
- end
- end
+ def source
- nil
+ @source || @document.source[@start_index..@end_index]
end
+ def replica?
+
+ @source != nil
+ end
+
def type
- t = lookup('Type')
- t ? t[1..-1] : nil
+ @attributes && @attributes[:type]
end
def page_number
- r = lookup('pdftk_PageNum')
+ r = @attributes && @attributes[:pagenum]
r ? r.to_i : nil
end
- def is_page?
+# def parent
+#
+# r = @attributes[:parent]
+# r ? r[0..-2].strip : nil
+# end
+#
+# def kids
+#
+# r = @attributes[:kids]
+# (r || '').split(/[\[\]R]/).collect(&:strip).reject(&:empty?)
+# end
+#
+# def contents
+#
+# r = @attributes[:contents]
+# (r || '').split(/[\[\]R]/).collect(&:strip).reject(&:empty?)
+# end
- page_number != nil
- end
+# def add_annotation(ref)
+#
+# if annots = @attributes[:annots]
+# fail "implement me!"
+# else
+# i = @source.index('/Type ')
+# @source.insert(i, "/Annots [#{ref} R]\n")
+# end
+# recompute_attributes
+# end
- def is_font?
+# def add_free_text(x, y, text, font, size)
+#
+# fail ArgumentError.new('target is not a page') unless type == '/Page'
+#
+# nref = document.new_ref
+#
+# s = [
+# "#{nref} obj <<",
+# "/Type /Annot",
+# "/Subtype /FreeText",
+# "/Da (/F1 70 Tf 0 100 Td)",
+# "/Rect [0 0 500 600]",
+# "/Contents (#{text})",
+# ">>",
+# "endobj"
+# ].join("\n")
+# anno = Obj.create(document, nref, s)
+#
+# page = self.replicate
+# page.add_annotation(nref)
+#
+# document.add(anno)
+# document.add(page)
+#
+# anno
+# end
- type() == 'Font'
- end
+ def insert_font(nick, obj_or_ref)
- def parent
+ fail ArgumentError.new("target '#{ref}' not a replica") \
+ unless @source
- # /Parent 2 0 R
+ nick = nick[1..-1] if nick[0] == '/'
- r = lookup('Parent')
+ re = obj_or_ref
+ re = re.ref if re.respond_to?(:ref)
- r ? r[0..-2].strip : nil
+ @source = @source.gsub(/\/Font\s*<</, "/Font\n<<\n/#{nick} #{re} R")
end
- def kids
+ def insert_contents(obj_or_ref)
- # /Kids [1 0 R 16 0 R 33 0 R]
+ fail ArgumentError.new("target '#{ref}' not a replica") \
+ unless @source
+ fail ArgumentError.new("target '#{ref}' doesn't have /Contents") \
+ unless @attributes[:contents]
- r = lookup('Kids')
- (r || '').split(/[\[\]R]/).collect(&:strip).reject(&:empty?)
- end
+ re = obj_or_ref
+ re = re.obj if re.respond_to?(:obj) # Stream
+ re = re.ref if re.respond_to?(:ref)
- def contents
-
- r = lookup('Contents')
- r ? r[0..-2].strip : nil
+ add_to_attribute(:contents, re)
end
+ alias :insert_content :insert_contents
- def font_names
+ protected
- @lines.inject(nil) do |names, l|
+ def recompute_attributes
- if names
- return names if l == '>>'
- if m = l.match(/\/([^ ]+) /); names << m[1]; end
- elsif l.match(/\/Font\s*$/)
- names = []
+ @attributes =
+ OBJ_ATTRIBUTES.inject({}) do |h, (k, v)|
+ m = @source.match(/\/#{v} (\/?[^\/\n<>]+)/)
+ h[k] = m[1] if m
+ h
end
+ end
- names
- end
+ def concat(refs, ref)
- []
+ refs = refs.strip
+ refs = refs[1..-2] if refs[0] == '['
+
+ "[#{refs} #{ref} R]"
end
- def dup(new_doc)
+ def add_to_attribute(key, ref)
- o0 = self
- o = o0.class.new(new_doc, @ref)
- o.instance_eval { @lines = o0.lines.dup }
+ fail ArgumentError.new("obj not replicated") unless @source
- o
+ pkey = OBJ_ATTRIBUTES[key]
+
+ if v = @attributes[key]
+ v = concat(v, ref)
+ @source = @source.gsub(/#{pkey} ([\[\]0-9 R]+)/, "#{pkey} #{v}")
+ else
+ i = @source.index('/Type ')
+ @source.insert(i, "/#{pkey} [#{ref} R]\n")
+ end
+ recompute_attributes
end
+ end
- def find(opts={}, &block)
+ class Stream
- return self if block.call(self)
+ attr_accessor :obj
- [ *kids, contents ].compact.each do |k|
- o = @document.objs[k]
- return o if o && block.call(o)
- end
+ def initialize
- nil
+ @font = nil
+ @content = StringIO.new
end
- def crop_box
+ #def document; obj.document; end
+ #def ref; obj.ref; end
+ #def source; self; end
- r = lookup('CropBox') || lookup('MediaBox')
+ def tf(font_name, font_size)
- r ? r.strip[1..-2].split(' ').collect(&:strip).collect(&:to_f) : nil
+ n = font_name[0] == '/' ? font_name[1..-1] : font_name
+
+ @font = "/#{n} #{font_size} Tf "
end
+ alias :font :tf
- def crop_dims
+ def bt(x, y, text)
- x, y, w, h = crop_box
-
- x ? [ w - x, h - y ] : nil
+ @content.write "\n" if @content.size > 0
+ @content.write "BT "
+ @content.write @font if @font
+ @content.write "#{x} #{y} Td (#{escape(text)}) Tj"
+ @content.write " ET"
end
+ alias :text :bt
- def prepend_text(x, y, text, opts={})
+ def write(text)
- o = find { |o| o.index('BT') }
- fail ArgumentError.new('found no BT in the tree') unless o
+ @content.write(text)
+ end
- font = opts[:font] || o.font_names.first || 'TT0'
- size = opts[:size] || 10
- comm = opts[:comment]
+ def to_s
- i = o.index('BT')
- bt = []
- bt << 'BT'
- bt << "#{x} #{y} Td"
- bt << "/#{font} #{size} Tf"
- bt << "(#{text})Tj"
- bt << 'ET'
- bt << " % #{comm}" if comm
- bt = bt.join(' ')
+ @content.string
+ end
- o.lines.insert(i, bt)
+ protected
- o
+ def escape(s)
+
+ s.gsub(/\(/, '\(').gsub(/\)/, '\)')
end
end
end