lib/podoff.rb in podoff-0.0.1 vs lib/podoff.rb in podoff-0.9.0

- old
+ new

# Line-oriented reader/writer for PDF files.
#
# A file is split on "\n" into a header, a set of "N G obj" objects and a
# footer (everything from the "xref" line on). Lookups work on whole lines,
# so they expect one dictionary entry per line.
#
module Podoff

  VERSION = '0.9.0'

  # Reads the file at +path+ as ISO-8859-1 text and parses it.
  #
  # @param path [String] path of the PDF file
  # @return [Podoff::Document]
  # @raise [ArgumentError] when the content does not start with a PDF header
  #
  def self.load(path)

    Podoff::Document.new(
      File.open(path, 'r:iso8859-1') { |f| f.read })
  end

  # A parsed PDF: header lines, objects keyed by reference, footer lines.
  #
  class Document

    # Lines found before the first object.
    attr_reader :header

    # Hash of reference string (for example "1 0") to Obj, in file order.
    attr_reader :objs

    # Lines from the "xref" line to the end, nil when there is no "xref" line.
    attr_reader :footer

    # Parses the PDF source +s+.
    #
    # @param s [String] whole content of a PDF file
    # @raise [ArgumentError] when +s+ does not start with "%PDF-x.y\n"
    #
    def initialize(s)

      raise ArgumentError, 'not a PDF file' \
        unless s.match(/\A%PDF-\d+\.\d+\n/)

      @header = []
      @objs = {}
      @footer = nil

      current = nil

      s.split("\n").each do |line|

        if @footer
          # once "xref" has been seen, everything belongs to the footer
          @footer << line
        elsif (m = /^(\d+ \d+) obj\b/.match(line))
          current = (@objs[m[1]] = Obj.new(self, m[1]))
          current << line
        elsif /^xref\b/.match(line)
          @footer = [ line ]
        elsif current
          current << line
        else
          @header << line
        end
      end
    end

    # @return [Array<Obj>] objects whose /Type is /Font
    def fonts; @objs.values.select(&:is_font?); end

    # @return [Array<Obj>] objects carrying a /pdftk_PageNum entry
    def pages; @objs.values.select(&:is_page?); end

    # Returns the object whose /pdftk_PageNum equals +i+.
    #
    # @param i [Integer] page number, starting at 1
    # @return [Obj, nil] nil when +i+ is below 1 or no object matches
    #
    def page(i)

      return nil if i < 1

      @objs.values.find { |o| o.page_number == i }
    end

    # Returns a copy of this document holding copies of the line arrays and
    # of every object, so that inserting lines in the copy leaves the
    # original untouched.
    #
    # @return [Document]
    #
    def dup

      copy = self.class.allocate

      copy.instance_variable_set(:@header, @header.dup)
      # the footer is nil when the source had no "xref" line
      copy.instance_variable_set(:@footer, @footer ? @footer.dup : nil)
      copy.instance_variable_set(
        :@objs,
        @objs.values.each_with_object({}) { |o, h| h[o.ref] = o.dup(copy) })

      copy
    end

    # Writes header, objects and footer to +path+, one "\n" after each line.
    #
    # @param path [String] destination file
    #
    def write(path)

      all_lines =
        @header +
        @objs.values.flat_map(&:lines) +
        (@footer || []) # no footer when the source had no "xref" line

      File.open(path, 'wb') do |f|
        all_lines.each { |l| f.print(l); f.print("\n") }
      end
    end
  end

  # One "N G obj" section of the file, kept as an array of lines.
  #
  class Obj

    # The Document this object was created for.
    attr_reader :document

    # Reference string, for example "1 0".
    attr_reader :ref

    # The lines of this object, starting with its "N G obj" line.
    attr_reader :lines

    # @param doc [Document] owning document
    # @param ref [String] reference string, for example "1 0"
    #
    def initialize(doc, ref)

      @document = doc
      @ref = ref
      @lines = []
    end

    # Appends the line +l+ to this object.
    #
    def <<(l)

      @lines << l
    end

    # Returns the text following "/k " on the first line starting with it.
    #
    # The key is matched literally.
    #
    # @param k [String] entry name, without the leading slash
    # @return [String, nil]
    #
    def lookup(k)

      rex = /^\/#{Regexp.escape(k.to_s)} (.*)$/ # built once, not per line

      @lines.each do |l|

        m = l.match(rex)
        return m[1] if m
      end

      nil
    end

    # Returns the index of the first line, at or after +start+, that equals
    # +o+ (when +o+ is a String) or matches +o+ (otherwise).
    #
    # @param o [String, Regexp] whole line or pattern
    # @param start [Integer] index to start searching from
    # @return [Integer, nil] nil when nothing matches or +start+ is past the
    #   last line
    #
    def index(o, start=0)

      tail = @lines[start..-1] || [] # the slice is nil when start is past the end

      offset =
        if o.is_a?(String)
          tail.index { |l| l == o }
        else
          tail.index { |l| l.match(o) }
        end

      offset ? start + offset : nil
    end

    # @return [String, nil] the /Type value without its leading slash
    #
    def type

      t = lookup('Type')
      t ? t[1..-1] : nil
    end

    # @return [Integer, nil] the /pdftk_PageNum value
    #
    def page_number

      r = lookup('pdftk_PageNum')
      r ? r.to_i : nil
    end

    # @return [Boolean] true when this object has a /pdftk_PageNum entry
    #
    def is_page?

      !page_number.nil?
    end

    # @return [Boolean] true when the /Type of this object is /Font
    #
    def is_font?

      type == 'Font'
    end

    # @return [String, nil] the reference held by the /Parent entry
    #
    def parent

      # /Parent 2 0 R

      lookup_ref('Parent')
    end

    # @return [Array<String>] the references listed in the /Kids entry,
    #   empty when there is no such entry
    #
    def kids

      # /Kids [1 0 R 16 0 R 33 0 R]

      (lookup('Kids') || '').split(/[\[\]R]/).map(&:strip).reject(&:empty?)
    end

    # @return [String, nil] the reference held by the /Contents entry
    #
    def contents

      lookup_ref('Contents')
    end

    # Collects the names found between a line ending with "/Font" and the
    # next line that is exactly ">>".
    #
    # @return [Array<String>] empty when there is no "/Font" line or when no
    #   ">>" line follows it
    #
    def font_names

      names = nil

      @lines.each do |l|

        if names
          return names if l == '>>'
          m = l.match(/\/([^ ]+) /)
          names << m[1] if m
        elsif l.match(/\/Font\s*$/)
          names = []
        end
      end

      []
    end

    # Returns a copy of this object attached to +new_doc+, with its own
    # array of lines.
    #
    # @param new_doc [Document]
    # @return [Obj]
    #
    def dup(new_doc)

      copy = self.class.new(new_doc, @ref)
      copy.instance_variable_set(:@lines, @lines.dup)

      copy
    end

    # Returns the first object, among this one, its /Kids and its
    # /Contents, for which the block returns a true value. Only those direct
    # references are visited.
    #
    # @param opts [Hash] currently unused
    # @yieldparam obj [Obj] candidate object
    # @return [Obj, nil]
    # @raise [ArgumentError] when no block is given
    #
    def find(opts={}, &block)

      raise ArgumentError, 'find requires a block' unless block

      return self if block.call(self)

      [ *kids, contents ].compact.each do |k|
        candidate = @document.objs[k]
        return candidate if candidate && block.call(candidate)
      end

      nil
    end

    # @return [Array<Float>, nil] the four numbers of the /CropBox entry or,
    #   when there is none, of the /MediaBox entry
    #
    def crop_box

      r = lookup('CropBox') || lookup('MediaBox')

      # drop the surrounding brackets, then read the numbers
      r ? r.strip[1..-2].split(' ').map(&:to_f) : nil
    end

    # @return [Array<Float>, nil] the differences between the third and first
    #   and between the fourth and second crop_box numbers
    #
    def crop_dims

      x0, y0, x1, y1 = crop_box

      x0 ? [ x1 - x0, y1 - y0 ] : nil
    end

    # Inserts a one-line "BT ... ET" text block right before the first "BT"
    # line of the object located via #find.
    #
    # Parentheses in +text+ that are not already preceded by a backslash get
    # escaped, so that they cannot end the PDF string early.
    #
    # @param x [Numeric] first operand of the Td operator
    # @param y [Numeric] second operand of the Td operator
    # @param text [String] text to show
    # @param opts [Hash] :font (defaults to the first of the target's
    #   font_names, then 'TT0'), :size (defaults to 10) and :comment
    #   (appended after a "%")
    # @return [Obj] the object that received the new line
    # @raise [ArgumentError] when no object with a "BT" line is found
    #
    def prepend_text(x, y, text, opts={})

      target = find { |obj| obj.index('BT') }
      raise ArgumentError, 'found no BT in the tree' unless target

      font = opts[:font] || target.font_names.first || 'TT0'
      size = opts[:size] || 10
      comment = opts[:comment]

      parts = [
        'BT',
        "#{x} #{y} Td",
        "/#{font} #{size} Tf",
        "(#{escape_text(text)})Tj",
        'ET'
      ]
      parts << " % #{comment}" if comment

      target.lines.insert(target.index('BT'), parts.join(' '))

      target
    end

    private

    # Looks up +key+ and drops the last character of the value (the "R" in
    # "2 0 R") plus the surrounding whitespace.
    #
    def lookup_ref(key)

      r = lookup(key)
      r ? r[0..-2].strip : nil
    end

    # Backslash-escapes the parentheses of +text+, leaving alone the ones
    # already preceded by a backslash.
    #
    def escape_text(text)

      text.to_s.gsub(/(?<!\\)[()]/) { |paren| "\\#{paren}" }
    end
  end
end