#!/usr/bin/env ruby
=begin
= Info
Convert a PDF document to an Origami script.
Experimental.
= License:
Copyright (C) 2016 Guillaume Delugré.
Origami is free software: you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
Origami is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public License
along with Origami. If not, see <http://www.gnu.org/licenses/>.
=end
require 'optparse'
require 'fileutils'
require 'rainbow'
begin
require 'origami'
rescue LoadError
$: << File.join(__dir__, '../lib')
require 'origami'
end
# Make Origami's constants (PDF, Parser, ObjectStream, ...) usable without the Origami:: prefix.
include Origami

# State shared by the conversion helpers defined below.
@obj_route = []     # variable names pushed by referenceToRuby the first time each one is emitted
@current_idx = nil  # upper bound of the @obj_route slice that referenceToRuby inspects
@var_hash = {}      # indexed by reference; yields the Ruby variable name chosen for that object
@code_hash = {}     # indexed by variable name; holds per-variable data such as the :afterDecl list
# Command-line option handling for this script.
class OptParser
    # Parses the command-line switches found in +args+.
    #
    # Recognized switches are removed from +args+ in place (OptionParser#parse!),
    # so only the positional arguments remain afterwards. -h/--help prints the
    # usage text and terminates the process.
    #
    # args:: array of command-line arguments, typically ARGV
    #
    # Returns a Hash with the keys :verbose and :xstreams (both false by default).
    # Raises OptionParser::ParseError on an unrecognized switch.
    def self.parse(args)
        options = { verbose: false, xstreams: false }

        parser = OptionParser.new do |opts|
            # The heredoc opener had been reduced to a lone "<", which is a syntax
            # error. With <<- only the terminator may be indented, so the banner
            # text itself stays at column 0.
            opts.banner = <<-BANNER
Usage: #{$0} [-v] [-x] <PDF-file>
Convert a PDF document to an Origami script (experimental).
Options:
            BANNER

            opts.on("-v", "--verbose", "Verbose mode") do
                options[:verbose] = true
            end

            opts.on("-x", "--extract-streams", "Extract PDF streams to separate files") do
                options[:xstreams] = true
            end

            opts.on_tail("-h", "--help", "Show this message") do
                puts opts
                exit
            end
        end

        parser.parse!(args)
        options
    end
end
# Parse the switches first: OptParser.parse strips them from ARGV, so anything
# left over is a positional argument.
@options = OptParser.parse(ARGV)

# The first remaining argument is the PDF to convert; without it there is nothing to do.
abort "Error: No filename was specified. #{$0} --help for details." if ARGV.empty?
TARGET = ARGV.shift

# Turn on both Origami type-inference options before the document gets parsed.
Origami::OPTIONS[:enable_type_propagation] = true
Origami::OPTIONS[:enable_type_guessing] = true

# Output layout: a directory named after the PDF (minus its ".pdf" suffix)
# containing a script of the same name.
TARGET_DIR = File.basename(TARGET, '.pdf')
TARGET_FILE = File.join(TARGET_DIR, "#{TARGET_DIR}.rb")
STREAM_DIR = "streams"
# Renders an Origami object as a fragment of Ruby source code.
#
# obj::          the object to render
# inclevel::     nesting depth, forwarded to the dictionary and array renderers
# internalname:: Ruby expression designating the slot +obj+ occupies; forwarded to
#                the dictionary, array, stream and reference renderers
# do_convert::   when true, ".to_o" is appended to the code generated for strings,
#                dictionaries, arrays and names
#
# Returns the generated code as a String.
# Raises RuntimeError when +obj+ matches none of the handled Origami types.
def objectToRuby(obj, inclevel = 0, internalname = nil, do_convert = false)
    fragment =
        case obj
        when Origami::Null
            "Null.new"
        when Origami::Boolean, Origami::Number, Origami::Name, Origami::String
            literalToRuby(obj)
        when Origami::Dictionary
            customclass =
                unless obj.class == Origami::Dictionary
                    # Strip the leading "Origami" namespace from the class name.
                    # Origami::Encoding stays fully qualified (presumably because the
                    # bare name would collide with Ruby's own Encoding).
                    skipped = obj.class == Origami::Encoding ? 0 : 1
                    obj.class.to_s.split('::').drop(skipped).join('::')
                end
            dictionaryToRuby(obj, inclevel, internalname, customclass)
        when Origami::Array
            arrayToRuby(obj, inclevel, internalname)
        when Origami::Stream
            # Evaluates to nil for object streams and xref streams; appending that
            # nil below raises TypeError.
            streamToRuby(obj, internalname) unless obj.is_a?(ObjectStream) || obj.is_a?(XRefStream)
        when Origami::Reference
            referenceToRuby(obj, internalname)
        else
            raise RuntimeError, "Unknown object type: #{obj.class}"
        end

    code = ""
    code << fragment

    convertible = [Origami::String, Origami::Dictionary, Origami::Array, Origami::Name]
    code << ".to_o" if do_convert && convertible.any? { |type| type === obj }

    code
end
# Renders a reference as Ruby code.
#
# ref::          the reference, used as a key into @var_hash
# internalname:: Ruby expression designating the slot holding the reference
#
# Returns "nil" when no variable name is registered for +ref+. When the name
# already appears in @obj_route[0..@current_idx], "nil" is returned as well and
# an "internalname = varname" assignment is queued under the variable's
# :afterDecl list in @code_hash. Otherwise the name is recorded in @obj_route
# (once) and returned.
def referenceToRuby(ref, internalname)
    varname = @var_hash[ref]
    return "nil" if varname.nil?

    unless @obj_route[0..@current_idx].include?(varname)
        @obj_route.push(varname) unless @obj_route.include?(varname)
        return varname
    end

    # Emit a placeholder here and remember the assignment to perform later.
    entry = (@code_hash[varname] ||= {})
    deferred = (entry[:afterDecl] ||= [])
    deferred << "#{internalname} = #{varname}"
    "nil"
end
# Renders a scalar object as a Ruby literal.
#
# obj:: any object responding to #value
#
# Returns the #inspect representation of the wrapped value.
def literalToRuby(obj)
    raw = obj.value
    raw.inspect
end
# Renders an array as a Ruby array literal.
#
# arr::          the array to render; its elements go through objectToRuby
# inclevel::     nesting depth; the literal is placed on a new line indented by
#                that many spaces, and elements are rendered one level deeper
# internalname:: Ruby expression designating the array; element i is addressed
#                as "internalname[i]"
#
# Returns the generated code as a String.
def arrayToRuby(arr, inclevel, internalname)
    rendered = []
    arr.each do |element|
        slot = "#{internalname}[#{rendered.length}]"
        rendered << "#{objectToRuby(element, inclevel + 1, slot)}"
    end

    "\n" + " " * inclevel + "[" + rendered.join(", ") + "]"
end
# Renders a dictionary as Ruby code.
#
# dict::         the dictionary to render
# inclevel::     nesting depth; the code starts on a new line indented by that many spaces
# internalname:: Ruby expression designating the dictionary
# customtype::   optional class name; when given the entries are wrapped in
#                "customtype.new(...)" instead of a "{...}" hash literal
#
# Returns the generated code as a String.
def dictionaryToRuby(dict, inclevel, internalname, customtype = nil)
    indent = " " * inclevel
    opener, closer = customtype ? ["#{customtype}.new(", ")"] : ["{", "}"]

    # Both forms share the same entry list; only the delimiters differ.
    "\n" + indent + opener + dictionaryToHashMap(dict, inclevel, internalname) + indent + closer
end

# Renders the entries of a dictionary as "key => value" lines, without any
# surrounding delimiters.
#
# dict::         the dictionary whose entries are rendered
# inclevel::     nesting depth; entries are indented by inclevel + 1 spaces and
#                values are rendered at inclevel + 2
# internalname:: Ruby expression designating the dictionary; the value stored
#                under a key is addressed as "internalname[key]"
#
# Returns a String starting with a newline, with one line per entry.
def dictionaryToHashMap(dict, inclevel, internalname)
    entry_indent = " " * (inclevel + 1)
    entries = []

    dict.each_pair do |key, val|
        rubyname = literalToRuby(key)
        subintname = "#{internalname}[#{rubyname}]"

        # Must happen before the value is rendered: rendering a reference reads
        # the variable name that may be changed here.
        renameVariableAfterKey(key, val)

        entries << entry_indent + "#{rubyname} => #{objectToRuby(val, inclevel + 2, subintname)}"
    end

    code = "\n"
    code << entries.join(", \n") << "\n" unless entries.empty?
    code
end

# Replaces the generic "obj..." variable name of a referenced object with a name
# derived from the dictionary key pointing at it.
#
# Nothing happens unless +val+ is a reference whose registered name starts with
# "obj" and has not been recorded in @obj_route yet.
#
# key:: the dictionary key; key.value provides the new name prefix
# val:: the dictionary value
def renameVariableAfterKey(key, val)
    return unless val.is_a?(Origami::Reference)

    oldname = @var_hash[val]
    return unless oldname && oldname[0, 3] == "obj"
    return if @obj_route.include?(oldname)

    # Every non-alphanumeric character is replaced so that the result is usable
    # as a Ruby identifier.
    prefix = key.value.to_s.downcase.gsub(/[^[:alnum:]]/, '_')
    newname = (prefix + "_" + oldname[4..-1].to_s).tr('.', '_')

    # A key such as "Obj" yields the name already in use; moving the entry onto
    # itself and then deleting the old key would drop it from @code_hash.
    return if newname == oldname

    @var_hash[val] = newname
    @code_hash[newname] = @code_hash[oldname]
    @code_hash.delete(oldname)
end
# Renders a stream as a "Stream.new(...)" expression.
#
# The :Length entry is left out of the rendered dictionary. When the :xstreams
# option is set, the stream data is written to
# TARGET_DIR/STREAM_DIR/stm_<refno>.data and the generated code reads it back
# with File.binread; otherwise the data is embedded as a binary string literal.
#
# stm::          the stream; must respond to #dictionary and #data, and to
#                #reference when streams are extracted
# internalname:: Ruby expression designating the stream, forwarded to the
#                dictionary renderer
#
# Returns the generated code as a String.
def streamToRuby(stm, internalname)
    dict = stm.dictionary.dup.delete_if { |key, _| key == :Length }

    data_expr =
        if @options[:xstreams]
            stmdir = File.join(TARGET_DIR, STREAM_DIR)
            # mkdir_p is a no-op for an existing directory and also creates a missing parent.
            FileUtils.mkdir_p(stmdir)

            stmfile = File.join(stmdir, "stm_#{stm.reference.refno}.data")
            File.binwrite(stmfile, stm.data)

            # inspect produces a correctly escaped literal; wrapping the raw path in
            # single quotes broke the generated script for names containing ' or \.
            "File.binread(#{stmfile.inspect})"
        else
            stm.data.inspect + ".b"
        end

    code = "Stream.new(" + data_expr
    code << ", #{dictionaryToHashMap(dict, 1, internalname)}" unless dict.empty?
    code << ")"
    code
end
# Load the target PDF; the parser traces its work only when --verbose was given.
puts "[*] ".red + "Loading document '#{TARGET}'"
verbosity = @options[:verbose] ? Parser::VERBOSE_TRACE : Parser::VERBOSE_QUIET
target = PDF.read(TARGET, verbosity: verbosity)
puts "[*] ".red + "Document successfully loaded into Origami"
# Create the output directory if it does not exist yet, then open the script file for writing.
Dir::mkdir(TARGET_DIR) unless File.directory? TARGET_DIR
# 0700 is the permission mode requested when the file has to be created.
fd = File.open(TARGET_FILE, 'w', 0700)
# NOTE(review): presumably the variable name given to the document in the generated script; confirm against the code that follows.
DOCREF = "pdf"
fd.puts <