#!/usr/bin/env ruby
=begin
= Info
Convert a PDF document to an Origami script.
Experimental.
= License:
Origami is free software: you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
Origami is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public License
along with Origami. If not, see <http://www.gnu.org/licenses/>.
= Author
Guillaume Delugré
=end
require 'optparse'
require 'fileutils'
begin
ORIGAMIDIR = "#{File.dirname(__FILE__)}/../lib"
require 'origami'
rescue LoadError
$: << ORIGAMIDIR
require 'origami'
end
# Mix the Origami module into the top level so its constants (PDF, Parser,
# Console, ...) can be used without the Origami:: prefix.
include Origami
# Conversion state shared by the *ToRuby helpers defined in this script.
# Maps an object reference to the name of the Ruby variable that stands for
# it in the generated script (looked up in referenceToRuby).
@var_hash = {}
# Maps a variable name to a hash of generated code fragments; the visible
# helpers only touch its :afterDecl list of deferred assignment statements.
@code_hash = {}
# Ordered list of variable names; referenceToRuby appends a name the first
# time a reference to it is emitted.
@obj_route = []
# Upper bound of the @obj_route slice that referenceToRuby inspects to decide
# whether an assignment must be deferred.
# NOTE(review): it is assigned outside the visible part of the file.
@current_idx = nil
# Command-line option parsing for this script.
class OptParser
  # Parses the command-line switches found in +args+.
  #
  # Recognized switches are removed from +args+ in place (OptionParser#parse!),
  # so only the positional arguments remain afterwards. "-h"/"--help" prints
  # the usage text and exits the process.
  #
  # args:: array of command-line arguments, typically ARGV.
  #
  # Returns a Hash with the keys :verbose and :xstreams, each false unless the
  # matching switch was given.
  def self.parse(args)
    options = { :verbose => false, :xstreams => false }

    parser = OptionParser.new do |opts|
      # NOTE(review): the heredoc opener and the usage line were lost from the
      # source; the usage line below is a reconstruction -- confirm wording.
      opts.banner = <<-BANNER
Usage: #{$0} <PDF-file> [-v] [-x]
Convert a PDF document to an Origami script (experimental).
Options:
      BANNER

      opts.on("-v", "--verbose", "Verbose mode") do
        options[:verbose] = true
      end

      opts.on("-x", "--extract-streams", "Extract PDF streams to separate files") do
        options[:xstreams] = true
      end

      opts.on_tail("-h", "--help", "Show this message") do
        puts opts
        exit
      end
    end

    parser.parse!(args)
    options
  end
end
# Parse the switches; OptParser.parse strips them from ARGV, so anything left
# over is the positional PDF file name.
@options = OptParser.parse(ARGV)

# Bail out early when no input file was given.
if ARGV.empty?
  STDERR.puts "Error: No filename was specified. #{$0} --help for details."
  exit 1
end
TARGET = ARGV.shift

# Turn on both Origami type-inference options.
Origami::OPTIONS[:enable_type_propagation] = true
Origami::OPTIONS[:enable_type_guessing] = true

# Output layout: <name>/<name>.rb, with extracted streams under "streams".
TARGET_DIR = File.basename(TARGET, '.pdf')
TARGET_FILE = "#{TARGET_DIR}/#{TARGET_DIR}.rb"
STREAM_DIR = "streams"
# Converts a parsed Origami object into the Ruby source text that rebuilds it.
#
# obj::          the Origami object to convert.
# inclevel::     indentation depth forwarded to the dictionary/array emitters.
# internalname:: Ruby expression addressing +obj+ inside its container; it is
#                forwarded to the nested emitters.
# do_convert::   when true, ".to_o" is appended to the code generated for
#                strings, dictionaries, arrays and names.
#
# Returns the generated code as a String.
# Raises RuntimeError when +obj+ is none of the handled Origami types.
def objectToRuby(obj, inclevel = 0, internalname = nil, do_convert = false)
  code = ""
  code <<
    case obj
    when Origami::Null
      "Null.new"
    when Origami::Boolean, Origami::Number
      obj.value.to_s
    when Origami::String
      # In a single-quoted Ruby literal both the backslash and the quote must
      # be escaped; backslashes go first so the ones added for quotes are not
      # doubled. Block replacements are inserted literally (no back-references).
      escaped = obj.value.gsub("\\") { "\\\\" }.gsub("'") { "\\'" }
      "'#{escaped}'"
    when Origami::Dictionary
      customclass = nil
      if obj.class != Origami::Dictionary
        # Strip the Origami prefix if there is no collision.
        first_part = (obj.class == Origami::Encoding) ? 0 : 1
        customclass = obj.class.to_s.split('::')[first_part..-1].join('::')
      end
      dictionaryToRuby(obj, inclevel, internalname, customclass)
    when Origami::Array
      arrayToRuby(obj, inclevel, internalname)
    when Origami::Stream
      streamToRuby(obj, internalname)
    when Origami::Name
      nameToRuby(obj)
    when Origami::Reference
      referenceToRuby(obj, internalname)
    else
      raise RuntimeError, "Unknown object type: #{obj.class}"
    end

  case obj
  when Origami::String, Origami::Dictionary, Origami::Array, Origami::Name
    code << ".to_o" if do_convert
  end

  code
end
# Produces the code standing in for an indirect reference.
#
# ref::          the reference, used as a key into @var_hash.
# internalname:: Ruby expression designating the slot that holds the reference.
#
# Returns "nil" when the reference has no known variable. When the variable
# already appears in @obj_route[0..@current_idx], "nil" is returned as well and
# an "<internalname> = <variable>" statement is queued under the variable's
# :afterDecl list in @code_hash. Otherwise the variable name itself is
# returned, after being appended to @obj_route if it was not there yet.
def referenceToRuby(ref, internalname)
  target = @var_hash[ref]
  return "nil" if target.nil?

  unless @obj_route[0..@current_idx].include?(target)
    @obj_route.push(target) unless @obj_route.include?(target)
    return target
  end

  # Defer the assignment until after the declaration of +target+.
  entry = (@code_hash[target] ||= {})
  deferred = (entry[:afterDecl] ||= [])
  deferred << "#{internalname} = #{target}"
  "nil"
end
# Renders a PDF name as a Ruby symbol literal.
#
# name:: object whose #value, converted with #to_s, is the name's text.
#
# A name that is a plain identifier (letters, digits and underscores, not
# starting with a digit) is emitted bare, e.g. ":Type". Any other name --
# including the empty one -- is emitted as a double-quoted symbol, with
# backslash, double quote and "#" escaped so the literal always parses back
# to the same text.
#
# Returns the symbol literal as a String.
def nameToRuby(name)
  text = name.value.to_s
  return ":#{text}" if text =~ /\A[A-Za-z_][A-Za-z0-9_]*\z/

  # "#" is escaped so that "#{", "#$" and "#@" cannot trigger interpolation.
  escaped = text.gsub(/["\\#]/) { |ch| "\\#{ch}" }
  ":\"#{escaped}\""
end
# Renders an array as a Ruby array literal.
#
# arr::          the array to convert; iterated with #each.
# inclevel::     indentation depth of the opening bracket; elements are
#                converted one level deeper.
# internalname:: Ruby expression designating the array; element N is addressed
#                as "<internalname>[N]".
#
# Returns a String starting with a newline and the indentation, followed by
# the bracketed, ", "-separated element code.
def arrayToRuby(arr, inclevel, internalname)
  pieces = []
  arr.each do |element|
    slot = "#{internalname}[#{pieces.size}]"
    pieces << objectToRuby(element, inclevel + 1, slot).to_s
  end

  "\n" + " " * inclevel + "[" + pieces.join(", ") + "]"
end
# Renders a dictionary either as a Ruby hash literal or, when +customtype+ is
# given, as a "<customtype>.new(...)" constructor call.
#
# dict::         the dictionary to convert.
# inclevel::     indentation depth of the opening and closing delimiters.
# internalname:: Ruby expression designating the dictionary.
# customtype::   class name to instantiate, or nil for a plain hash literal.
#
# The key/value lines are produced by dictionaryToHashMap in both cases, so
# the two forms cannot drift apart; only the delimiters differ.
#
# Returns the generated code as a String beginning with a newline.
def dictionaryToRuby(dict, inclevel, internalname, customtype = nil)
  indent = " " * inclevel
  entries = dictionaryToHashMap(dict, inclevel, internalname)
  opener, closer = customtype ? ["#{customtype}.new(", ")"] : ["{", "}"]

  "\n" + indent + opener + entries + indent + closer
end
# Renders the entries of a dictionary as "key => value" lines, without any
# enclosing delimiters.
#
# dict::         the dictionary to convert; must respond to #each_pair and
#                #length.
# inclevel::     indentation depth of the enclosing construct; entries are
#                indented one level deeper.
# internalname:: Ruby expression designating the dictionary; the entry for a
#                key is addressed as "<internalname>[<key literal>]".
#
# As a side effect, a referenced object that still carries a default "obj..."
# variable name is renamed after the key that points to it (see
# renameReferencedVariable).
#
# Returns a String that starts with a newline and holds one line per entry;
# every entry but the last is followed by ", ".
def dictionaryToHashMap(dict, inclevel, internalname)
  indent = " " * (inclevel + 1)
  last = dict.length - 1
  index = 0
  code = "\n"

  dict.each_pair do |key, val|
    rubyname = nameToRuby(key)
    subintname = "#{internalname}[#{rubyname}]"
    # Rename before converting the value so the new name is the one emitted.
    renameReferencedVariable(key, val)

    code << indent << "#{rubyname} => #{objectToRuby(val, inclevel + 2, subintname)}"
    code << ", " unless index == last
    code << "\n"
    index += 1
  end

  code
end

# Gives the variable behind reference +val+ a name derived from the dictionary
# key +key+, provided it still has a default "obj..." name and has not been
# placed in @obj_route yet. Updates @var_hash and moves the @code_hash entry.
def renameReferencedVariable(key, val)
  return unless val.is_a?(Origami::Reference)

  oldname = @var_hash[val]
  return unless oldname and oldname[0, 3] == "obj"
  return if @obj_route.include?(oldname)

  # Every character that cannot appear in a Ruby identifier becomes "_".
  newname = (key.value.to_s.downcase + "_" + oldname[4..-1].to_s).tr('^A-Za-z0-9_', '_')
  # A local variable name cannot start with a digit.
  newname = "_" + newname if newname =~ /\A[0-9]/
  # A key named "Obj" yields the old name again; moving the entry onto itself
  # and then deleting the old key would drop it.
  return if newname == oldname

  @var_hash[val] = newname
  @code_hash[newname] = @code_hash[oldname]
  @code_hash.delete(oldname)
end
# Renders a stream as a "Stream.new(...)" expression.
#
# stm::          the stream; its #dictionary, #data, #reference and #Filter
#                are read.
# internalname:: Ruby expression designating the stream, forwarded to the
#                nested emitters.
#
# With the :xstreams option set, the stream data is written to
# "<TARGET_DIR>/<STREAM_DIR>/stm_<refno>.data" and the generated code reads
# that file back at run time; otherwise the data is inlined as an inspected
# string literal. The :Length and :Filter entries are left out of the emitted
# dictionary, and a ".setFilter(...)" call is appended when the stream has a
# :Filter entry.
#
# Returns the generated code as a String.
def streamToRuby(stm, internalname)
  dict = stm.dictionary.dup.delete_if { |k, _v| k == :Length or k == :Filter }

  code = "Stream.new("
  if @options[:xstreams]
    relpath = "#{STREAM_DIR}/stm_#{stm.reference.refno}.data"
    FileUtils.mkdir_p("#{TARGET_DIR}/#{STREAM_DIR}")

    # Stream data is arbitrary bytes: write it, and have the generated script
    # read it, in binary mode so no newline or encoding translation occurs.
    File.open("#{TARGET_DIR}/#{relpath}", "wb") do |stmfd|
      stmfd.write stm.data
    end
    code << "File.binread('#{relpath}')"
  else
    code << stm.data.inspect
  end

  code << ", #{dictionaryToHashMap(dict, 1, internalname)}" unless dict.empty?
  code << ")"

  if stm.dictionary.has_key? :Filter
    code << ".setFilter(#{objectToRuby(stm.Filter, 1, internalname)})"
  end

  code
end
# Announce, then parse the input document; parser output is verbose only when
# the :verbose option was given.
Console.colorprint "[*] ", Console::Colors::RED
puts "Loading document '#{TARGET}'"
verbosity = @options[:verbose] ? Parser::VERBOSE_INSANE : Parser::VERBOSE_QUIET
target = PDF.read(TARGET, :verbosity => verbosity)
Console.colorprint "[*] ", Console::Colors::RED
puts "Document successfully loaded into Origami"
# Create the output directory if needed and open the generated script for
# writing with permission bits 0700.
# NOTE(review): fd is not closed within the visible part of the file.
Dir::mkdir(TARGET_DIR) unless File.directory? TARGET_DIR
fd = File.open(TARGET_FILE, 'w', 0700)
# NOTE(review): presumably the name of the document variable in the generated
# script -- its use is outside the visible part of the file.
DOCREF = "pdf"
# An absolute ORIGAMIDIR is used as is; a relative one gets a "../" prefix,
# presumably because the generated script sits one directory deeper (inside
# TARGET_DIR) -- confirm against the code that uses ORIGAMI_PATH.
ORIGAMI_PATH = ORIGAMIDIR[0,1] == '/' ?
ORIGAMIDIR :
"../#{ORIGAMIDIR}"
fd.puts <