#!/usr/bin/env ruby =begin = Info Convert a PDF document to an Origami script. Experimental. = License: Copyright (C) 2016 Guillaume Delugré. Origami is free software: you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. Origami is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with Origami. If not, see . =end require 'optparse' require 'fileutils' require 'rainbow' begin require 'origami' rescue LoadError $: << File.join(__dir__, '../lib') require 'origami' end include Origami @var_hash = {} @code_hash = {} @obj_route = [] @current_idx = nil class OptParser def self.parse(args) options = {} options[:verbose] = options[:xstreams] = false parser = OptionParser.new do |opts| opts.banner = < Convert a PDF document to an Origami script (experimental). Options: BANNER opts.on("-v", "--verbose", "Verbose mode") do options[:verbose] = true end opts.on("-x", "--extract-streams", "Extract PDF streams to separate files") do options[:xstreams] = true end opts.on_tail("-h", "--help", "Show this message") do puts opts exit end end parser.parse!(args) options end end @options = OptParser.parse(ARGV) if ARGV.empty? abort "Error: No filename was specified. #{$0} --help for details." else TARGET = ARGV.shift end Origami::OPTIONS[:enable_type_guessing] = Origami::OPTIONS[:enable_type_propagation] = true TARGET_DIR = File.basename(TARGET, '.pdf') TARGET_FILE = File.join(TARGET_DIR, "#{TARGET_DIR}.rb") STREAM_DIR = "streams" def objectToRuby(obj, inclevel = 0, internalname = nil, do_convert = false) code = "" code << case obj when Origami::Null "Null.new" when Origami::Boolean, Origami::Number, Origami::Name, Origami::String literalToRuby(obj) when Origami::Dictionary customclass = nil if obj.class != Origami::Dictionary p = (obj.class == Origami::Encoding) ? 0 : 1 customclass = obj.class.to_s.split('::')[p..-1].join('::') # strip Origami prefix if there is no collision end dictionaryToRuby(obj, inclevel, internalname, customclass) when Origami::Array arrayToRuby(obj, inclevel, internalname) when Origami::Stream streamToRuby(obj, internalname) unless obj.is_a?(ObjectStream) or obj.is_a?(XRefStream) when Origami::Reference referenceToRuby(obj, internalname) else raise RuntimeError, "Unknown object type: #{obj.class}" end case obj when Origami::String, Origami::Dictionary, Origami::Array, Origami::Name code << ".to_o" if do_convert end code end def referenceToRuby(ref, internalname) varname = @var_hash[ref] if varname.nil? "nil" elsif @obj_route[0..@current_idx].include?(varname) @code_hash[varname] ||= {} @code_hash[varname][:afterDecl] ||= [] @code_hash[varname][:afterDecl] << "#{internalname} = #{varname}"#.to_o.set_indirect(true)" "nil" else @obj_route.push(varname) unless @obj_route.include?(varname) varname end end def literalToRuby(obj) obj.value.inspect end def arrayToRuby(arr, inclevel, internalname) i = 0 code = "\n" + " " * inclevel + "[" arr.each do |obj| subintname = "#{internalname}[#{i}]" code << "#{objectToRuby(obj, inclevel + 1, subintname)}" code << ", " unless i == arr.length - 1 i = i + 1 end code << "]" code end def dictionaryToRuby(dict, inclevel, internalname, customtype = nil) i = 0 code = "\n" + " " * inclevel if customtype code << "#{customtype}.new(#{dictionaryToHashMap(dict, inclevel, internalname)}" code << " " * inclevel + ")" else code << "{\n" dict.each_pair do |key, val| rubyname = literalToRuby(key) subintname = "#{internalname}[#{rubyname}]" if val.is_a?(Origami::Reference) and @var_hash[val] and @var_hash[val][0,3] == "obj" oldname = @var_hash[val] newname = (key.value.to_s.downcase.gsub(/[^[[:alnum:]]]/,'_') + "_" + @var_hash[val][4..-1]).tr('.', '_') if not @obj_route.include?(oldname) @var_hash[val] = newname @code_hash[newname] = @code_hash[oldname] @code_hash.delete(oldname) end end code << " " * (inclevel + 1) + "#{rubyname} => #{objectToRuby(val, inclevel + 2, subintname)}" code << ", " unless i == dict.length - 1 i = i + 1 code << "\n" end code << " " * inclevel + "}" end code end def dictionaryToHashMap(dict, inclevel, internalname) i = 0 code = "\n" dict.each_pair do |key, val| rubyname = literalToRuby(key) subintname = "#{internalname}[#{rubyname}]" if val.is_a?(Origami::Reference) and @var_hash[val] and @var_hash[val][0,3] == "obj" oldname = @var_hash[val] newname = (key.value.to_s.downcase + "_" + @var_hash[val][4..-1]).tr('.', '_') if not @obj_route.include?(oldname) @var_hash[val] = newname @code_hash[newname] = @code_hash[oldname] @code_hash.delete(oldname) end end code << " " * (inclevel + 1) + "#{rubyname} => #{objectToRuby(val, inclevel + 2, subintname)}" code << ", " unless i == dict.length - 1 i = i + 1 code << "\n" end code end def streamToRuby(stm, internalname) dict = stm.dictionary.dup.delete_if {|k, _| k == :Length} code = "Stream.new(" if @options[:xstreams] stmdir = File.join(TARGET_DIR, STREAM_DIR) Dir::mkdir(stmdir) unless File.directory? stmdir stmfile = File.join(stmdir, "stm_#{stm.reference.refno}.data") File.binwrite(stmfile, stm.data) code << "File.binread('#{stmfile}')" else code << stm.data.inspect << ".b" end code << ", #{dictionaryToHashMap(dict, 1, internalname)}" unless dict.empty? code << ")" code end puts "[*] ".red + "Loading document '#{TARGET}'" verbosity = @options[:verbose] ? Parser::VERBOSE_TRACE : Parser::VERBOSE_QUIET target = PDF.read(TARGET, verbosity: verbosity) puts "[*] ".red + "Document successfully loaded into Origami" Dir::mkdir(TARGET_DIR) unless File.directory? TARGET_DIR fd = File.open(TARGET_FILE, 'w', 0700) DOCREF = "pdf" fd.puts <