bin/pdf2ruby in origami-1.2.7 vs bin/pdf2ruby in origami-2.0.0
- old
+ new
@@ -1,360 +1,344 @@
#!/usr/bin/env ruby
=begin
-= Info
- Convert a PDF document to an Origami script.
- Experimental.
+= Info
+ Convert a PDF document to an Origami script.
+ Experimental.
= License:
- Origami is free software: you can redistribute it and/or modify
- it under the terms of the GNU Lesser General Public License as published by
- the Free Software Foundation, either version 3 of the License, or
- (at your option) any later version.
+ Copyright (C) 2016 Guillaume Delugré.
- Origami is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU Lesser General Public License for more details.
+ Origami is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Lesser General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
- You should have received a copy of the GNU Lesser General Public License
- along with Origami. If not, see <http://www.gnu.org/licenses/>.
+ Origami is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Lesser General Public License for more details.
-= Author
- Guillaume Delugré
+ You should have received a copy of the GNU Lesser General Public License
+ along with Origami. If not, see <http://www.gnu.org/licenses/>.
=end
require 'optparse'
require 'fileutils'
+require 'colorize'
+
begin
- ORIGAMIDIR = "#{File.dirname(__FILE__)}/../lib"
- require 'origami'
+ require 'origami'
rescue LoadError
- $: << ORIGAMIDIR
- require 'origami'
+ $: << File.join(__dir__, '../lib')
+ require 'origami'
end
include Origami
@var_hash = {}
@code_hash = {}
@obj_route = []
@current_idx = nil
class OptParser
- def self.parse(args)
- options = {}
- options[:verbose] =
- options[:xstreams] = false
+ def self.parse(args)
+ options = {}
+ options[:verbose] =
+ options[:xstreams] = false
- opts = OptionParser.new do |opts|
- opts.banner = <<BANNER
+ opts = OptionParser.new do |opts|
+ opts.banner = <<BANNER
Usage: #{$0} [-v] [-x] <PDF-file>
Convert a PDF document to an Origami script (experimental).
Options:
BANNER
-
- opts.on("-v", "--verbose", "Verbose mode") do
- options[:verbose] = true
- end
- opts.on("-x", "--extract-streams", "Extract PDF streams to separate files") do
- options[:xstreams] = true
- end
+ opts.on("-v", "--verbose", "Verbose mode") do
+ options[:verbose] = true
+ end
- opts.on_tail("-h", "--help", "Show this message") do
- puts opts
- exit
- end
- end
- opts.parse!(args)
+ opts.on("-x", "--extract-streams", "Extract PDF streams to separate files") do
+ options[:xstreams] = true
+ end
- options
- end
+ opts.on_tail("-h", "--help", "Show this message") do
+ puts opts
+ exit
+ end
+ end
+
+ opts.parse!(args)
+
+ options
+ end
end
@options = OptParser.parse(ARGV)
if ARGV.empty?
- STDERR.puts "Error: No filename was specified. #{$0} --help for details."
- exit 1
+ abort "Error: No filename was specified. #{$0} --help for details."
else
TARGET = ARGV.shift
end
Origami::OPTIONS[:enable_type_guessing] = Origami::OPTIONS[:enable_type_propagation] = true
-
+
TARGET_DIR = File.basename(TARGET, '.pdf')
-TARGET_FILE = "#{TARGET_DIR}/#{TARGET_DIR}.rb"
+TARGET_FILE = File.join(TARGET_DIR, "#{TARGET_DIR}.rb")
STREAM_DIR = "streams"
def objectToRuby(obj, inclevel = 0, internalname = nil, do_convert = false)
- code = ""
+ code = ""
- code <<
- case obj
- when Origami::Null
- "Null.new"
- when Origami::Boolean, Origami::Number
- obj.value.to_s
- when Origami::String
- "'#{obj.value.gsub("'","\\\\'")}'"
- when Origami::Dictionary
- customclass = nil
- if obj.class != Origami::Dictionary
- p = (obj.class == Origami::Encoding) ? 0 : 1
- customclass = obj.class.to_s.split('::')[p..-1].join('::') # strip Origami prefix if there is no collision
- end
- dictionaryToRuby(obj, inclevel, internalname, customclass)
- when Origami::Array
- arrayToRuby(obj, inclevel, internalname)
- when Origami::Stream
- streamToRuby(obj, internalname)
- when Origami::Name
- nameToRuby(obj)
- when Origami::Reference
- referenceToRuby(obj, internalname)
- else
- raise RuntimeError, "Unknown object type: #{obj.class}"
- end
+ code <<
+ case obj
+ when Origami::Null
+ "Null.new"
+ when Origami::Boolean, Origami::Number
+ obj.value.to_s
+ when Origami::String
+ obj.inspect
+ when Origami::Dictionary
+ customclass = nil
+ if obj.class != Origami::Dictionary
+ p = (obj.class == Origami::Encoding) ? 0 : 1
+ customclass = obj.class.to_s.split('::')[p..-1].join('::') # strip Origami prefix if there is no collision
+ end
+ dictionaryToRuby(obj, inclevel, internalname, customclass)
+ when Origami::Array
+ arrayToRuby(obj, inclevel, internalname)
+ when Origami::Stream
+ streamToRuby(obj, internalname) unless obj.is_a?(ObjectStream) or obj.is_a?(XRefStream)
+ when Origami::Name
+ nameToRuby(obj)
+ when Origami::Reference
+ referenceToRuby(obj, internalname)
+ else
+ raise RuntimeError, "Unknown object type: #{obj.class}"
+ end
- case obj
+ case obj
when Origami::String, Origami::Dictionary, Origami::Array, Origami::Name
- code << ".to_o" if do_convert
- end
+ code << ".to_o" if do_convert
+ end
- code
+ code
end
def referenceToRuby(ref, internalname)
- varname = @var_hash[ref]
+ varname = @var_hash[ref]
- if varname.nil?
- "nil"
- elsif @obj_route[0..@current_idx].include?(varname)
- @code_hash[varname] ||= {}
- @code_hash[varname][:afterDecl] ||= []
- @code_hash[varname][:afterDecl] << "#{internalname} = #{varname}"#.to_o.set_indirect(true)"
+ if varname.nil?
+ "nil"
+ elsif @obj_route[0..@current_idx].include?(varname)
+ @code_hash[varname] ||= {}
+ @code_hash[varname][:afterDecl] ||= []
+ @code_hash[varname][:afterDecl] << "#{internalname} = #{varname}"#.to_o.set_indirect(true)"
- "nil"
- else
- @obj_route.push(varname) unless @obj_route.include?(varname)
- varname
- end
+ "nil"
+ else
+ @obj_route.push(varname) unless @obj_route.include?(varname)
+ varname
+ end
end
def nameToRuby(name)
- code = ':'
- valid = (name.value.to_s =~ /[+.:-]/).nil?
+ code = ':'
+ valid = (name.value.to_s =~ /[+.:-]/).nil?
- code << '"' unless valid
- code << name.value.to_s
- code << '"' unless valid
+ code << '"' unless valid
+ code << name.value.to_s
+ code << '"' unless valid
- code
+ code
end
def arrayToRuby(arr, inclevel, internalname)
- i = 0
- code = "\n" + " " * inclevel + "["
- arr.each do |obj|
- subintname = "#{internalname}[#{i}]"
-
- code << "#{objectToRuby(obj, inclevel + 1, subintname)}"
- code << ", " unless i == arr.length - 1
- i = i + 1
- end
- code << "]"
+ i = 0
+ code = "\n" + " " * inclevel + "["
+ arr.each do |obj|
+ subintname = "#{internalname}[#{i}]"
- code
+ code << "#{objectToRuby(obj, inclevel + 1, subintname)}"
+ code << ", " unless i == arr.length - 1
+ i = i + 1
+ end
+ code << "]"
+
+ code
end
def dictionaryToRuby(dict, inclevel, internalname, customtype = nil)
- i = 0
- code = "\n" + " " * inclevel
-
- if customtype
- code << "#{customtype}.new(#{dictionaryToHashMap(dict, inclevel, internalname)}"
- code << " " * inclevel + ")"
- else
- code << "{\n"
- dict.each_pair do |key, val|
- rubyname = nameToRuby(key)
- subintname = "#{internalname}[#{rubyname}]"
+ i = 0
+ code = "\n" + " " * inclevel
- if val.is_a?(Origami::Reference) and @var_hash[val] and @var_hash[val][0,3] == "obj"
- oldname = @var_hash[val]
- newname = (key.value.to_s.downcase + "_" + @var_hash[val][4..-1]).gsub('.','_')
+ if customtype
+ code << "#{customtype}.new(#{dictionaryToHashMap(dict, inclevel, internalname)}"
+ code << " " * inclevel + ")"
+ else
+ code << "{\n"
+ dict.each_pair do |key, val|
+ rubyname = nameToRuby(key)
+ subintname = "#{internalname}[#{rubyname}]"
- if not @obj_route.include?(oldname)
- @var_hash[val] = newname
- @code_hash[newname] = @code_hash[oldname]
- @code_hash.delete(oldname)
+ if val.is_a?(Origami::Reference) and @var_hash[val] and @var_hash[val][0,3] == "obj"
+ oldname = @var_hash[val]
+ newname = (key.value.to_s.downcase.gsub(/[^[[:alnum:]]]/,'_') + "_" + @var_hash[val][4..-1]).gsub('.','_')
+
+ if not @obj_route.include?(oldname)
+ @var_hash[val] = newname
+ @code_hash[newname] = @code_hash[oldname]
+ @code_hash.delete(oldname)
+ end
+ end
+
+ code << " " * (inclevel + 1) +
+ "#{rubyname} => #{objectToRuby(val, inclevel + 2, subintname)}"
+ code << ", " unless i == dict.length - 1
+
+ i = i + 1
+ code << "\n"
end
- end
- code << " " * (inclevel + 1) +
- "#{rubyname} => #{objectToRuby(val, inclevel + 2, subintname)}"
- code << ", " unless i == dict.length - 1
-
- i = i + 1
- code << "\n"
+ code << " " * inclevel + "}"
end
- code << " " * inclevel + "}"
- end
- code
+ code
end
def dictionaryToHashMap(dict, inclevel, internalname)
- i = 0
- code = "\n"
- dict.each_pair do |key, val|
- rubyname = nameToRuby(key)
- subintname = "#{internalname}[#{rubyname}]"
+ i = 0
+ code = "\n"
+ dict.each_pair do |key, val|
+ rubyname = nameToRuby(key)
+ subintname = "#{internalname}[#{rubyname}]"
- if val.is_a?(Origami::Reference) and @var_hash[val] and @var_hash[val][0,3] == "obj"
- oldname = @var_hash[val]
- newname = (key.value.to_s.downcase + "_" + @var_hash[val][4..-1]).gsub('.','_')
+ if val.is_a?(Origami::Reference) and @var_hash[val] and @var_hash[val][0,3] == "obj"
+ oldname = @var_hash[val]
+ newname = (key.value.to_s.downcase + "_" + @var_hash[val][4..-1]).gsub('.','_')
- if not @obj_route.include?(oldname)
- @var_hash[val] = newname
- @code_hash[newname] = @code_hash[oldname]
- @code_hash.delete(oldname)
- end
+ if not @obj_route.include?(oldname)
+ @var_hash[val] = newname
+ @code_hash[newname] = @code_hash[oldname]
+ @code_hash.delete(oldname)
+ end
+ end
+
+ code << " " * (inclevel + 1) +
+ "#{rubyname} => #{objectToRuby(val, inclevel + 2, subintname)}"
+ code << ", " unless i == dict.length - 1
+ i = i + 1
+ code << "\n"
end
- code << " " * (inclevel + 1) +
- "#{rubyname} => #{objectToRuby(val, inclevel + 2, subintname)}"
- code << ", " unless i == dict.length - 1
- i = i + 1
- code << "\n"
- end
-
- code
+ code
end
def streamToRuby(stm, internalname)
- dict = stm.dictionary.dup.delete_if{|k,v| k == :Length or k == :Filter}
+ dict = stm.dictionary.dup.delete_if{|k,v| k == :Length}
- code = "Stream.new("
-
- if @options[:xstreams]
- stmdir = "#{TARGET_DIR}/#{STREAM_DIR}"
- Dir::mkdir(stmdir) unless File.directory? stmdir
- stmfile = "#{stmdir}/stm_#{stm.reference.refno}.data"
- File.open(stmfile, "w") do |stmfd|
- stmfd.write stm.data
+ code = "Stream.new("
+
+ if @options[:xstreams]
+ stmdir = File.join(TARGET_DIR, STREAM_DIR)
+ Dir::mkdir(stmdir) unless File.directory? stmdir
+ stmfile = File.join(stmdir, "stm_#{stm.reference.refno}.data")
+ File.binwrite(stmfile, stm.data)
+
+ code << "File.binread('#{stmfile}')"
+ else
+ code << stm.data.inspect << ".b"
end
- code << "File.read('#{STREAM_DIR}/stm_#{stm.reference.refno}.data')"
- else
- code << stm.data.inspect
- end
-
- code << ", #{dictionaryToHashMap(dict, 1, internalname)}" unless dict.empty?
- code << ")"
- if stm.dictionary.has_key? :Filter
- code << ".setFilter(#{objectToRuby(stm.Filter, 1, internalname)})"
- end
+ code << ", #{dictionaryToHashMap(dict, 1, internalname)}" unless dict.empty?
+ code << ")"
- code
+ code
end
-Console.colorprint "[*] ", Console::Colors::RED
-puts "Loading document '#{TARGET}'"
-verbosity = @options[:verbose] ? Parser::VERBOSE_INSANE : Parser::VERBOSE_QUIET
-target = PDF.read(TARGET, :verbosity => verbosity)
-Console.colorprint "[*] ", Console::Colors::RED
-puts "Document successfully loaded into Origami"
+puts "[*] ".red + "Loading document '#{TARGET}'"
+verbosity = @options[:verbose] ? Parser::VERBOSE_TRACE : Parser::VERBOSE_QUIET
+target = PDF.read(TARGET, verbosity: verbosity)
+puts "[*] ".red + "Document successfully loaded into Origami"
+
Dir::mkdir(TARGET_DIR) unless File.directory? TARGET_DIR
fd = File.open(TARGET_FILE, 'w', 0700)
DOCREF = "pdf"
-ORIGAMI_PATH = ORIGAMIDIR[0,1] == '/' ?
- ORIGAMIDIR :
- "../#{ORIGAMIDIR}"
fd.puts <<RUBY
#!/usr/bin/env ruby
begin
- require 'origami'
+ require 'origami'
rescue LoadError
- ORIGAMIDIR = "#{ORIGAMI_PATH}"
- $: << ORIGAMIDIR
- require 'origami'
+ $: << "#{File.join(__dir__, '../lib')}"
+ require 'origami'
end
include Origami
+using Origami::TypeConversion
+# Disable automatic type casting.
+Origami::OPTIONS[:enable_type_guessing] = false
+
OUTPUT = "\#{File.basename(__FILE__, '.rb')}.pdf"
#
# Creates the PDF object.
#
#{DOCREF} = PDF.new
RUBY
-Console.colorprint "[*] ", Console::Colors::RED
-puts "Retrieving all indirect objects..."
-roots = target.root_objects
-roots.each do |obj|
- varname = "obj_" + obj.no.to_s
- @var_hash[obj.reference] = varname
+puts "[*] ".red + "Retrieving all indirect objects..."
+target.each_object(compressed: true) do |obj|
+ varname = "obj_" + obj.no.to_s
+ @var_hash[obj.reference] = varname
end
-Console.colorprint "[*] ", Console::Colors::RED
-puts "Retrieving the document Catalog..."
+puts "[*] ".red + "Retrieving the document Catalog..."
catalog = target.Catalog
@var_hash[catalog.reference] = "#{DOCREF}.Catalog"
@obj_route.push "#{DOCREF}.Catalog"
-Console.colorprint "[*] ", Console::Colors::RED
-puts "Processing the object hierarchy..."
+puts "[*] ".red + "Processing the object hierarchy..."
@current_idx = 0
while @current_idx != @obj_route.size
- varname = @obj_route[@current_idx]
- if RUBY_VERSION < '1.9'
- obj = target[@var_hash.index(varname)]
- else
+ varname = @obj_route[@current_idx]
obj = target[@var_hash.key(varname)]
- end
-
- @code_hash[varname] ||= {}
- @code_hash[varname][:body] = objectToRuby(obj, 0, varname, true)
- @current_idx = @current_idx + 1
+ @code_hash[varname] ||= {}
+ @code_hash[varname][:body] = objectToRuby(obj, 0, varname, true)
+
+ @current_idx = @current_idx + 1
end
@obj_route.reverse.each do |varname|
- fd.puts "#{varname} = #{@code_hash[varname][:body]}"
- if @code_hash[varname][:afterDecl]
- @code_hash[varname][:afterDecl].each do |decl|
- fd.puts decl
+ fd.puts "#{varname} = #{@code_hash[varname][:body]}"
+ if @code_hash[varname][:afterDecl]
+ @code_hash[varname][:afterDecl].each do |decl|
+ fd.puts decl
+ end
end
- end
- fd.puts
+ fd.puts
end
@obj_route.each do |varname|
- fd.puts "#{DOCREF}.insert(#{varname})" unless varname == "#{DOCREF}.Catalog"
+ fd.puts "#{DOCREF}.insert(#{varname})" unless varname == "#{DOCREF}.Catalog"
end
fd.puts
fd.puts <<RUBY
#
# Saves the document.
#
#{DOCREF}.save(OUTPUT)
-
RUBY
-Console.colorprint "[*] ", Console::Colors::RED
-puts "Successfully generated script '#{TARGET_FILE}'"
-fd.close
-exit
+puts "[*] ".red + "Successfully generated script '#{TARGET_FILE}'"
+fd.close