#!/usr/bin/env ruby
=begin
= Info
This is a PDF document filtering engine using Origami.
Security policies are based on a white list of PDF features.
Default policies details can be found in the default configuration file.
You can also add your own policy and activate it using the -p switch.
= License
Copyright (C) 2016 Guillaume Delugré.
Origami is free software: you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
Origami is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public License
along with Origami. If not, see <http://www.gnu.org/licenses/>.
=end
begin
require 'origami'
rescue LoadError
$: << File.join(__dir__, '../lib')
require 'origami'
end
require 'optparse'
require 'yaml'
require 'rexml/document'
require 'digest/md5'
require 'colorize'
# Configuration file loaded when no -c/--config switch is given; resolved
# relative to the directory containing this script.
DEFAULT_CONFIG_FILE = "#{File.dirname(__FILE__)}/config/pdfcop.conf.yml"
# Policy name applied when no -p/--policy switch is given.
DEFAULT_POLICY = "standard"
# Policies read from the configuration file. Lookups in this script use keys of
# the form "POLICY_<UPPERCASED NAME>".
SECURITY_POLICIES = {}
# Reads the YAML configuration file at +path+ and merges every top-level entry
# it declares into SECURITY_POLICIES, replacing entries that share a name.
#
# path:: location of the YAML configuration file.
#
# Returns SECURITY_POLICIES. Raises if the file cannot be read or if its
# top-level YAML value cannot be used as a Hash.
def load_config_file(path)
  declared_policies = YAML.load(File.read(path))
  SECURITY_POLICIES.merge!(declared_policies)
end
# Command-line front end: declares the supported switches and turns an argument
# list into an options hash.
class OptParser
  # Banner printed above the option summary by -h/--help.
  BANNER = <<USAGE
Usage: #{$0} [options] <PDF-file>
The PDF filtering engine. Scans PDF documents for malicious structures.
Bug reports or feature requests at: http://github.com/gdelugre/origami
Options:
USAGE

  # Parses +args+ in place: recognized switches are removed from the array and
  # the remaining entries (the target file name) are left for the caller.
  #
  # args:: array of command-line arguments, typically ARGV.
  #
  # Returns a Hash that always contains :colors and :password, plus
  # :output_log, :config_file, :policy and :disable_colors when the matching
  # switch was given. -h/--help prints the usage and exits. Errors raised by
  # OptionParser for invalid switches are not rescued here.
  def self.parse(args)
    options = {colors: true, password: ''}

    parser = OptionParser.new do |opts|
      opts.banner = BANNER

      opts.on("-o", "--output LOG_FILE", "Output log file (default STDOUT)") do |o|
        options[:output_log] = o
      end

      opts.on("-c", "--config CONFIG_FILE", "Load security policies from given configuration file") do |cf|
        options[:config_file] = cf
      end

      opts.on("-p", "--policy POLICY_NAME", "Specify applied policy. Predefined policies: 'none', 'standard', 'strong', 'paranoid'") do |policy|
        options[:policy] = policy
      end

      opts.on("-P", "--password PASSWORD", "Password to use if the document is encrypted") do |passwd|
        options[:password] = passwd
      end

      opts.on("-n", "--no-color", "Turn off colorized output") do
        options[:disable_colors] = true
      end

      opts.on_tail("-h", "--help", "Show this message") do
        puts opts
        exit
      end
    end

    parser.parse!(args)
    options
  end
end
#
# Script set-up: parse the command line, choose the log destination, load the
# policies and make sure both the requested policy and a target file exist.
#
@options = OptParser.parse(ARGV)

# Log lines are appended to the requested file, or written to STDOUT.
LOGGER = @options.has_key?(:output_log) ? File.open(@options[:output_log], "a+") : STDOUT

@options[:policy] = DEFAULT_POLICY unless @options.has_key?(:policy)

String.disable_colorization(@options[:disable_colors])

load_config_file(@options[:config_file] || DEFAULT_CONFIG_FILE)

abort "Undeclared policy `#{@options[:policy]}'" unless SECURITY_POLICIES.has_key?("POLICY_#{@options[:policy].upcase}")

# Whatever remains in ARGV after option parsing starts with the target file.
abort "Error: No filename was specified. #{$0} --help for details." if ARGV.empty?
TARGET = ARGV.shift
# Writes one line to LOGGER: the current time between brackets (in cyan),
# a space, then +str+ rendered with +color+.
#
# str::   message to write.
# color:: color name handed to String#colorize (:default when omitted).
def log(str, color = :default)
  timestamp = "[#{Time.now}]".cyan
  message = str.colorize(color)
  LOGGER.puts("#{timestamp} #{message}")
end
# Logs, in red, that the document is rejected by the active policy and then
# terminates the program through Kernel#abort.
#
# cause:: object describing the violation; its #inspect form is logged.
def reject(cause)
  verdict = "Document rejected by policy `#{@options[:policy]}', caused by #{cause.inspect}."
  log(verdict, :red)
  abort
end
# Rejects the document when the active policy denies any of the given rights.
#
# required_rights:: right names (symbols); each is looked up by its string
#                   form in the active policy.
#
# A right is denied only when the policy maps it to +false+. A right that the
# policy does not declare at all is therefore not treated as denied.
# On denial the whole list of requested rights is passed to #reject.
def check_rights(*required_rights)
  policy = SECURITY_POLICIES["POLICY_#{@options[:policy].upcase}"]
  return if required_rights.none? { |right| policy[right.to_s] == false }

  reject(required_rights)
end
# Checks the scripts embedded in an XFA form against the active policy.
#
# xfa:: either an Origami::Array, of which every second entry (odd index) is
#       resolved and its data concatenated, or an Origami::Stream whose data
#       is used directly. Anything else is rejected as malformed.
#
# The resulting XML is parsed and every <script> element is checked:
# contentType "application/x-formcalc" requires :allowFormCalc, any other
# (or missing) contentType requires :allowJS.
def analyze_xfa_forms(xfa)
  xml =
    case xfa
    when Origami::Array
      buffer = ""
      take_packet = false
      # Entries alternate; only those at odd positions contribute XML data.
      xfa.each do |packet|
        buffer << packet.solve.data if take_packet
        take_packet = !take_packet
      end
      buffer
    when Origami::Stream
      xfa.data
    else
      reject("Malformed XFA dictionary")
      nil
    end

  document = REXML::Document.new(xml)
  REXML::XPath.match(document, "//script").each do |script|
    if "application/x-formcalc" == script.attributes["contentType"]
      check_rights(:allowFormCalc)
    else
      check_rights(:allowJS)
    end
  end
end
# Checks one annotation against the active policy.
#
# annot::  the annotation object. :allowAnnotations is always required; the
#          subtype-specific checks only run when +annot+ is a dictionary with
#          a /Subtype entry.
# _level:: nesting depth supplied by callers; unused here.
#
# Subtypes without a dedicated branch only require :allowAnnotations.
def analyze_annotation(annot, _level = 0)
  check_rights(:allowAnnotations)
  return unless annot.is_a?(Origami::Dictionary) && annot.has_key?(:Subtype)

  case annot[:Subtype].solve.value
  when :FileAttachment
    check_rights(:allowAttachments, :allowFileAttachmentAnnotation)
  when :Sound
    check_rights(:allowSoundAnnotation)
  when :Movie
    check_rights(:allowMovieAnnotation)
  when :Screen
    check_rights(:allowScreenAnnotation)
  when :Widget
    # Same right name as every other AcroForm check in this script; the
    # previous spelling (:allowAcroforms) matched no policy entry.
    check_rights(:allowAcroForms)
  when :"3D"
    check_rights(:allow3DAnnotation)

    # 3D annotation might pull in JavaScript for real-time driven behavior.
    if annot.has_key?(:"3DD")
      dd = annot[:"3DD"].solve
      u3dstream = nil
      case dd
      when Origami::Stream
        u3dstream = dd
      when Origami::Dictionary
        # The nested entry may be an indirect reference; resolve it before
        # querying its keys.
        target = dd[:"3DD"]
        u3dstream = target.is_a?(Origami::Reference) ? target.solve : target
      end

      if u3dstream && u3dstream.key?(:OnInstantiate)
        check_rights(:allowJS)

        if annot.has_key?(:"3DA") # is 3d view instantiated automatically?
          u3dactiv = annot[:"3DA"].solve
          automatic = u3dactiv.is_a?(Origami::Dictionary) &&
                      (u3dactiv[:A] == :PO || u3dactiv[:A] == :PV)
          check_rights(:allowJSAtOpening) if automatic
        end
      end
    end
  when :RichMedia
    check_rights(:allowRichMediaAnnotation)
  end
end
# Inspects a page: its additional-actions dictionary, then its annotations.
#
# page::  the page object; nothing beyond the log line happens unless it is
#         an Origami::Dictionary.
# level:: nesting depth, used to indent log lines and passed on (plus one) to
#         the nested analyses.
def analyze_page(page, level = 0)
  depth = level + 1
  log(" " * 2 * level + ">" * depth + " Inspecting page...")
  detail_prefix = " " * 2 * depth + "." * depth
  return unless page.is_a?(Origami::Dictionary)

  #
  # Checking page additional actions.
  #
  if page.has_key?(:AA) && page.AA.is_a?(Origami::Dictionary)
    log(detail_prefix + " Page has an action dictionary.")
    actions = Origami::Page::AdditionalActions.new(page.AA)
    actions.parent = page.AA.parent
    # The :O action is analyzed as triggered at opening, the :C action is not.
    analyze_action(actions.O, true, depth) if actions.has_key?(:O)
    analyze_action(actions.C, false, depth) if actions.has_key?(:C)
  end

  #
  # Looking for page annotations.
  #
  page.each_annotation { |annot| analyze_annotation(annot, depth) }
end
# Checks an action, or a bare destination array, against the active policy.
#
# action:: an Origami::Dictionary describing an action (its /S entry selects
#          the checks), or an Origami::Array treated as a destination whose
#          first entry references the target page. Other objects are only
#          logged.
# triggered_at_opening:: flag supplied by the caller; when true, JavaScript
#          actions additionally require :allowJSAtOpening.
# level::  nesting depth, used to indent log lines.
#
# Actions chained through /Next (a single action or an array of actions) are
# analyzed recursively with the same +triggered_at_opening+ flag.
def analyze_action(action, triggered_at_opening, level = 0)
  section_prefix = " " * 2 * level + ">" * (level + 1)
  log(section_prefix + " Inspecting action...")
  text_prefix = " " * 2 * (level + 1) + "." * (level + 1)

  if action.is_a?(Origami::Dictionary)
    log(text_prefix + " Found #{action[:S]} action.")
    type = action[:S].is_a?(Origami::Reference) ? action[:S].solve : action[:S]

    case type.value
    when :JavaScript
      check_rights(:allowJS)
      check_rights(:allowJSAtOpening) if triggered_at_opening
    when :Launch
      check_rights(:allowLaunchAction)
    when :Named
      check_rights(:allowNamedAction)
    when :GoTo
      check_rights(:allowGoToAction)
      dest = action[:D].is_a?(Origami::Reference) ? action[:D].solve : action[:D]
      if dest.is_a?(Origami::Array) && dest.length > 0 && dest.first.is_a?(Origami::Reference)
        dest_page = dest.first.solve
        if dest_page.is_a?(Origami::Page)
          log(text_prefix + " Destination page found.")
          analyze_page(dest_page, level + 1)
        end
      end
    when :GoToE
      check_rights(:allowAttachments, :allowGoToEAction)
    when :GoToR
      check_rights(:allowGoToRAction)
    when :Thread
      # Only thread actions that carry an /F entry are checked.
      check_rights(:allowGoToRAction) if action.has_key?(:F)
    when :URI
      check_rights(:allowURIAction)
    when :SubmitForm
      check_rights(:allowAcroForms, :allowSubmitFormAction)
    when :ImportData
      check_rights(:allowAcroForms, :allowImportDataAction)
    when :Rendition
      check_rights(:allowScreenAnnotation, :allowRenditionAction)
    when :Sound
      check_rights(:allowSoundAnnotation, :allowSoundAction)
    when :Movie
      check_rights(:allowMovieAnnotation, :allowMovieAction)
    when :RichMediaExecute
      check_rights(:allowRichMediaAnnotation, :allowRichMediaAction)
    when :GoTo3DView
      check_rights(:allow3DAnnotation, :allowGoTo3DAction)
    end

    if action.has_key?(:Next)
      log(text_prefix + "This action is chained to another action!")
      check_rights(:allowChainedActions)

      # /Next holds either one action or an array of actions. Each one is
      # analyzed with the trigger flag of the action it follows; the flag is
      # a required argument and used to be omitted from this call.
      chained = action.Next
      chained = [chained] unless chained.is_a?(Origami::Array)
      chained.each do |next_action|
        next_action = next_action.solve if next_action.is_a?(Origami::Reference)
        analyze_action(next_action, triggered_at_opening, level + 1)
      end
    end
  elsif action.is_a?(Origami::Array)
    # A bare array is handled as an explicit destination.
    dest = action
    if dest.length > 0 && dest.first.is_a?(Origami::Reference)
      dest_page = dest.first.solve
      if dest_page.is_a?(Origami::Page)
        log(text_prefix + " Destination page found.")
        check_rights(:allowGoToAction)
        analyze_page(dest_page, level + 1)
      end
    end
  end
end
#
# Main analysis. The target is parsed, then its structure, catalog, name
# directories, pages and stream filters are checked against the active policy.
# Any violation terminates the program through `reject`; an unexpected error
# is logged and rejected as "Analysis failure". LOGGER is closed in all cases.
#
begin
  log("PDFcop is running on target `#{TARGET}', policy = `#{@options[:policy]}'", :green)
  log(" File size: #{File.size(TARGET)} bytes", :magenta)
  # Digest the file through Digest::MD5.file so the checksum is computed from
  # the raw bytes rather than from a text-mode read of the whole document.
  log(" MD5: #{Digest::MD5.file(TARGET).hexdigest}", :magenta)

  policy = SECURITY_POLICIES["POLICY_#{@options[:policy].upcase}"]
  @pdf = Origami::PDF.read(TARGET,
    verbosity: Origami::Parser::VERBOSE_QUIET,
    ignore_errors: policy['allowParserErrors'],
    decrypt: policy['allowEncryption'],
    prompt_password: lambda { '' },
    password: @options[:password]
  )

  log("> Inspecting document structure...", :yellow)
  if @pdf.encrypted?
    log(" . Encryption = YES")
    check_rights(:allowEncryption)
  end

  log("> Inspecting document catalog...", :yellow)
  catalog = @pdf.Catalog
  reject("Invalid document catalog") unless catalog.is_a?(Origami::Catalog)

  if catalog.has_key?(:OpenAction)
    log(" . OpenAction entry = YES")
    check_rights(:allowOpenAction)
    action = catalog.OpenAction
    analyze_action(action, true, 1)
  end

  if catalog.has_key?(:AA)
    if catalog.AA.is_a?(Origami::Dictionary)
      # NOTE(review): analyze_page refers to the page-level class as
      # Origami::Page::AdditionalActions; confirm this constant name exists in
      # the Origami version in use.
      aa = Origami::CatalogAdditionalActions.new(catalog.AA); aa.parent = catalog;
      log(" . Additional actions dictionary = YES")
      analyze_action(aa.WC, false, 1) if aa.has_key?(:WC)
      analyze_action(aa.WS, false, 1) if aa.has_key?(:WS)
      analyze_action(aa.DS, false, 1) if aa.has_key?(:DS)
      analyze_action(aa.WP, false, 1) if aa.has_key?(:WP)
      analyze_action(aa.DP, false, 1) if aa.has_key?(:DP)
    end
  end

  if catalog.has_key?(:AcroForm)
    acroform = catalog.AcroForm
    if acroform.is_a?(Origami::Dictionary)
      log(" . AcroForm = YES")
      check_rights(:allowAcroForms)
      if acroform.has_key?(:XFA)
        log(" . XFA = YES")
        check_rights(:allowXFAForms)
        analyze_xfa_forms(acroform[:XFA].solve)
      end
    end
  end

  log("> Inspecting JavaScript names directory...", :yellow)
  if @pdf.each_named_script.any?
    check_rights(:allowJS)
    check_rights(:allowJSAtOpening)
  end

  log("> Inspecting attachment names directory...", :yellow)
  if @pdf.each_attachment.any?
    check_rights(:allowAttachments)
  end

  log("> Inspecting document pages...", :yellow)
  @pdf.each_page do |page|
    analyze_page(page, 1)
  end

  log("> Inspecting document streams...", :yellow)
  @pdf.indirect_objects.each do |stream|
    next unless stream.is_a?(Origami::Stream)
    next unless stream.dictionary.has_key?(:Filter)

    filters = stream.Filter
    # A single filter is given as a bare name. It is wrapped in a plain Ruby
    # array, so both plain and Origami arrays must be accepted below;
    # accepting only Origami::Array left single-filter streams unchecked.
    filters = [ filters ] if filters.is_a?(Origami::Name)
    next unless filters.is_a?(::Array) || filters.is_a?(Origami::Array)

    filters.each do |filter|
      case filter.value
      when :ASCIIHexDecode
        check_rights(:allowASCIIHexFilter)
      when :ASCII85Decode
        check_rights(:allowASCII85Filter)
      when :LZWDecode
        check_rights(:allowLZWFilter)
      when :FlateDecode
        check_rights(:allowFlateDecode)
      when :RunLengthDecode
        check_rights(:allowRunLengthFilter)
      when :CCITTFaxDecode
        check_rights(:allowCCITTFaxFilter)
      when :JBIG2Decode
        check_rights(:allowJBIG2Filter)
      when :DCTDecode
        check_rights(:allowDCTFilter)
      when :JPXDecode
        check_rights(:allowJPXFilter)
      when :Crypt
        check_rights(:allowCryptFilter)
      end
    end
  end

  #
  # TODO: Detect JS at opening in XFA (check event tag)
  # Check image encoding in XFA ?
  # Only allow valid signed documents ?
  # Recursively scan attached files.
  # On-the-fly injection of prerun JS code to hook vulnerable methods (dynamic exploit detection) ???
  # ...
  #
  log("Document accepted by policy `#{@options[:policy]}'.", :green)
rescue => error
  # Only StandardError is rescued here, so the SystemExit raised by `reject`
  # (through abort) is not intercepted.
  log("An error occured during analysis : #{error.class} (#{error.message})")
  reject("Analysis failure")
ensure
  LOGGER.close
end