require 'shellwords' class PDFKit class Error < StandardError; end class NoExecutableError < Error def initialize msg = "No wkhtmltopdf executable found at #{PDFKit.configuration.wkhtmltopdf}\n" msg << ">> Please install wkhtmltopdf - https://github.com/pdfkit/PDFKit/wiki/Installing-WKHTMLTOPDF" super(msg) end end class ImproperSourceError < Error def initialize(msg) super("Improper Source: #{msg}") end end class ImproperWkhtmltopdfExitStatus < Error def initialize(invoke) super("Command failed (exitstatus=#{$?.exitstatus}): #{invoke}") end end attr_accessor :source, :stylesheets attr_reader :renderer def initialize(url_file_or_html, options = {}) @source = Source.new(url_file_or_html) @stylesheets = [] options = PDFKit.configuration.default_options.merge(options) options.delete(:quiet) if PDFKit.configuration.verbose? options.merge! find_options_in_meta(url_file_or_html) unless source.url? @root_url = options.delete(:root_url) @protocol = options.delete(:protocol) @renderer = WkHTMLtoPDF.new options @renderer.normalize_options raise NoExecutableError unless File.exists?(PDFKit.configuration.wkhtmltopdf) end def command(path = nil) args = @renderer.options_for_command shell_escaped_command = [executable, OS::shell_escape_for_os(args)].join ' ' # In order to allow for URL parameters (e.g. https://www.google.com/search?q=pdfkit) we do # not escape the source. The user is responsible for ensuring that no vulnerabilities exist # in the source. Please see https://github.com/pdfkit/pdfkit/issues/164. input_for_command = @source.to_input_for_command output_for_command = path ? Shellwords.shellescape(path) : '-' "#{shell_escaped_command} #{input_for_command} #{output_for_command}" end def options # TODO(cdwort,sigmavirus24): Replace this with an attr_reader for @renderer instead in 1.0.0 @renderer.options end def executable PDFKit.configuration.executable end def to_pdf(path=nil) preprocess_html append_stylesheets invoke = command(path) result = IO.popen(invoke, "wb+") do |pdf| pdf.puts(@source.to_s) if @source.html? pdf.close_write pdf.gets(nil) if path.nil? end # $? is thread safe per # http://stackoverflow.com/questions/2164887/thread-safe-external-process-in-ruby-plus-checking-exitstatus raise ImproperWkhtmltopdfExitStatus, invoke if empty_result?(path, result) || !successful?($?) return result end def to_file(path) self.to_pdf(path) File.new(path) end protected def find_options_in_meta(content) # Read file if content is a File content = content.read if content.is_a?(File) || content.is_a?(Tempfile) found = {} content.scan(/]*>/) do |meta| if meta.match(/name=["']#{PDFKit.configuration.meta_tag_prefix}/) name = meta.scan(/name=["']#{PDFKit.configuration.meta_tag_prefix}([^"']*)/)[0][0].split found[name] = meta.scan(/content=["']([^"'\\]+)["']/)[0][0] end end tuple_keys = found.keys.select { |k| k.is_a? Array } tuple_keys.each do |key| value = found.delete key new_key = key.shift found[new_key] ||= {} found[new_key][key] = value end found end def style_tag_for(stylesheet) "" end def preprocess_html if @source.html? processed_html = PDFKit::HTMLPreprocessor.process(@source.to_s, @root_url, @protocol) @source = Source.new(processed_html) end end def append_stylesheets raise ImproperSourceError, 'Stylesheets may only be added to an HTML source' if stylesheets.any? && !@source.html? stylesheets.each do |stylesheet| if @source.to_s.match(/<\/head>/) @source = Source.new(@source.to_s.gsub(/(<\/head>)/) {|s| style_tag_for(stylesheet) + s }) else @source.to_s.insert(0, style_tag_for(stylesheet)) end end end def successful?(status) return true if status.success? # Some of the codes: https://code.google.com/p/wkhtmltopdf/issues/detail?id=1088 # returned when assets are missing (404): https://code.google.com/p/wkhtmltopdf/issues/detail?id=548 return true if status.exitstatus == 2 && @renderer.error_handling? false end def empty_result?(path, result) (path && File.size(path) == 0) || (path.nil? && result.to_s.strip.empty?) end end