lib/pdfkit/pdfkit.rb in pdfkit-0.8.2 vs lib/pdfkit/pdfkit.rb in pdfkit-0.8.3
- old
+ new
@@ -1,7 +1,6 @@
require 'shellwords'
-require 'rbconfig'
class PDFKit
class NoExecutableError < StandardError
def initialize
msg = "No wkhtmltopdf executable found at #{PDFKit.configuration.wkhtmltopdf}\n"
@@ -15,49 +14,52 @@
super("Improper Source: #{msg}")
end
end
attr_accessor :source, :stylesheets
- attr_reader :options
+ attr_reader :renderer
def initialize(url_file_or_html, options = {})
@source = Source.new(url_file_or_html)
@stylesheets = []
- @options = PDFKit.configuration.default_options.merge(options)
- @options.delete(:quiet) if PDFKit.configuration.verbose?
- @options.merge! find_options_in_meta(url_file_or_html) unless source.url?
- @options = normalize_options(@options)
+ options = PDFKit.configuration.default_options.merge(options)
+ options.delete(:quiet) if PDFKit.configuration.verbose?
+ options.merge! find_options_in_meta(url_file_or_html) unless source.url?
+ @root_url = options.delete(:root_url)
+ @protocol = options.delete(:protocol)
+ @renderer = WkHTMLtoPDF.new options
+ @renderer.normalize_options
raise NoExecutableError.new unless File.exists?(PDFKit.configuration.wkhtmltopdf)
end
def command(path = nil)
- args = @options.to_a.flatten.compact
- shell_escaped_command = [executable, shell_escape_for_os(args)].join ' '
+ args = @renderer.options_for_command
+ shell_escaped_command = [executable, OS::shell_escape_for_os(args)].join ' '
# In order to allow for URL parameters (e.g. https://www.google.com/search?q=pdfkit) we do
# not escape the source. The user is responsible for ensuring that no vulnerabilities exist
# in the source. Please see https://github.com/pdfkit/pdfkit/issues/164.
input_for_command = @source.to_input_for_command
output_for_command = path ? Shellwords.shellescape(path) : '-'
"#{shell_escaped_command} #{input_for_command} #{output_for_command}"
end
+ def options
+ # TODO(cdwort,sigmavirus24): Replace this with an attr_reader for @renderer instead in 1.0.0
+ @renderer.options
+ end
+
def executable
- default = PDFKit.configuration.wkhtmltopdf
- return default if default !~ /^\// # its not a path, so nothing we can do
- if File.exist?(default)
- default
- else
- default.split('/').last
- end
+ PDFKit.configuration.wkhtmltopdf
end
def to_pdf(path=nil)
+ preprocess_html
append_stylesheets
invoke = command(path)
result = IO.popen(invoke, "wb+") do |pdf|
@@ -77,15 +79,10 @@
File.new(path)
end
protected
- # Pulled from:
- # https://github.com/wkhtmltopdf/wkhtmltopdf/blob/ebf9b6cfc4c58a31349fb94c568b254fac37b3d3/README_WKHTMLTOIMAGE#L27
- REPEATABLE_OPTIONS = %w[--allow --cookie --custom-header --post --post-file --run-script]
- SPECIAL_OPTIONS = %w[cover toc]
-
def find_options_in_meta(content)
# Read file if content is a File
content = content.read if content.is_a?(File)
found = {}
@@ -109,10 +106,17 @@
def style_tag_for(stylesheet)
"<style>#{File.read(stylesheet)}</style>"
end
+ def preprocess_html
+ if @source.html?
+ processed_html = PDFKit::HTMLPreprocessor.process(@source.to_s, @root_url, @protocol)
+ @source = Source.new(processed_html)
+ end
+ end
+
def append_stylesheets
raise ImproperSourceError.new('Stylesheets may only be added to an HTML source') if stylesheets.any? && !@source.html?
stylesheets.each do |stylesheet|
if @source.to_s.match(/<\/head>/)
@@ -121,93 +125,19 @@
@source.to_s.insert(0, style_tag_for(stylesheet))
end
end
end
- def normalize_options(options)
- normalized_options = {}
-
- options.each do |key, value|
- next if !value
-
- # The actual option for wkhtmltopdf
- normalized_key = normalize_arg key
- normalized_key = "--#{normalized_key}" unless SPECIAL_OPTIONS.include?(normalized_key)
-
- # If the option is repeatable, attempt to normalize all values
- if REPEATABLE_OPTIONS.include? normalized_key
- normalize_repeatable_value(normalized_key, value) do |normalized_unique_key, normalized_value|
- normalized_options[normalized_unique_key] = normalized_value
- end
- else # Otherwise, just normalize it like usual
- normalized_options[normalized_key] = normalize_value(value)
- end
- end
-
- normalized_options
- end
-
- def normalize_arg(arg)
- arg.to_s.downcase.gsub(/[^a-z0-9]/,'-')
- end
-
- def normalize_value(value)
- case value
- when nil
- nil
- when TrueClass, 'true' #ie, ==true, see http://www.ruby-doc.org/core-1.9.3/TrueClass.html
- nil
- when Hash
- value.to_a.flatten.collect{|x| normalize_value(x)}.compact
- when Array
- value.flatten.collect{|x| x.to_s}
- else
- (host_is_windows? && value.to_s.index(' ')) ? "'#{ value.to_s }'" : value.to_s
- end
- end
-
- def normalize_repeatable_value(option_name, value)
- case value
- when Hash, Array
- value.each do |(key, val)|
- yield [[option_name, normalize_value(key)], normalize_value(val)]
- end
- else
- yield [[option_name, normalize_value(value)], nil]
- end
- end
-
def successful?(status)
return true if status.success?
# Some of the codes: https://code.google.com/p/wkhtmltopdf/issues/detail?id=1088
# returned when assets are missing (404): https://code.google.com/p/wkhtmltopdf/issues/detail?id=548
- return true if status.exitstatus == 2 && error_handling?
+ return true if status.exitstatus == 2 && @renderer.error_handling?
false
end
def empty_result?(path, result)
(path && File.size(path) == 0) || (path.nil? && result.to_s.strip.empty?)
- end
-
- def error_handling?
- @options.key?('--ignore-load-errors') ||
- # wkhtmltopdf v0.10.0 beta4 replaces ignore-load-errors with load-error-handling
- # https://code.google.com/p/wkhtmltopdf/issues/detail?id=55
- %w(skip ignore).include?(@options['--load-error-handling'])
- end
-
- def host_is_windows?
- @host_is_windows ||= !(RbConfig::CONFIG['host_os'] =~ /mswin|msys|mingw|cygwin|bccwin|wince/).nil?
- end
-
- def shell_escape_for_os(args)
- if (host_is_windows?)
- # Windows reserved shell characters are: & | ( ) < > ^
- # See http://technet.microsoft.com/en-us/library/cc723564.aspx#XSLTsection123121120120
- args.map { |arg| arg.gsub(/([&|()<>^])/,'^\1') }.join(" ")
- else
- args.shelljoin
- end
end
end