module Landable
module TidyService
# Raised when tidy is not installed, or when a tidy run fails and the caller
# asked for failures to be raised.
class TidyError < StandardError
end
# Command-line flags passed to the tidy executable. They are joined with
# single spaces when the command line is built, so each entry may hold a
# flag together with its value.
mattr_accessor :options
@@options = [
  "-utf8",                    # utf-8 is what we have
  "-indent",                  # two-space soft indents
  "--wrap 0",                 # never wrap long lines
  "--clean true",             # let tidy make some guesses about how the code should look
  "--bare true",              # strip Microsoft Word leftovers
  "--quote-ampersand true",   # quote ampersands
  "--break-before-br true",   # whitespace niceness: line break ahead of <br>
  "--merge-divs false",       # allow nested divs - do not let tidy merge them
  "--quiet true",             # keep tidy quiet ...
  "--show-warnings false",    # ... and do not report warnings
]
# Names of the liquid tags that themselves render markup. These are treated
# as element-level content, so they get tidied along with the rest of the dom.
mattr_accessor :liquid_elements
@@liquid_elements = %w[
  template
  title_tag
  meta_tags
  img_tag
]
# Strict flavour of call: tidies +input+ with the :raise_on_error option
# switched on.
#
# input - the markup to tidy
#
# Returns whatever call returns.
def self.call!(input)
  call(input, raise_on_error: true)
end
# Pipes +input+ through the tidy command-line tool and wraps what comes back.
#
# Known liquid element tags are passed through wrap_liquid before the run and
# through unwrap_liquid afterwards, so that tidy only ever sees markup.
#
# input           - the markup to tidy
# runtime_options - Hash of per-call switches:
#                   :raise_on_error - when truthy, a failed tidy run raises
#                                     instead of returning tidy's output
#
# Returns a Result built from tidy's stdout.
# Raises TidyError when tidy is not installed, or when the run failed and
# :raise_on_error was given.
def self.call input, runtime_options={}
  unless tidyable?
    raise TidyError, 'Your system doesn\'t seem to have tidy installed. Please see https://github.com/w3c/tidy-html5.'
  end

  # hide known liquid tags so tidy can format around them
  wrapped = wrap_liquid input

  # off to tidy
  tidied = IO.popen("tidy #{options.join(' ')}", 'r+') do |io|
    io.puts wrapped
    # close our write end so tidy sees end-of-input before we read
    io.close_write
    io.read
  end

  # grab the child status right away, before anything else can replace $?
  status = $?

  # 0: success
  # 1: warning
  # 2: error
  # exitstatus is nil when tidy did not exit normally (e.g. it was killed by
  # a signal); treat that as a failure rather than comparing nil with 2
  failed = status.exitstatus.nil? || status.exitstatus >= 2
  if failed && runtime_options[:raise_on_error]
    raise TidyError, "Tidy exited with status #{status} - check stderr."
  end

  # put the liquid back, then hand out a Result, allowing access to specific
  # bits of the output
  Result.new unwrap_liquid(tidied)
end
# Tells whether a tidy executable can be located with `which`.
#
# A truthy answer is remembered for later calls; a falsy one is not, so the
# lookup is repeated next time.
def self.tidyable?
  return @@is_tidyable if defined?(@@is_tidyable) && @@is_tidyable

  @@is_tidyable = Kernel.system('which tidy > /dev/null')
end
protected
# Replaces every known liquid element tag (see liquid_elements) in +input+
# with an empty placeholder div that carries the tag, Base64-encoded, in its
# data-liquid attribute. Whitespace around the tag is swallowed and the
# placeholder is inserted between newlines, to allow tidy to nudge this
# element around as appropriate.
#
# input - String of markup that may contain liquid tags
#
# Returns a new String; +input+ itself is left untouched.
def self.wrap_liquid input
  tag_pattern = /\s*(\{% *(?:#{liquid_elements.join('|')}) *.*?%})\s*/

  input.gsub(tag_pattern) do
    # strict encoding emits no line breaks, so the attribute stays on one line
    encoded = Base64.strict_encode64(Regexp.last_match(1))
    "\n<div data-liquid=\"#{encoded}\"></div>\n"
  end
end

# Reverses wrap_liquid: every placeholder div carrying a data-liquid
# attribute is replaced by the liquid tag decoded from that attribute.
# Whitespace between the opening and closing tag of the placeholder is
# tolerated. Divs without the attribute are left alone.
#
# input - String of markup, possibly containing placeholders
#
# Returns a new String; +input+ itself is left untouched.
def self.unwrap_liquid input
  input.gsub(/<div data-liquid="([^"]*)">\s*<\/div>/) do |placeholder|
    # ensure we match utf8 for utf8
    Base64.decode64(Regexp.last_match(1)).force_encoding(placeholder.encoding)
  end
end
class Result < Object
# source - the complete text this result wraps; kept as-is in @source
def initialize(source)
  @source = source
end
# Returns the wrapped source exactly as it was given to the constructor.
def to_s
@source
end
# Extracts what sits between the opening <body ...> tag (with or without
# attributes) and the last </body> in the source.
#
# Returns the extracted text passed through deindent, or nil when the source
# has no body element.
def body
  match = @source.match(%r{<body(?:\s[^>]*)?>(.*)</body>}m)
  deindent match[1] if match
end
# Extracts what sits between the opening <head ...> tag (with or without
# attributes) and the last </head> in the source. Anything ahead of the
# opening tag, such as the doctype or <html>, is not part of the result.
#
# Returns the extracted text passed through deindent, or nil when the source
# has no head element.
def head
  match = @source.match(%r{<head(?:\s[^>]*)?>(.*)</head>}m)
  deindent match[1] if match
end
def css
links = head.try :scan, /]*type=['"]text\/css['"][^>]*>/
styles = head.try :scan, /